Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Geting xml data using xml parser expat

I have managed to parse ok. But now I am having trouble getting the values that I need. I can get the element and the attributes. But cannot get the values. I would like to get the value of frame in this xml it is 20.

/* track the current level in the xml tree */
static int depth = 0;
/* first when start element is encountered */
void start_element(void *data, const char *element, const char **attribute)
{
int i;

for(i = 0; i < depth; i++)
{
    printf(" ");
}

printf("%s", element);

for(i = 0; attribute[i]; i += 2)
{
    printf(" %s= '%s'", attribute[i], attribute[i + 1]);
}

printf("\n");
depth++;
}

/* decrement the current level of the tree */
void end_element(void *data, const char *el)
{
depth--;
}
int parse_xml(char *buff, size_t buff_size)
{
    FILE *fp;
    fp = fopen("start_indication.xml", "r");
    if(fp == NULL)
    {
    printf("Failed to open file\n");
    return 1;
    }

    XML_Parser parser = XML_ParserCreate(NULL);
    int done;
    XML_SetElementHandler(parser, start_element, end_element);

    memset(buff, 0, buff_size);
    printf("strlen(buff) before parsing: %d\n", strlen(buff));

    size_t file_size = 0;
    file_size = fread(buff, sizeof(char), buff_size, fp);

    /* parse the xml */
    if(XML_Parse(parser, buff, strlen(buff), XML_TRUE) == XML_STATUS_ERROR)
    {
        printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
    }

    fclose(fp);
    XML_ParserFree(parser);

    return 0;
}



<data>
    <header length="4">
            <item name="time" type="time">16</item>
            <item name="ref" type="string">3843747</item>
            <item name="port" type="int16">0</item>
            <item name="frame" type="int16">20</item>
    </header>
</data>

Output from parsing


Element: data
Element: header length= '4'
Element: item name= 'time' type= 'time'
Element: item name= 'ref' type= 'string'
Element: item name= 'port' type= 'int16'
Element: item name= 'frame' type= 'int16'
like image 671
ant2009 Avatar asked Mar 04 '09 05:03

ant2009


People also ask

What is expat XML parser?

Expat is a library, written in C, for parsing XML documents. It's the underlying XML parser for the open source Mozilla project, Perl's XML::Parser , Python's xml. parsers. expat , and other open-source XML parsers.

Does Python use expat?

Python allows XML data to be read and processed through its inbuilt module called expat. It is a non-validating XML parser. it creates an XML parser object and captures the attributes of its objects into various handler functions.

How does XML parser work?

XML parser is a software library or a package that provides interface for client applications to work with XML documents. It checks for proper format of the XML document and may also validate the XML documents. Modern day browsers have built-in XML parsers. The goal of a parser is to transform XML into a readable code.


2 Answers

It is quite difficult with expat. expat is better when you are only interested with the structure, not the content of the elements. Why not using libxml instead? What are your reasons for using an even-based parser like expat, rather than a tree-based one?

Anyway, the way to do it is to set a character data handler. Here is an example, based on your code:

#include <expat.h>
#include <stdio.h>
#include <string.h>

#define BUFFER_SIZE 100000

/* track the current level in the xml tree */
static int      depth = 0;

static char    *last_content;

/* first when start element is encountered */
void
start_element(void *data, const char *element, const char **attribute)
{
    int             i;

    for (i = 0; i < depth; i++) {
        printf(" ");
    }

    printf("%s", element);

    for (i = 0; attribute[i]; i += 2) {
        printf(" %s= '%s'", attribute[i], attribute[i + 1]);
    }

    printf("\n");
    depth++;
}

/* decrement the current level of the tree */
void
end_element(void *data, const char *el)
{
    int             i;
    for (i = 0; i < depth; i++) {
        printf(" ");
    }
    printf("Content of element %s was \"%s\"\n", el, last_content);
    depth--;
}

void
handle_data(void *data, const char *content, int length)
{
    char           *tmp = malloc(length);
    strncpy(tmp, content, length);
    tmp[length] = '\0';
    data = (void *) tmp;
    last_content = tmp;         /* TODO: concatenate the text nodes? */
}

int
parse_xml(char *buff, size_t buff_size)
{
    FILE           *fp;
    fp = fopen("start_indication.xml", "r");
    if (fp == NULL) {
        printf("Failed to open file\n");
        return 1;
    }

    XML_Parser      parser = XML_ParserCreate(NULL);
    XML_SetElementHandler(parser, start_element, end_element);
    XML_SetCharacterDataHandler(parser, handle_data);

    memset(buff, 0, buff_size);
    printf("strlen(buff) before parsing: %d\n", strlen(buff));

    size_t          file_size = 0;
    file_size = fread(buff, sizeof(char), buff_size, fp);

    /* parse the xml */
    if (XML_Parse(parser, buff, strlen(buff), XML_TRUE) == XML_STATUS_ERROR) {
        printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
    }

    fclose(fp);
    XML_ParserFree(parser);

    return 0;
}

int
main(int argc, char **argv)
{
    int             result;
    char            buffer[BUFFER_SIZE];
    result = parse_xml(buffer, BUFFER_SIZE);
    printf("Result is %i\n", result);
    return 0;
}
like image 167
bortzmeyer Avatar answered Nov 15 '22 08:11

bortzmeyer


The 'value' 20 is the character data "20" in the element whose tagname is "item" and whose name attribute is "frame".

To receive character data events, register a callback with the XML_SetCharacterDataHandler function.

This callback will receive the character data. The parser may split character data - typically to handle reaching the end of a buffer, or for entities (so for foo&amp;bar your handler will get three calls - "foo", "&" and "bar"), so you have to paste the string parts together again if you need the whole of the data.

You know when you have all the character data inside a node when you receive the next element start or close callback.

When you have all the character data, you can process it.

A stand-alone example simplified from your code:

#include <expat.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>

static const char* xml =
    "<data>\n"\
    "    <header length=\"4\">\n"\
    "            <item name=\"time\" type=\"time\">16</item>\n"\
    "            <item name=\"ref\" type=\"string\">3843747</item>\n"\
    "            <item name=\"port\" type=\"int16\">0</item>\n"\
    "            <item name=\"frame\" type=\"int16\">20</item>\n"\
    "    </header>\n"\
    "</data>\n";

void reset_char_data_buffer ();
void process_char_data_buffer ();
static bool grab_next_value;

void start_element(void *data, const char *element, const char **attribute) {
    process_char_data_buffer();
    reset_char_data_buffer();

    if ( strcmp("item", element) == 0 ) {
        size_t matched = 0;

        for (size_t i = 0; attribute[i]; i += 2) {
            if ( ( strcmp("name", attribute[i]) == 0 ) && ( strcmp("frame", attribute[i+1]) == 0 ) )
                ++matched;

            if ( ( strcmp("type", attribute[i]) == 0 ) && ( strcmp("int16", attribute[i+1]) == 0 ) )
                ++matched;
        }

        if (matched == 2) {
            printf("this is the element you are looking for\n");
            grab_next_value = true;
        }
    }
}

void end_element(void *data, const char *el) {
    process_char_data_buffer();
    reset_char_data_buffer();
}

static char char_data_buffer[1024];
static size_t offs;
static bool overflow;

void reset_char_data_buffer (void) {
    offs = 0;
    overflow = false;
    grab_next_value = false;
}

// pastes parts of the node together
void char_data (void *userData, const XML_Char *s, int len) {
    if (!overflow) {
        if (len + offs >= sizeof(char_data_buffer) ) {
            overflow = true;
        } else {
            memcpy(char_data_buffer + offs, s, len);
            offs += len;
        }
    }
}

// if the element is the one we're after, convert the character data to
// an integer value
void process_char_data_buffer (void) {
    if (offs > 0) {
        char_data_buffer[ offs ] = '\0';

        printf("character data: %s\n", char_data_buffer);

        if ( grab_next_value ) {
            int value = atoi( char_data_buffer );

            printf("the value is %d\n", value);
        }
    }
}

int main (void ) {
    XML_Parser parser = XML_ParserCreate(NULL);

    XML_SetElementHandler(parser, start_element, end_element);
    XML_SetCharacterDataHandler(parser, char_data);

    reset_char_data_buffer();

    if (XML_Parse(parser, xml, strlen(xml), XML_TRUE) == XML_STATUS_ERROR)
        printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));

    XML_ParserFree(parser);

    return 0;
}
like image 43
Pete Kirkham Avatar answered Nov 15 '22 06:11

Pete Kirkham