diff options
author | Daniel Silverstone <dsilvers@netsurf-browser.org> | 2012-03-24 14:59:13 +0000 |
---|---|---|
committer | Daniel Silverstone <dsilvers@netsurf-browser.org> | 2012-03-24 14:59:13 +0000 |
commit | 2eea8e30c3e5b2bdfc4abc19e2e94c0f795ceb6d (patch) | |
tree | 75e03edda4783902370da9d26146a9e6d8c81f05 /bindings | |
parent | 03ae3655cd0dd14156f54ac008cf1aa8eb8a39a8 (diff) | |
download | libdom-2eea8e30c3e5b2bdfc4abc19e2e94c0f795ceb6d.tar.gz libdom-2eea8e30c3e5b2bdfc4abc19e2e94c0f795ceb6d.tar.bz2 |
Beginnings of an expat binding -- NOT FUNCTIONAL YET
svn path=/trunk/libdom/; revision=13594
Diffstat (limited to 'bindings')
-rw-r--r-- | bindings/xml/Makefile | 18 | ||||
-rw-r--r-- | bindings/xml/expat_xmlparser.c | 346 | ||||
-rw-r--r-- | bindings/xml/libxml_xmlparser.c (renamed from bindings/xml/xmlparser.c) | 8 | ||||
-rw-r--r-- | bindings/xml/xmlerror.h | 2 |
4 files changed, 366 insertions, 8 deletions
```diff
diff --git a/bindings/xml/Makefile b/bindings/xml/Makefile
index a015193..12b4ca0 100644
--- a/bindings/xml/Makefile
+++ b/bindings/xml/Makefile
@@ -1,7 +1,5 @@
 ifeq ($(WITH_LIBXML_BINDING),yes)
-  DIR_SOURCES := xmlparser.c
-
-  DIR_INSTALL_ITEMS := /include/dom/bindings/libxml:xmlerror.h;xmlparser.h
+  DIR_SOURCES := libxml_xmlparser.c
 
   # LibXML2
   ifneq ($(PKGCONFIG),)
@@ -15,6 +13,20 @@ ifeq ($(WITH_LIBXML_BINDING),yes)
   # LibXML 2.6.26 has a bug in its headers that expects _POSIX_C_SOURCE to be
   # defined. Define it here, even though we don't need it.
   CFLAGS := $(CFLAGS) -D_POSIX_C_SOURCE
+
+  DO_XML_INSTALL := yes
+endif
+
+ifeq ($(WITH_EXPAT_BINDING),yes)
+  DIR_SOURCES := expat_xmlparser.c
+
+  LDFLAGS := $(LDFLAGS) -lexpat
+
+  DO_XML_INSTALL := yes
+endif
+
+ifeq ($(DO_XML_INSTALL),yes)
+  DIR_INSTALL_ITEMS := /include/dom/bindings/libxml:xmlerror.h;xmlparser.h
 endif
 
 include build/makefiles/Makefile.subdir
diff --git a/bindings/xml/expat_xmlparser.c b/bindings/xml/expat_xmlparser.c
new file mode 100644
index 0000000..ce649d7
--- /dev/null
+++ b/bindings/xml/expat_xmlparser.c
@@ -0,0 +1,346 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2012 Daniel Silverstone <dsilvers@netsurf-browser.org>
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <dom/dom.h>
+
+#include "xmlparser.h"
+#include "utils.h"
+
+#include <expat.h>
+
+/**
+ * expat XML parser object
+ */
+struct dom_xml_parser {
+	dom_msg msg;			/**< Informational message function */
+	void *mctx;			/**< Pointer to client data */
+	XML_Parser parser;		/**< expat parser context */
+	bool complete;			/**< Indicate stream completion */
+	struct dom_document *doc;	/**< DOM Document we're building */
+	struct dom_node *current;	/**< DOM node we're currently building */
+};
+
+/* Binding functions */
+
+static void
+expat_xmlparser_start_element_handler(void *_parser,
+				      const XML_Char *name,
+				      const XML_Char **atts)
+{
+	dom_xml_parser *parser = _parser;
+	dom_exception err;
+	dom_element *elem, *ins_elem;
+	dom_string *tag_name;
+
+	fprintf(stderr, "<%s>\n", name);
+
+	err = dom_string_create((const uint8_t *)name,
+				strlen(name),
+				&tag_name);
+	if (err != DOM_NO_ERR) {
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "No memory for tag name");
+		return;
+	}
+
+	err = dom_document_create_element(parser->doc, tag_name, &elem);
+	if (err != DOM_NO_ERR) {
+		dom_string_unref(tag_name);
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "Failed to create element '%s'", name);
+		return;
+	}
+
+	dom_string_unref(tag_name);
+
+	/* Add attributes to the element */
+
+	while (*atts) {
+		dom_string *key, *value;
+
+		err = dom_string_create((const uint8_t *)(*atts),
+					strlen(*atts), &key);
+		if (err != DOM_NO_ERR) {
+			parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+				    "No memory for attribute name");
+			dom_node_unref(elem);
+			return;
+		}
+		atts++;
+		err = dom_string_create((const uint8_t *)(*atts),
+					strlen(*atts), &value);
+		if (err != DOM_NO_ERR) {
+			dom_node_unref(elem);
+			dom_string_unref(key);
+			parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+				    "No memory for attribute value");
+			return;
+		}
+		atts++;
+
+		err = dom_element_set_attribute(elem, key, value);
+		dom_string_unref(key);
+		dom_string_unref(value);
+		if (err != DOM_NO_ERR) {
+			dom_node_unref(elem);
+			parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+				    "No memory for setting attribute");
+			return;
+		}
+	}
+
+	err = dom_node_append_child(parser->current, elem, &ins_elem);
+	if (err != DOM_NO_ERR) {
+		dom_node_unref(elem);
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "No memory for appending child node");
+		return;
+	}
+
+	dom_node_unref(ins_elem);
+
+	dom_node_unref(parser->current);
+	parser->current = (struct dom_node *)elem; /* Steal initial ref */
+}
+
+static void
+expat_xmlparser_end_element_handler(void *_parser,
+				    const XML_Char *name)
+{
+	dom_xml_parser *parser = _parser;
+	dom_exception err;
+	dom_node *parent;
+
+	UNUSED(name);
+
+	fprintf(stderr, "</%s>\n", name);
+
+	err = dom_node_get_parent_node(parser->current, &parent);
+
+	if (err != DOM_NO_ERR) {
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "Unable to find a parent while closing element.");
+		return;
+	}
+
+	dom_node_unref(parser->current);
+	parser->current = parent; /* Takes the ref given by get_parent_node */
+}
+
+static void
+expat_xmlparser_cdata_handler(void *_parser,
+			      const XML_Char *s,
+			      int len)
+{
+	dom_xml_parser *parser = _parser;
+	dom_string *data;
+	dom_exception err;
+	struct dom_cdata_section *cdata, *ins_cdata;
+
+	err = dom_string_create((const uint8_t *)s, len, &data);
+	if (err != DOM_NO_ERR) {
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "No memory for cdata section contents");
+		return;
+	}
+
+	err = dom_document_create_cdata_section(parser->doc, data, &cdata);
+	if (err != DOM_NO_ERR) {
+		dom_string_unref(data);
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "No memory for cdata section");
+		return;
+	}
+
+	/* No longer need data */
+	dom_string_unref(data);
+
+	/* Append cdata section to parent */
+	err = dom_node_append_child(parser->current, (struct dom_node *) cdata,
+			(struct dom_node **) (void *) &ins_cdata);
+	if (err != DOM_NO_ERR) {
+		dom_node_unref((struct dom_node *) cdata);
+		parser->msg(DOM_MSG_ERROR, parser->mctx,
+			    "Failed attaching cdata section");
+		return;
+	}
+
+	/* We're not interested in the inserted cdata section */
+	if (ins_cdata != NULL)
+		dom_node_unref((struct dom_node *) ins_cdata);
+
+	/* No longer interested in cdata section */
+	dom_node_unref((struct dom_node *) cdata);
+}
+
+static void
+expat_xmlparser_unknown_data_handler(void *_parser,
+				     const XML_Char *s,
+				     int len)
+{
+	UNUSED(_parser);
+
+	fprintf(stderr, "!!! %.*s !!!\n", len, s);
+}
+/**
+ * Create an XML parser instance
+ *
+ * \param enc      Source charset, or NULL
+ * \param int_enc  Desired charset of document buffer (UTF-8 or UTF-16)
+ * \param msg      Informational message function
+ * \param mctx     Pointer to client-specific private data
+ * \return Pointer to instance, or NULL on memory exhaustion
+ *
+ * int_enc is ignored due to it being made of bees.
+ */
+dom_xml_parser *
+dom_xml_parser_create(const char *enc, const char *int_enc,
+		      dom_msg msg, void *mctx)
+{
+	dom_xml_parser *parser;
+	dom_exception err;
+
+	UNUSED(int_enc);
+
+	parser = calloc(sizeof(*parser), 1);
+	if (parser == NULL) {
+		msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
+		return NULL;
+	}
+
+	parser->msg = msg;
+	parser->mctx = mctx;
+
+	parser->parser = XML_ParserCreateNS(enc, ':');
+
+	if (parser->parser == NULL) {
+		free(parser);
+		msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
+		return NULL;
+	}
+
+	parser->complete = false;
+	parser->doc = NULL;
+
+	err = dom_implementation_create_document(
+		DOM_IMPLEMENTATION_XML,
+		/* namespace */ NULL,
+		/* qname */ NULL,
+		/* doctype */ NULL,
+		NULL,
+		&parser->doc);
+
+	if (err != DOM_NO_ERR) {
+		parser->msg(DOM_MSG_CRITICAL, parser->mctx,
+			    "Failed creating document");
+		XML_ParserFree(parser->parser);
+		free(parser);
+		return NULL;
+	}
+
+	XML_SetUserData(parser->parser, parser);
+
+	XML_SetElementHandler(parser->parser,
+			      expat_xmlparser_start_element_handler,
+			      expat_xmlparser_end_element_handler);
+
+	XML_SetCharacterDataHandler(parser->parser,
+				    expat_xmlparser_cdata_handler);
+
+	XML_SetParamEntityParsing(parser->parser,
+				  XML_PARAM_ENTITY_PARSING_ALWAYS);
+
+	XML_SetDefaultHandler(parser->parser,
+			      expat_xmlparser_unknown_data_handler);
+
+	parser->current = dom_node_ref(parser->doc);
+
+	return parser;
+}
+
+/**
+ * Destroy an XML parser instance
+ *
+ * \param parser  The parser instance to destroy
+ */
+void
+dom_xml_parser_destroy(dom_xml_parser *parser)
+{
+	XML_ParserFree(parser->parser);
+
+	free(parser);
+}
+
+/**
+ * Parse a chunk of data
+ *
+ * \param parser  The XML parser instance to use for parsing
+ * \param data    Pointer to data chunk
+ * \param len     Byte length of data chunk
+ * \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <expat error> on failure
+ */
+dom_xml_error
+dom_xml_parser_parse_chunk(dom_xml_parser *parser, uint8_t *data, size_t len)
+{
+	enum XML_Status status;
+
+	status = XML_Parse(parser->parser, (const char *)data, len, 0);
+	if (status != XML_STATUS_OK) {
+		parser->msg(DOM_MSG_ERROR, parser->mctx,
+			    "XML_Parse failed: %d", status);
+		return DOM_XML_EXTERNAL_ERR | status;
+	}
+
+	return DOM_XML_OK;
+}
+
+/**
+ * Notify parser that datastream is empty
+ *
+ * \param parser  The XML parser instance to notify
+ * \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <expat error> on failure
+ *
+ * This will force any remaining data through the parser
+ */
+dom_xml_error
+dom_xml_parser_completed(dom_xml_parser *parser)
+{
+	enum XML_Status status;
+
+	status = XML_Parse(parser->parser, "", 0, 1);
+	if (status != XML_STATUS_OK) {
+		parser->msg(DOM_MSG_ERROR, parser->mctx,
+			    "XML_Parse failed: %d", status);
+		return DOM_XML_EXTERNAL_ERR | status;
+	}
+
+	parser->complete = true;
+
+	return DOM_XML_OK;
+
+}
+
+/**
+ * Retrieve the created DOM Document from a parser
+ *
+ * \param parser  The parser instance to retrieve the document from
+ * \return Pointer to document, or NULL if parsing is not complete
+ *
+ * This may only be called after xml_parser_completed().
+ */
+struct dom_document *
+dom_xml_parser_get_document(dom_xml_parser *parser)
+{
+	return (parser->complete ? parser->doc : NULL);
+}
diff --git a/bindings/xml/xmlparser.c b/bindings/xml/libxml_xmlparser.c
index 3ae1a3c..1c2442a 100644
--- a/bindings/xml/xmlparser.c
+++ b/bindings/xml/libxml_xmlparser.c
@@ -242,7 +242,7 @@ void dom_xml_parser_destroy(dom_xml_parser *parser)
  * \param parser  The XML parser instance to use for parsing
  * \param data    Pointer to data chunk
  * \param len     Byte length of data chunk
- * \return DOM_XML_OK on success, DOM_XML_LIBXML_ERR | <libxml error> on failure
+ * \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <libxml error> on failure
  */
 dom_xml_error dom_xml_parser_parse_chunk(dom_xml_parser *parser,
 		uint8_t *data, size_t len)
@@ -253,7 +253,7 @@ dom_xml_error dom_xml_parser_parse_chunk(dom_xml_parser *parser,
 	if (err != XML_ERR_OK) {
 		parser->msg(DOM_MSG_ERROR, parser->mctx,
 				"xmlParseChunk failed: %d", err);
-		return DOM_XML_LIBXML_ERR | err;
+		return DOM_XML_EXTERNAL_ERR | err;
 	}
 
 	return DOM_XML_OK;
@@ -263,7 +263,7 @@ dom_xml_error dom_xml_parser_parse_chunk(dom_xml_parser *parser,
  * Notify parser that datastream is empty
  *
  * \param parser  The XML parser instance to notify
- * \return DOM_XML_OK on success, DOM_XML_LIBXML_ERR | <libxml error> on failure
+ * \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <libxml error> on failure
  *
  * This will force any remaining data through the parser
  */
@@ -277,7 +277,7 @@ dom_xml_error dom_xml_parser_completed(dom_xml_parser *parser)
 	if (err != XML_ERR_OK) {
 		parser->msg(DOM_MSG_ERROR, parser->mctx,
 				"xmlParseChunk failed: %d", err);
-		return DOM_XML_LIBXML_ERR | err;
+		return DOM_XML_EXTERNAL_ERR | err;
 	}
 
 	parser->complete = true;
diff --git a/bindings/xml/xmlerror.h b/bindings/xml/xmlerror.h
index ca42f2b..51f2c8b 100644
--- a/bindings/xml/xmlerror.h
+++ b/bindings/xml/xmlerror.h
@@ -13,7 +13,7 @@ typedef enum {
 
 	DOM_XML_NOMEM = 1,
 
-	DOM_XML_LIBXML_ERR = (1<<16),
+	DOM_XML_EXTERNAL_ERR = (1<<16),
 } dom_xml_error;
 
 #endif
```