diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | examples/example.mk (renamed from examples/makefile) | 6 | ||||
-rw-r--r-- | examples/libxml.c | 1 | ||||
-rw-r--r-- | perf/example.mk (renamed from perf/makefile) | 12 | ||||
-rw-r--r-- | perf/hubbub.c | 221 | ||||
-rw-r--r-- | src/tokeniser/tokeniser.c | 4 | ||||
-rw-r--r-- | src/treebuilder/Makefile | 12 | ||||
-rw-r--r-- | src/treebuilder/element-type.c | 49 | ||||
-rw-r--r-- | src/treebuilder/element-type.gperf | 132 | ||||
-rw-r--r-- | src/treebuilder/element-type.h | 65 | ||||
-rw-r--r-- | src/treebuilder/in_body.c | 21 | ||||
-rw-r--r-- | src/treebuilder/internal.h | 30 | ||||
-rw-r--r-- | src/treebuilder/treebuilder.c | 126 | ||||
-rw-r--r-- | test/tokeniser2.c | 14 | ||||
-rw-r--r-- | test/tokeniser3.c | 14 |
15 files changed, 432 insertions, 279 deletions
@@ -2,11 +2,11 @@ # # Makefile for libhubbub # -# Copyright 2009-2015 John-Mark Bell <jmb@netsurf-browser.org> +# Copyright 2009-2020 John-Mark Bell <jmb@netsurf-browser.org> # Component settings COMPONENT := hubbub -COMPONENT_VERSION := 0.3.5 +COMPONENT_VERSION := 0.3.8 # Default to a static library COMPONENT_TYPE ?= lib-static diff --git a/examples/makefile b/examples/example.mk index 8c5d828..2d04553 100644 --- a/examples/makefile +++ b/examples/example.mk @@ -1,3 +1,9 @@ +# From the top level: +# +# make -C examples -f example.mk clean +# make -C examples -f example.mk +# ./examples/libxml test/data/html/misnested.html + CC := gcc LD := gcc diff --git a/examples/libxml.c b/examples/libxml.c index 9530ca3..7fa6090 100644 --- a/examples/libxml.c +++ b/examples/libxml.c @@ -14,6 +14,7 @@ #include <libxml/HTMLparser.h> #include <libxml/HTMLtree.h> +#include <libxml/debugXML.h> #include <hubbub/parser.h> #include <hubbub/tree.h> diff --git a/perf/makefile b/perf/example.mk index 7304ebb..a45fc8a 100644 --- a/perf/makefile +++ b/perf/example.mk @@ -1,3 +1,10 @@ +# From the top level: +# +# make -C perf -f example.mk clean +# make -C perf -f example.mk +# time ./perf/libxml2 ~/Downloads/html5.html +# time ./perf/hubbub ~/Downloads/html5.html + all: libxml2 hubbub CC = gcc @@ -15,3 +22,8 @@ hubbub: hubbub.c hubbub: CFLAGS += `pkg-config --cflags libparserutils libhubbub` hubbub: $(HUBBUB_OBJS) gcc -o hubbub $(HUBBUB_OBJS) `pkg-config --libs libhubbub libparserutils` + +.PHONY: clean +clean: + $(RM) hubbub $(HUBBUB_OBJS) + $(RM) libxml2 $(LIBXML2_OBJS) diff --git a/perf/hubbub.c b/perf/hubbub.c index 8c6f937..53d17c7 100644 --- a/perf/hubbub.c +++ b/perf/hubbub.c @@ -50,7 +50,8 @@ struct node_t { node_t *next; node_t *prev; - node_t *child; + node_t *child_first; + node_t *child_last; node_t *parent; }; @@ -62,7 +63,7 @@ struct buf_t { #define NUM_NAMESPACES 7 -const char const *ns_names[NUM_NAMESPACES] = +const char *ns_names[NUM_NAMESPACES] = { NULL, NULL /*html*/, 
"math", "svg", "xlink", "xml", "xmlns" }; @@ -70,25 +71,26 @@ node_t *Document; -static int create_comment(void *ctx, const hubbub_string *data, void **result); -static int create_doctype(void *ctx, const hubbub_doctype *doctype, +static hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result); +static hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype, void **result); -static int create_element(void *ctx, const hubbub_tag *tag, void **result); -static int create_text(void *ctx, const hubbub_string *data, void **result); -static int ref_node(void *ctx, void *node); -static int unref_node(void *ctx, void *node); -static int append_child(void *ctx, void *parent, void *child, void **result); -static int insert_before(void *ctx, void *parent, void *child, void *ref_child, +static hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result); +static hubbub_error create_text(void *ctx, const hubbub_string *data, void **result); +static hubbub_error ref_node(void *ctx, void *node); +static hubbub_error unref_node(void *ctx, void *node); +static hubbub_error append_child(void *ctx, void *parent, void *child, void **result); +static hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result); -static int remove_child(void *ctx, void *parent, void *child, void **result); -static int clone_node(void *ctx, void *node, bool deep, void **result); -static int reparent_children(void *ctx, void *node, void *new_parent); -static int get_parent(void *ctx, void *node, bool element_only, void **result); -static int has_children(void *ctx, void *node, bool *result); -static int form_associate(void *ctx, void *form, void *node); -static int add_attributes(void *ctx, void *node, +static hubbub_error remove_child(void *ctx, void *parent, void *child, void **result); +static hubbub_error clone_node(void *ctx, void *node, bool deep, void **result); +static hubbub_error reparent_children(void *ctx, 
void *node, void *new_parent); +static hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result); +static hubbub_error has_children(void *ctx, void *node, bool *result); +static hubbub_error form_associate(void *ctx, void *form, void *node); +static hubbub_error add_attributes(void *ctx, void *node, const hubbub_attribute *attributes, uint32_t n_attributes); -static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); +static hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode); +static hubbub_error change_encoding(void *ctx, const char *charset); static hubbub_tree_handler tree_handler = { create_comment, @@ -107,17 +109,11 @@ static hubbub_tree_handler tree_handler = { form_associate, add_attributes, set_quirks_mode, + change_encoding, NULL, NULL }; -static void *myrealloc(void *ptr, size_t len, void *pw) -{ - UNUSED(pw); - - return realloc(ptr, len); -} - int main(int argc, char **argv) @@ -129,16 +125,12 @@ int main(int argc, char **argv) int fd; uint8_t *file; - if (argc != 3) { - printf("Usage: %s <aliases_file> <filename>\n", argv[0]); + if (argc != 2) { + printf("Usage: %s <filename>\n", argv[0]); return 1; } - /* Initialise library */ - assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); - - assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == - HUBBUB_OK); + assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK); params.tree_handler = &tree_handler; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, @@ -148,22 +140,20 @@ int main(int argc, char **argv) assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, &params) == HUBBUB_OK); - stat(argv[2], &info); - fd = open(argv[2], 0); + stat(argv[1], &info); + fd = open(argv[1], 0); file = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0); assert(hubbub_parser_parse_chunk(parser, file, info.st_size) == HUBBUB_OK); - assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); - - return 0; + return HUBBUB_OK; } 
/*** Tree construction functions ***/ -int create_comment(void *ctx, const hubbub_string *data, void **result) +hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result) { node_t *node = calloc(1, sizeof *node); @@ -174,10 +164,10 @@ int create_comment(void *ctx, const hubbub_string *data, void **result) *result = node; - return 0; + return HUBBUB_OK; } -int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) +hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) { node_t *node = calloc(1, sizeof *node); @@ -202,10 +192,10 @@ int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) *result = node; - return 0; + return HUBBUB_OK; } -int create_element(void *ctx, const hubbub_tag *tag, void **result) +hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result) { node_t *node = calloc(1, sizeof *node); @@ -241,10 +231,10 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result) *result = node; - return 0; + return HUBBUB_OK; } -int create_text(void *ctx, const hubbub_string *data, void **result) +hubbub_error create_text(void *ctx, const hubbub_string *data, void **result) { node_t *node = calloc(1, sizeof *node); @@ -255,26 +245,26 @@ int create_text(void *ctx, const hubbub_string *data, void **result) *result = node; - return 0; + return HUBBUB_OK; } -int ref_node(void *ctx, void *node) +hubbub_error ref_node(void *ctx, void *node) { UNUSED(ctx); UNUSED(node); - return 0; + return HUBBUB_OK; } -int unref_node(void *ctx, void *node) +hubbub_error unref_node(void *ctx, void *node) { UNUSED(ctx); UNUSED(node); - return 0; + return HUBBUB_OK; } -int append_child(void *ctx, void *parent, void *child, void **result) +hubbub_error append_child(void *ctx, void *parent, void *child, void **result) { node_t *tparent = parent; node_t *tchild = child; @@ -287,26 +277,25 @@ int append_child(void *ctx, void *parent, void *child, void **result) tchild->next = 
tchild->prev = NULL; *result = child; - if (parent == (void *)1) { if (Document) { insert = Document; + while (insert->next != NULL) { + insert = insert->next; + } } else { Document = tchild; } } else { - if (tparent->child == NULL) { - tparent->child = tchild; + if (tparent->child_first == NULL) { + tparent->child_first = tchild; + tparent->child_last = tchild; } else { - insert = tparent->child; + insert = tparent->child_last; } } if (insert) { - while (insert->next != NULL) { - insert = insert->next; - } - if (tchild->type == CHARACTER && insert->type == CHARACTER) { insert->data.content = realloc(insert->data.content, strlen(insert->data.content) + @@ -316,14 +305,18 @@ int append_child(void *ctx, void *parent, void *child, void **result) } else { insert->next = tchild; tchild->prev = insert; + if (insert->parent != NULL && + insert->parent != (void *)1) { + insert->parent->child_last = insert; + } } } - return 0; + return HUBBUB_OK; } /* insert 'child' before 'ref_child', under 'parent' */ -int insert_before(void *ctx, void *parent, void *child, void *ref_child, +hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result) { node_t *tparent = parent; @@ -352,26 +345,31 @@ int insert_before(void *ctx, void *parent, void *child, void *ref_child, if (tchild->prev) tchild->prev->next = tchild; else - tparent->child = tchild; + tparent->child_first = tchild; *result = child; } - return 0; + return HUBBUB_OK; } -int remove_child(void *ctx, void *parent, void *child, void **result) +hubbub_error remove_child(void *ctx, void *parent, void *child, void **result) { node_t *tparent = parent; node_t *tchild = child; UNUSED(ctx); - assert(tparent->child); + assert(tparent->child_last); + assert(tparent->child_first); assert(tchild->parent == tparent); - if (tchild->parent->child == tchild) { - tchild->parent->child = tchild->next; + if (tchild->parent->child_first == tchild) { + tchild->parent->child_first = tchild->next; + } + + if 
(tchild->parent->child_last == tchild) { + tchild->parent->child_last = tchild->prev; } if (tchild->prev) @@ -385,10 +383,10 @@ int remove_child(void *ctx, void *parent, void *child, void **result) *result = child; - return 0; + return HUBBUB_OK; } -int clone_node(void *ctx, void *node, bool deep, void **result) +hubbub_error clone_node(void *ctx, void *node, bool deep, void **result) { node_t *old_node = node; node_t *new_node = calloc(1, sizeof *new_node); @@ -398,12 +396,8 @@ int clone_node(void *ctx, void *node, bool deep, void **result) *new_node = *old_node; *result = new_node; - new_node->child = new_node->parent = - new_node->next = new_node->prev = - NULL; - if (deep == false) - return 0; + return HUBBUB_OK; if (old_node->next) { void *n; @@ -412,85 +406,92 @@ int clone_node(void *ctx, void *node, bool deep, void **result) new_node->next = n; new_node->next->prev = new_node; + + new_node->parent = old_node->parent; + if (new_node->parent != NULL && new_node->parent != (void *)1) { + new_node->parent->child_last = new_node; + } } - if (old_node->child) { + if (old_node->child_first) { void *n; - clone_node(ctx, old_node->child, true, &n); + clone_node(ctx, old_node->child_first, true, &n); - new_node->child = n; - new_node->child->parent = new_node; + if (new_node) + new_node->child_last = n; + new_node->child_first = n; + new_node->child_first->parent = new_node; } - return 0; + return HUBBUB_OK; } /* Take all of the child nodes of "node" and append them to "new_parent" */ -int reparent_children(void *ctx, void *node, void *new_parent) +hubbub_error reparent_children(void *ctx, void *node, void *new_parent) { node_t *parent = new_parent; node_t *old_parent = node; node_t *insert; - node_t *kids; + node_t *kids_first; + node_t *kids_last; UNUSED(ctx); - kids = old_parent->child; - if (!kids) return 0; + kids_first = old_parent->child_first; + kids_last = old_parent->child_last; + if (!kids_first) return HUBBUB_OK; - old_parent->child = NULL; + 
old_parent->child_first = NULL; + old_parent->child_last = NULL; - insert = parent->child; + insert = parent->child_last; if (!insert) { - parent->child = kids; + parent->child_first = kids_first; } else { - while (insert->next != NULL) { - insert = insert->next; - } - - insert->next = kids; - kids->prev = insert; + insert->next = kids_first; + kids_first->prev = insert; } + parent->child_last = kids_last; - while (kids) { - kids->parent = parent; - kids = kids->next; + while (kids_first) { + kids_first->parent = parent; + kids_first = kids_first->next; } - return 0; + return HUBBUB_OK; } -int get_parent(void *ctx, void *node, bool element_only, void **result) +hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result) { UNUSED(ctx); UNUSED(element_only); *result = ((node_t *)node)->parent; - return 0; + return HUBBUB_OK; } -int has_children(void *ctx, void *node, bool *result) +hubbub_error has_children(void *ctx, void *node, bool *result) { UNUSED(ctx); - *result = ((node_t *)node)->child ? true : false; + *result = ((node_t *)node)->child_first ? 
true : false; - return 0; + return HUBBUB_OK; } -int form_associate(void *ctx, void *form, void *node) +hubbub_error form_associate(void *ctx, void *form, void *node) { UNUSED(ctx); UNUSED(form); UNUSED(node); - return 0; + return HUBBUB_OK; } -int add_attributes(void *ctx, void *vnode, +hubbub_error add_attributes(void *ctx, void *vnode, const hubbub_attribute *attributes, uint32_t n_attributes) { node_t *node = vnode; @@ -521,13 +522,21 @@ int add_attributes(void *ctx, void *vnode, } - return 0; + return HUBBUB_OK; } -int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) +hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode) { UNUSED(ctx); UNUSED(mode); - return 0; + return HUBBUB_OK; +} + +hubbub_error change_encoding(void *ctx, const char *charset) +{ + UNUSED(ctx); + UNUSED(charset); + + return HUBBUB_OK; } diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 2d9c4ed..78eeee3 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -271,7 +271,7 @@ static inline hubbub_error emit_current_comment(hubbub_tokeniser *tokeniser); static inline hubbub_error emit_current_doctype(hubbub_tokeniser *tokeniser, bool force_quirks); static hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, - hubbub_token *token); + const hubbub_token *token); /** * Create a hubbub tokeniser @@ -3365,7 +3365,7 @@ hubbub_error emit_current_doctype(hubbub_tokeniser *tokeniser, * \param token Token to emit */ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, - hubbub_token *token) + const hubbub_token *token) { hubbub_error err = HUBBUB_OK; diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile index 31feae1..77459f1 100644 --- a/src/treebuilder/Makefile +++ b/src/treebuilder/Makefile @@ -6,6 +6,16 @@ DIR_SOURCES := treebuilder.c \ in_cell.c in_select.c in_select_in_table.c \ in_foreign_content.c after_body.c in_frameset.c \ after_frameset.c after_after_body.c after_after_frameset.c \ - 
generic_rcdata.c + generic_rcdata.c element-type.c + +$(DIR)autogenerated-element-type.c: $(DIR)element-type.gperf + $(VQ)$(ECHO) " GPERF: $<" + $(Q)gperf --output-file=$@.tmp $< + $(Q)$(SED) -e 's/^\(const struct element_type_map\)/static \1/' $@.tmp >$@ + $(Q)$(RM) $@.tmp + +PRE_TARGETS := $(DIR)autogenerated-element-type.c + +CLEAN_ITEMS := $(DIR)autogenerated-element-type.c include $(NSBUILD)/Makefile.subdir diff --git a/src/treebuilder/element-type.c b/src/treebuilder/element-type.c new file mode 100644 index 0000000..7e2772c --- /dev/null +++ b/src/treebuilder/element-type.c @@ -0,0 +1,49 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org> + */ + +#include "treebuilder/element-type.h" + +/* Auto-generated by `gperf`. */ +#include "treebuilder/autogenerated-element-type.c" + +/* Exported function, documented in element-type.h */ +element_type element_type_from_name( + hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name) +{ + const struct element_type_map *value; + + UNUSED(treebuilder); + + value = hubbub_element_type_generated_lookup( + (const char *)tag_name->ptr, + tag_name->len); + if (value == NULL) { + return UNKNOWN; + } + + return value->type; +} + +/** + * Convert an element type to a name + * + * \param type The element type + * \return Pointer to name + */ +const char *element_type_to_name(element_type type) +{ + size_t i; + + for (i = 0; i < sizeof(wordlist) / sizeof(wordlist[0]); i++) { + if (wordlist[i].type == type) { + return wordlist[i].name; + } + } + + return "UNKNOWN"; +} diff --git a/src/treebuilder/element-type.gperf b/src/treebuilder/element-type.gperf new file mode 100644 index 0000000..c0980d3 --- /dev/null +++ b/src/treebuilder/element-type.gperf @@ -0,0 +1,132 @@ +/* + * This file is part of Hubbub. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org> + */ + +%language=ANSI-C +%compare-strncmp +%readonly-tables +%global-table +%ignore-case +%struct-type +%switch=1 +%define hash-function-name hubbub_element_type_generated_hash +%define lookup-function-name hubbub_element_type_generated_lookup + +%{ +#include <string.h> + +#include "treebuilder/element-type.h" + +%} + +struct element_type_map; +%% +a, A +address, ADDRESS +annotation-xml, ANNOTATION_XML +applet, APPLET +area, AREA +article, ARTICLE +aside, ASIDE +b, B +base, BASE +basefont, BASEFONT +bgsound, BGSOUND +big, BIG +blockquote, BLOCKQUOTE +body, BODY +br, BR +button, BUTTON +caption, CAPTION +center, CENTER +col, COL +colgroup, COLGROUP +command, COMMAND +dd, DD +desc, DESC +details, DETAILS +dialog, DIALOG +dir, DIR +div, DIV +dl, DL +dt, DT +em, EM +embed, EMBED +fieldset, FIELDSET +figcaption, FIGCAPTION +figure, FIGURE +font, FONT +footer, FOOTER +foreignobject, FOREIGNOBJECT +form, FORM +frame, FRAME +frameset, FRAMESET +h1, H1 +h2, H2 +h3, H3 +h4, H4 +h5, H5 +h6, H6 +head, HEAD +hr, HR +html, HTML +i, I +iframe, IFRAME +image, IMAGE +img, IMG +input, INPUT +isindex, ISINDEX +li, LI +link, LINK +listing, LISTING +malignmark, MALIGNMARK +marquee, MARQUEE +math, MATH +menu, MENU +meta, META +mglyph, MGLYPH +mi, MI +mn, MN +mo, MO +ms, MS +mtext, MTEXT +nobr, NOBR +noembed, NOEMBED +noframes, NOFRAMES +noscript, NOSCRIPT +object, OBJECT +ol, OL +optgroup, OPTGROUP +option, OPTION +output, OUTPUT +p, P +param, PARAM +plaintext, PLAINTEXT +pre, PRE +s, S +script, SCRIPT +select, SELECT +small, SMALL +spacer, SPACER +strike, STRIKE +strong, STRONG +style, STYLE +summary, SUMMARY +svg, SVG +table, TABLE +tbody, TBODY +td, TD +textarea, TEXTAREA +tfoot, TFOOT +th, TH +thead, THEAD +title, TITLE +tr, TR +tt, TT +u, U +ul, UL +wbr, WBR +xmp, XMP diff --git a/src/treebuilder/element-type.h 
b/src/treebuilder/element-type.h new file mode 100644 index 0000000..75612fd --- /dev/null +++ b/src/treebuilder/element-type.h @@ -0,0 +1,65 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org> + */ + +#ifndef hubbub_treebuilder_element_type_h_ +#define hubbub_treebuilder_element_type_h_ + +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + +typedef enum +{ +/* Special */ + ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, + BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS, + DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER, + FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME, + IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV, + NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, + PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA, + TFOOT, THEAD, TITLE, TR, UL, WBR, +/* Scoping */ + APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, +/* Formatting */ + A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, +/* Phrasing */ + /**< \todo Enumerate phrasing elements */ + LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, +/* MathML */ + MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML, +/* SVG */ + SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */ + DESC, + UNKNOWN +} element_type; + +struct element_type_map { + const char *name; + element_type type; +}; + +/** + * Convert an element name into an element type + * + * \param treebuilder The treebuilder instance + * \param tag_name The tag name to consider + * \return The corresponding element type + */ +element_type element_type_from_name( + hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name); + +/** + * Convert an element type to a name + * + * \param 
type The element type + * \return Pointer to name + */ +const char *element_type_to_name(element_type type); + +#endif + diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c index 5157e66..0e4184c 100644 --- a/src/treebuilder/in_body.c +++ b/src/treebuilder/in_body.c @@ -267,9 +267,10 @@ hubbub_error process_start_tag(hubbub_treebuilder *treebuilder, type == DATAGRID || type == DETAILS || type == DIALOG || type == DIR || type == DIV || type == DL || type == FIELDSET || - type == FIGURE || type == FOOTER || - type == HEADER || type == MENU || type == NAV || - type == OL || type == P || type == SECTION || + type == FIGCAPTION || type == FIGURE || + type == FOOTER || type == HEADER || type == MAIN || + type == MENU || type == NAV || type == OL || + type == P || type == SECTION || type == SUMMARY || type == UL) { err = process_container_in_body(treebuilder, token); } else if (type == H1 || type == H2 || type == H3 || @@ -430,12 +431,14 @@ hubbub_error process_end_tag(hubbub_treebuilder *treebuilder, * that wasn't ignored, reprocess this token */ err = process_0body_in_body(treebuilder); } else if (type == ADDRESS || type == ARTICLE || type == ASIDE || - type == BLOCKQUOTE || type == CENTER || type == DIR || - type == DATAGRID || type == DIV || type == DL || - type == FIELDSET || type == FOOTER || type == HEADER || - type == LISTING || type == MENU || type == NAV || - type == OL || type == PRE || type == SECTION || - type == UL) { + type == BLOCKQUOTE || type == CENTER || + type == DETAILS || type == DIALOG || type == DIR || + type == DATAGRID || type == DIV || type == DL || + type == FIELDSET || type == FIGCAPTION || + type == FIGURE || type == FOOTER || type == HEADER || + type == LISTING || type == MAIN|| type == MENU || + type == NAV || type == OL || type == PRE || + type == SECTION || type == SUMMARY || type == UL) { err = process_0container_in_body(treebuilder, type); } else if (type == FORM) { err = process_0form_in_body(treebuilder); diff --git 
a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 58c21d6..debc33e 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -9,32 +9,7 @@ #define hubbub_treebuilder_internal_h_ #include "treebuilder/treebuilder.h" - -typedef enum -{ -/* Special */ - ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, - BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS, - DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGURE, FOOTER, FORM, FRAME, - FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME, IMAGE, IMG, - INPUT, ISINDEX, LI, LINK, LISTING, MENU, META, NAV, NOEMBED, NOFRAMES, - NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, - SECTION, SELECT, SPACER, STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, - TR, UL, WBR, -/* Scoping */ - APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, -/* Formatting */ - A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, -/* Phrasing */ - /**< \todo Enumerate phrasing elements */ - LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, -/* MathML */ - MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML, -/* SVG */ - SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */ - DESC, - UNKNOWN -} element_type; +#include "treebuilder/element-type.h" /** * Item on the element stack @@ -153,9 +128,6 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder, const hubbub_string *string); hubbub_error complete_script(hubbub_treebuilder *treebuilder); -element_type element_type_from_name(hubbub_treebuilder *treebuilder, - const hubbub_string *tag_name); - bool is_special_element(element_type type); bool is_scoping_element(element_type type); bool is_formatting_element(element_type type); diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index b67aa53..f11875e 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -17,69 +17,6 @@ #include "utils/utils.h" #include 
"utils/string.h" - -#define S(x) x, SLEN(x) - -static const struct { - const char *name; - size_t len; - element_type type; -} name_type_map[] = { - { S("address"), ADDRESS }, { S("area"), AREA }, - { S("base"), BASE }, { S("basefont"), BASEFONT }, - { S("bgsound"), BGSOUND }, { S("blockquote"), BLOCKQUOTE }, - { S("body"), BODY }, { S("br"), BR }, - { S("center"), CENTER }, { S("col"), COL }, - { S("colgroup"), COLGROUP }, { S("dd"), DD }, - { S("dir"), DIR }, { S("div"), DIV }, - { S("dl"), DL }, { S("dt"), DT }, - { S("embed"), EMBED }, { S("fieldset"), FIELDSET }, - { S("form"), FORM }, { S("frame"), FRAME }, - { S("frameset"), FRAMESET }, { S("h1"), H1 }, - { S("h2"), H2 }, { S("h3"), H3 }, - { S("h4"), H4 }, { S("h5"), H5 }, - { S("h6"), H6 }, { S("head"), HEAD }, - { S("hr"), HR }, { S("iframe"), IFRAME }, - { S("image"), IMAGE }, { S("img"), IMG }, - { S("input"), INPUT }, { S("isindex"), ISINDEX }, - { S("li"), LI }, { S("link"), LINK }, - { S("listing"), LISTING }, - { S("menu"), MENU }, - { S("meta"), META }, { S("noembed"), NOEMBED }, - { S("noframes"), NOFRAMES }, { S("noscript"), NOSCRIPT }, - { S("ol"), OL }, { S("optgroup"), OPTGROUP }, - { S("option"), OPTION }, { S("output"), OUTPUT }, - { S("p"), P }, { S("param"), PARAM }, - { S("plaintext"), PLAINTEXT }, { S("pre"), PRE }, - { S("script"), SCRIPT }, { S("select"), SELECT }, - { S("spacer"), SPACER }, { S("style"), STYLE }, - { S("tbody"), TBODY }, { S("textarea"), TEXTAREA }, - { S("tfoot"), TFOOT }, { S("thead"), THEAD }, - { S("title"), TITLE }, { S("tr"), TR }, - { S("ul"), UL }, { S("wbr"), WBR }, - { S("applet"), APPLET }, { S("button"), BUTTON }, - { S("caption"), CAPTION }, { S("html"), HTML }, - { S("marquee"), MARQUEE }, { S("object"), OBJECT }, - { S("table"), TABLE }, { S("td"), TD }, - { S("th"), TH }, - { S("a"), A }, { S("b"), B }, - { S("big"), BIG }, { S("em"), EM }, - { S("font"), FONT }, { S("i"), I }, - { S("nobr"), NOBR }, { S("s"), S }, - { S("small"), SMALL }, { 
S("strike"), STRIKE }, - { S("strong"), STRONG }, { S("tt"), TT }, - { S("u"), U }, { S("xmp"), XMP }, - - { S("math"), MATH }, { S("mglyph"), MGLYPH }, - { S("malignmark"), MALIGNMARK }, - { S("mi"), MI }, { S("mo"), MO }, - { S("mn"), MN }, { S("ms"), MS }, - { S("mtext"), MTEXT }, { S("annotation-xml"), ANNOTATION_XML }, - - { S("svg"), SVG }, { S("desc"), DESC }, - { S("foreignobject"), FOREIGNOBJECT }, -}; - static bool is_form_associated(element_type type); /** @@ -978,36 +915,6 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder, } /** - * Convert an element name into an element type - * - * \param treebuilder The treebuilder instance - * \param tag_name The tag name to consider - * \return The corresponding element type - */ -element_type element_type_from_name(hubbub_treebuilder *treebuilder, - const hubbub_string *tag_name) -{ - const uint8_t *name = tag_name->ptr; - size_t len = tag_name->len; - uint32_t i; - - UNUSED(treebuilder); - - /** \todo optimise this */ - - for (i = 0; i < N_ELEMENTS(name_type_map); i++) { - if (name_type_map[i].len != len) - continue; - - if (strncasecmp(name_type_map[i].name, - (const char *) name, len) == 0) - return name_type_map[i].type; - } - - return UNKNOWN; -} - -/** * Determine if a node is a special element * * \param type Node type to consider @@ -1391,15 +1298,21 @@ hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder, *node = entry->details.node; *stack_index = entry->stack_index; - if (entry->prev == NULL) + if (entry->prev == NULL) { + assert(treebuilder->context.formatting_list == entry); treebuilder->context.formatting_list = entry->next; - else + } else { + assert(treebuilder->context.formatting_list != entry); entry->prev->next = entry->next; + } - if (entry->next == NULL) + if (entry->next == NULL) { + assert(treebuilder->context.formatting_list_end == entry); treebuilder->context.formatting_list_end = entry->prev; - else + } else { + assert(treebuilder->context.formatting_list_end != 
entry); entry->next->prev = entry->prev; + } free(entry); @@ -1484,24 +1397,5 @@ void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp) } } -/** - * Convert an element type to a name - * - * \param type The element type - * \return Pointer to name - */ -const char *element_type_to_name(element_type type) -{ - size_t i; - - for (i = 0; - i < sizeof(name_type_map) / sizeof(name_type_map[0]); - i++) { - if (name_type_map[i].type == type) - return name_type_map[i].name; - } - - return "UNKNOWN"; -} #endif diff --git a/test/tokeniser2.c b/test/tokeniser2.c index c8ab9c0..f468d1c 100644 --- a/test/tokeniser2.c +++ b/test/tokeniser2.c @@ -48,7 +48,7 @@ int main(int argc, char **argv) } json = json_object_from_file(argv[1]); - assert(!is_error(json)); + assert(json != NULL); assert(strcmp((char *) ((json_object_get_object(json)->head)->k), "tests") == 0); @@ -57,7 +57,7 @@ int main(int argc, char **argv) tests = json_object_get_array((struct json_object *) (json_object_get_object(json)->head)->v); - for (i = 0; i < array_list_length(tests); i++) { + for (i = 0; i < (int)array_list_length(tests); i++) { /* Get test */ struct json_object *test = (struct json_object *) array_list_get_idx(tests, i); @@ -216,7 +216,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) struct json_object *obj = NULL; struct array_list *items; - for (; ctx->output_index < array_list_length(ctx->output); + for (; ctx->output_index < (int)array_list_length(ctx->output); ctx->output_index++) { /* Get object for index */ obj = (struct json_object *) @@ -236,11 +236,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) * produced more tokens than expected. We allow for the generation * of a terminating EOF token, however. 
*/ assert("too many tokens" && - (ctx->output_index < array_list_length(ctx->output) || + (ctx->output_index < (int)array_list_length(ctx->output) || token->type == HUBBUB_TOKEN_EOF)); /* Got a terminating EOF -- no error */ - if (ctx->output_index >= array_list_length(ctx->output)) + if (ctx->output_index >= (int)array_list_length(ctx->output)) return HUBBUB_OK; /* Now increment the output index so we don't re-expect this token */ @@ -438,7 +438,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) /* Expected token only contained part of the data * Calculate how much is left, then try again with * the next expected token */ - hubbub_token t; + hubbub_token t = *token; t.type = HUBBUB_TOKEN_CHARACTER; t.data.character.ptr += len; @@ -446,7 +446,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) ctx->char_off = 0; - token_handler(&t, pw); + return token_handler(&t, pw); } else if (strlen(expstr + ctx->char_off) > token->data.character.len) { /* Tokeniser output only contained part of the data diff --git a/test/tokeniser3.c b/test/tokeniser3.c index e33d018..eb921ce 100644 --- a/test/tokeniser3.c +++ b/test/tokeniser3.c @@ -46,7 +46,7 @@ int main(int argc, char **argv) } json = json_object_from_file(argv[1]); - assert(!is_error(json)); + assert(json != NULL); assert(strcmp((char *) ((json_object_get_object(json)->head)->k), "tests") == 0); @@ -55,7 +55,7 @@ int main(int argc, char **argv) tests = json_object_get_array((struct json_object *) (json_object_get_object(json)->head)->v); - for (i = 0; i < array_list_length(tests); i++) { + for (i = 0; i < (int)array_list_length(tests); i++) { /* Get test */ struct json_object *test = (struct json_object *) array_list_get_idx(tests, i); @@ -221,7 +221,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) struct json_object *obj = NULL; struct array_list *items; - for (; ctx->output_index < array_list_length(ctx->output); + for (; ctx->output_index < 
(int)array_list_length(ctx->output); ctx->output_index++) { /* Get object for index */ obj = (struct json_object *) @@ -241,11 +241,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) * produced more tokens than expected. We allow for the generation * of a terminating EOF token, however. */ assert("too many tokens" && - (ctx->output_index < array_list_length(ctx->output) || + (ctx->output_index < (int)array_list_length(ctx->output) || token->type == HUBBUB_TOKEN_EOF)); /* Got a terminating EOF -- no error */ - if (ctx->output_index >= array_list_length(ctx->output)) + if (ctx->output_index >= (int)array_list_length(ctx->output)) return HUBBUB_OK; /* Now increment the output index so we don't re-expect this token */ @@ -447,7 +447,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) /* Expected token only contained part of the data * Calculate how much is left, then try again with * the next expected token */ - hubbub_token t; + hubbub_token t = *token; t.type = HUBBUB_TOKEN_CHARACTER; t.data.character.ptr += len; @@ -455,7 +455,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw) ctx->char_off = 0; - token_handler(&t, pw); + return token_handler(&t, pw); } else if (strlen(expstr + ctx->char_off) > token->data.character.len) { /* Tokeniser output only contained part of the data |