summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 4
-rw-r--r-- examples/example.mk (renamed from examples/makefile) 6
-rw-r--r-- examples/libxml.c 1
-rw-r--r-- perf/example.mk (renamed from perf/makefile) 12
-rw-r--r-- perf/hubbub.c 221
-rw-r--r-- src/tokeniser/tokeniser.c 4
-rw-r--r-- src/treebuilder/Makefile 12
-rw-r--r-- src/treebuilder/element-type.c 49
-rw-r--r-- src/treebuilder/element-type.gperf 132
-rw-r--r-- src/treebuilder/element-type.h 65
-rw-r--r-- src/treebuilder/in_body.c 21
-rw-r--r-- src/treebuilder/internal.h 30
-rw-r--r-- src/treebuilder/treebuilder.c 126
-rw-r--r-- test/tokeniser2.c 14
-rw-r--r-- test/tokeniser3.c 14
15 files changed, 432 insertions, 279 deletions
diff --git a/Makefile b/Makefile
index e4eec54..846a5fb 100644
--- a/Makefile
+++ b/Makefile
@@ -2,11 +2,11 @@
#
# Makefile for libhubbub
#
-# Copyright 2009-2015 John-Mark Bell <jmb@netsurf-browser.org>
+# Copyright 2009-2020 John-Mark Bell <jmb@netsurf-browser.org>
# Component settings
COMPONENT := hubbub
-COMPONENT_VERSION := 0.3.5
+COMPONENT_VERSION := 0.3.8
# Default to a static library
COMPONENT_TYPE ?= lib-static
diff --git a/examples/makefile b/examples/example.mk
index 8c5d828..2d04553 100644
--- a/examples/makefile
+++ b/examples/example.mk
@@ -1,3 +1,9 @@
+# From the top level:
+#
+# make -C examples -f example.mk clean
+# make -C examples -f example.mk
+# ./examples/libxml test/data/html/misnested.html
+
CC := gcc
LD := gcc
diff --git a/examples/libxml.c b/examples/libxml.c
index 9530ca3..7fa6090 100644
--- a/examples/libxml.c
+++ b/examples/libxml.c
@@ -14,6 +14,7 @@
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
+#include <libxml/debugXML.h>
#include <hubbub/parser.h>
#include <hubbub/tree.h>
diff --git a/perf/makefile b/perf/example.mk
index 7304ebb..a45fc8a 100644
--- a/perf/makefile
+++ b/perf/example.mk
@@ -1,3 +1,10 @@
+# From the top level:
+#
+# make -C perf -f example.mk clean
+# make -C perf -f example.mk
+# time ./perf/libxml2 ~/Downloads/html5.html
+# time ./perf/hubbub ~/Downloads/html5.html
+
all: libxml2 hubbub
CC = gcc
@@ -15,3 +22,8 @@ hubbub: hubbub.c
hubbub: CFLAGS += `pkg-config --cflags libparserutils libhubbub`
hubbub: $(HUBBUB_OBJS)
gcc -o hubbub $(HUBBUB_OBJS) `pkg-config --libs libhubbub libparserutils`
+
+.PHONY: clean
+clean:
+ $(RM) hubbub $(HUBBUB_OBJS)
+ $(RM) libxml2 $(LIBXML2_OBJS)
diff --git a/perf/hubbub.c b/perf/hubbub.c
index 8c6f937..53d17c7 100644
--- a/perf/hubbub.c
+++ b/perf/hubbub.c
@@ -50,7 +50,8 @@ struct node_t {
node_t *next;
node_t *prev;
- node_t *child;
+ node_t *child_first;
+ node_t *child_last;
node_t *parent;
};
@@ -62,7 +63,7 @@ struct buf_t {
#define NUM_NAMESPACES 7
-const char const *ns_names[NUM_NAMESPACES] =
+const char *ns_names[NUM_NAMESPACES] =
{ NULL, NULL /*html*/, "math", "svg", "xlink", "xml", "xmlns" };
@@ -70,25 +71,26 @@ node_t *Document;
-static int create_comment(void *ctx, const hubbub_string *data, void **result);
-static int create_doctype(void *ctx, const hubbub_doctype *doctype,
+static hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result);
+static hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype,
void **result);
-static int create_element(void *ctx, const hubbub_tag *tag, void **result);
-static int create_text(void *ctx, const hubbub_string *data, void **result);
-static int ref_node(void *ctx, void *node);
-static int unref_node(void *ctx, void *node);
-static int append_child(void *ctx, void *parent, void *child, void **result);
-static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+static hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result);
+static hubbub_error create_text(void *ctx, const hubbub_string *data, void **result);
+static hubbub_error ref_node(void *ctx, void *node);
+static hubbub_error unref_node(void *ctx, void *node);
+static hubbub_error append_child(void *ctx, void *parent, void *child, void **result);
+static hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child,
void **result);
-static int remove_child(void *ctx, void *parent, void *child, void **result);
-static int clone_node(void *ctx, void *node, bool deep, void **result);
-static int reparent_children(void *ctx, void *node, void *new_parent);
-static int get_parent(void *ctx, void *node, bool element_only, void **result);
-static int has_children(void *ctx, void *node, bool *result);
-static int form_associate(void *ctx, void *form, void *node);
-static int add_attributes(void *ctx, void *node,
+static hubbub_error remove_child(void *ctx, void *parent, void *child, void **result);
+static hubbub_error clone_node(void *ctx, void *node, bool deep, void **result);
+static hubbub_error reparent_children(void *ctx, void *node, void *new_parent);
+static hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result);
+static hubbub_error has_children(void *ctx, void *node, bool *result);
+static hubbub_error form_associate(void *ctx, void *form, void *node);
+static hubbub_error add_attributes(void *ctx, void *node,
const hubbub_attribute *attributes, uint32_t n_attributes);
-static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+static hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+static hubbub_error change_encoding(void *ctx, const char *charset);
static hubbub_tree_handler tree_handler = {
create_comment,
@@ -107,17 +109,11 @@ static hubbub_tree_handler tree_handler = {
form_associate,
add_attributes,
set_quirks_mode,
+ change_encoding,
NULL,
NULL
};
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
int main(int argc, char **argv)
@@ -129,16 +125,12 @@ int main(int argc, char **argv)
int fd;
uint8_t *file;
- if (argc != 3) {
- printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+ if (argc != 2) {
+ printf("Usage: %s <filename>\n", argv[0]);
return 1;
}
- /* Initialise library */
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) ==
- HUBBUB_OK);
+ assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK);
params.tree_handler = &tree_handler;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
@@ -148,22 +140,20 @@ int main(int argc, char **argv)
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE,
&params) == HUBBUB_OK);
- stat(argv[2], &info);
- fd = open(argv[2], 0);
+ stat(argv[1], &info);
+ fd = open(argv[1], 0);
file = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0);
assert(hubbub_parser_parse_chunk(parser, file, info.st_size)
== HUBBUB_OK);
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- return 0;
+ return HUBBUB_OK;
}
/*** Tree construction functions ***/
-int create_comment(void *ctx, const hubbub_string *data, void **result)
+hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result)
{
node_t *node = calloc(1, sizeof *node);
@@ -174,10 +164,10 @@ int create_comment(void *ctx, const hubbub_string *data, void **result)
*result = node;
- return 0;
+ return HUBBUB_OK;
}
-int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
+hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
{
node_t *node = calloc(1, sizeof *node);
@@ -202,10 +192,10 @@ int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
*result = node;
- return 0;
+ return HUBBUB_OK;
}
-int create_element(void *ctx, const hubbub_tag *tag, void **result)
+hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result)
{
node_t *node = calloc(1, sizeof *node);
@@ -241,10 +231,10 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result)
*result = node;
- return 0;
+ return HUBBUB_OK;
}
-int create_text(void *ctx, const hubbub_string *data, void **result)
+hubbub_error create_text(void *ctx, const hubbub_string *data, void **result)
{
node_t *node = calloc(1, sizeof *node);
@@ -255,26 +245,26 @@ int create_text(void *ctx, const hubbub_string *data, void **result)
*result = node;
- return 0;
+ return HUBBUB_OK;
}
-int ref_node(void *ctx, void *node)
+hubbub_error ref_node(void *ctx, void *node)
{
UNUSED(ctx);
UNUSED(node);
- return 0;
+ return HUBBUB_OK;
}
-int unref_node(void *ctx, void *node)
+hubbub_error unref_node(void *ctx, void *node)
{
UNUSED(ctx);
UNUSED(node);
- return 0;
+ return HUBBUB_OK;
}
-int append_child(void *ctx, void *parent, void *child, void **result)
+hubbub_error append_child(void *ctx, void *parent, void *child, void **result)
{
node_t *tparent = parent;
node_t *tchild = child;
@@ -287,26 +277,25 @@ int append_child(void *ctx, void *parent, void *child, void **result)
tchild->next = tchild->prev = NULL;
*result = child;
-
if (parent == (void *)1) {
if (Document) {
insert = Document;
+ while (insert->next != NULL) {
+ insert = insert->next;
+ }
} else {
Document = tchild;
}
} else {
- if (tparent->child == NULL) {
- tparent->child = tchild;
+ if (tparent->child_first == NULL) {
+ tparent->child_first = tchild;
+ tparent->child_last = tchild;
} else {
- insert = tparent->child;
+ insert = tparent->child_last;
}
}
if (insert) {
- while (insert->next != NULL) {
- insert = insert->next;
- }
-
if (tchild->type == CHARACTER && insert->type == CHARACTER) {
insert->data.content = realloc(insert->data.content,
strlen(insert->data.content) +
@@ -316,14 +305,18 @@ int append_child(void *ctx, void *parent, void *child, void **result)
} else {
insert->next = tchild;
tchild->prev = insert;
+ if (insert->parent != NULL &&
+ insert->parent != (void *)1) {
+ insert->parent->child_last = tchild; /* tchild was just linked after insert, so it is the new tail */
+ }
}
}
- return 0;
+ return HUBBUB_OK;
}
/* insert 'child' before 'ref_child', under 'parent' */
-int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child,
void **result)
{
node_t *tparent = parent;
@@ -352,26 +345,31 @@ int insert_before(void *ctx, void *parent, void *child, void *ref_child,
if (tchild->prev)
tchild->prev->next = tchild;
else
- tparent->child = tchild;
+ tparent->child_first = tchild;
*result = child;
}
- return 0;
+ return HUBBUB_OK;
}
-int remove_child(void *ctx, void *parent, void *child, void **result)
+hubbub_error remove_child(void *ctx, void *parent, void *child, void **result)
{
node_t *tparent = parent;
node_t *tchild = child;
UNUSED(ctx);
- assert(tparent->child);
+ assert(tparent->child_last);
+ assert(tparent->child_first);
assert(tchild->parent == tparent);
- if (tchild->parent->child == tchild) {
- tchild->parent->child = tchild->next;
+ if (tchild->parent->child_first == tchild) {
+ tchild->parent->child_first = tchild->next;
+ }
+
+ if (tchild->parent->child_last == tchild) {
+ tchild->parent->child_last = tchild->prev;
}
if (tchild->prev)
@@ -385,10 +383,10 @@ int remove_child(void *ctx, void *parent, void *child, void **result)
*result = child;
- return 0;
+ return HUBBUB_OK;
}
-int clone_node(void *ctx, void *node, bool deep, void **result)
+hubbub_error clone_node(void *ctx, void *node, bool deep, void **result)
{
node_t *old_node = node;
node_t *new_node = calloc(1, sizeof *new_node);
@@ -398,12 +396,8 @@ int clone_node(void *ctx, void *node, bool deep, void **result)
*new_node = *old_node;
*result = new_node;
- new_node->child = new_node->parent =
- new_node->next = new_node->prev =
- NULL;
-
if (deep == false)
- return 0;
+ return HUBBUB_OK;
if (old_node->next) {
void *n;
@@ -412,85 +406,92 @@ int clone_node(void *ctx, void *node, bool deep, void **result)
new_node->next = n;
new_node->next->prev = new_node;
+
+ new_node->parent = old_node->parent;
+ if (new_node->parent != NULL && new_node->parent != (void *)1) {
+ new_node->parent->child_last = new_node;
+ }
}
- if (old_node->child) {
+ if (old_node->child_first) {
void *n;
- clone_node(ctx, old_node->child, true, &n);
+ clone_node(ctx, old_node->child_first, true, &n);
- new_node->child = n;
- new_node->child->parent = new_node;
+ if (new_node)
+ new_node->child_last = n;
+ new_node->child_first = n;
+ new_node->child_first->parent = new_node;
}
- return 0;
+ return HUBBUB_OK;
}
/* Take all of the child nodes of "node" and append them to "new_parent" */
-int reparent_children(void *ctx, void *node, void *new_parent)
+hubbub_error reparent_children(void *ctx, void *node, void *new_parent)
{
node_t *parent = new_parent;
node_t *old_parent = node;
node_t *insert;
- node_t *kids;
+ node_t *kids_first;
+ node_t *kids_last;
UNUSED(ctx);
- kids = old_parent->child;
- if (!kids) return 0;
+ kids_first = old_parent->child_first;
+ kids_last = old_parent->child_last;
+ if (!kids_first) return HUBBUB_OK;
- old_parent->child = NULL;
+ old_parent->child_first = NULL;
+ old_parent->child_last = NULL;
- insert = parent->child;
+ insert = parent->child_last;
if (!insert) {
- parent->child = kids;
+ parent->child_first = kids_first;
} else {
- while (insert->next != NULL) {
- insert = insert->next;
- }
-
- insert->next = kids;
- kids->prev = insert;
+ insert->next = kids_first;
+ kids_first->prev = insert;
}
+ parent->child_last = kids_last;
- while (kids) {
- kids->parent = parent;
- kids = kids->next;
+ while (kids_first) {
+ kids_first->parent = parent;
+ kids_first = kids_first->next;
}
- return 0;
+ return HUBBUB_OK;
}
-int get_parent(void *ctx, void *node, bool element_only, void **result)
+hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result)
{
UNUSED(ctx);
UNUSED(element_only);
*result = ((node_t *)node)->parent;
- return 0;
+ return HUBBUB_OK;
}
-int has_children(void *ctx, void *node, bool *result)
+hubbub_error has_children(void *ctx, void *node, bool *result)
{
UNUSED(ctx);
- *result = ((node_t *)node)->child ? true : false;
+ *result = ((node_t *)node)->child_first ? true : false;
- return 0;
+ return HUBBUB_OK;
}
-int form_associate(void *ctx, void *form, void *node)
+hubbub_error form_associate(void *ctx, void *form, void *node)
{
UNUSED(ctx);
UNUSED(form);
UNUSED(node);
- return 0;
+ return HUBBUB_OK;
}
-int add_attributes(void *ctx, void *vnode,
+hubbub_error add_attributes(void *ctx, void *vnode,
const hubbub_attribute *attributes, uint32_t n_attributes)
{
node_t *node = vnode;
@@ -521,13 +522,21 @@ int add_attributes(void *ctx, void *vnode,
}
- return 0;
+ return HUBBUB_OK;
}
-int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
+hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
{
UNUSED(ctx);
UNUSED(mode);
- return 0;
+ return HUBBUB_OK;
+}
+
+hubbub_error change_encoding(void *ctx, const char *charset)
+{
+ UNUSED(ctx);
+ UNUSED(charset);
+
+ return HUBBUB_OK;
}
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 2d9c4ed..78eeee3 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -271,7 +271,7 @@ static inline hubbub_error emit_current_comment(hubbub_tokeniser *tokeniser);
static inline hubbub_error emit_current_doctype(hubbub_tokeniser *tokeniser,
bool force_quirks);
static hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
- hubbub_token *token);
+ const hubbub_token *token);
/**
* Create a hubbub tokeniser
@@ -3365,7 +3365,7 @@ hubbub_error emit_current_doctype(hubbub_tokeniser *tokeniser,
* \param token Token to emit
*/
hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
- hubbub_token *token)
+ const hubbub_token *token)
{
hubbub_error err = HUBBUB_OK;
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index 31feae1..77459f1 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -6,6 +6,16 @@ DIR_SOURCES := treebuilder.c \
in_cell.c in_select.c in_select_in_table.c \
in_foreign_content.c after_body.c in_frameset.c \
after_frameset.c after_after_body.c after_after_frameset.c \
- generic_rcdata.c
+ generic_rcdata.c element-type.c
+
+$(DIR)autogenerated-element-type.c: $(DIR)element-type.gperf
+ $(VQ)$(ECHO) " GPERF: $<"
+ $(Q)gperf --output-file=$@.tmp $<
+ $(Q)$(SED) -e 's/^\(const struct element_type_map\)/static \1/' $@.tmp >$@
+ $(Q)$(RM) $@.tmp
+
+PRE_TARGETS := $(DIR)autogenerated-element-type.c
+
+CLEAN_ITEMS := $(DIR)autogenerated-element-type.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/treebuilder/element-type.c b/src/treebuilder/element-type.c
new file mode 100644
index 0000000..7e2772c
--- /dev/null
+++ b/src/treebuilder/element-type.c
@@ -0,0 +1,49 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org>
+ */
+
+#include "treebuilder/element-type.h"
+
+/* Auto-generated by `gperf`. */
+#include "treebuilder/autogenerated-element-type.c"
+
+/* Exported function, documented in element-type.h */
+element_type element_type_from_name(
+ hubbub_treebuilder *treebuilder,
+ const hubbub_string *tag_name)
+{
+ const char *name = (const char *)tag_name->ptr;
+ const struct element_type_map *entry;
+
+ UNUSED(treebuilder);
+
+ /* Consult the gperf-generated table; element-type.gperf sets
+ * %ignore-case, so the match does not depend on letter case. */
+ entry = hubbub_element_type_generated_lookup(name,
+ tag_name->len);
+
+ /* A NULL result means the name has no entry in the table */
+ return (entry != NULL) ? entry->type : UNKNOWN;
+}
+
+/**
+ * Look up the name recorded for an element type
+ *
+ * \param type The element type to search the generated word list for
+ * \return Name of the first matching entry, or "UNKNOWN" if none matches
+ */
+const char *element_type_to_name(element_type type)
+{
+ const struct element_type_map *entry = wordlist;
+ size_t remaining = sizeof(wordlist) / sizeof(wordlist[0]);
+
+ for (; remaining > 0; remaining--, entry++) {
+ if (entry->type == type)
+ return entry->name;
+ }
+
+ return "UNKNOWN";
+}
diff --git a/src/treebuilder/element-type.gperf b/src/treebuilder/element-type.gperf
new file mode 100644
index 0000000..c0980d3
--- /dev/null
+++ b/src/treebuilder/element-type.gperf
@@ -0,0 +1,138 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org>
+ */
+
+%language=ANSI-C
+%compare-strncmp
+%readonly-tables
+%global-table
+%ignore-case
+%struct-type
+%switch=1
+%define hash-function-name hubbub_element_type_generated_hash
+%define lookup-function-name hubbub_element_type_generated_lookup
+
+%{
+#include <string.h>
+
+#include "treebuilder/element-type.h"
+
+%}
+
+struct element_type_map;
+%%
+# header, main, nav and section are tested for by the "in body" start and
+# end tag handling, so they need entries here to be recognised at all.
+a, A
+address, ADDRESS
+annotation-xml, ANNOTATION_XML
+applet, APPLET
+area, AREA
+article, ARTICLE
+aside, ASIDE
+b, B
+base, BASE
+basefont, BASEFONT
+bgsound, BGSOUND
+big, BIG
+blockquote, BLOCKQUOTE
+body, BODY
+br, BR
+button, BUTTON
+caption, CAPTION
+center, CENTER
+col, COL
+colgroup, COLGROUP
+command, COMMAND
+dd, DD
+desc, DESC
+details, DETAILS
+dialog, DIALOG
+dir, DIR
+div, DIV
+dl, DL
+dt, DT
+em, EM
+embed, EMBED
+fieldset, FIELDSET
+figcaption, FIGCAPTION
+figure, FIGURE
+font, FONT
+footer, FOOTER
+foreignobject, FOREIGNOBJECT
+form, FORM
+frame, FRAME
+frameset, FRAMESET
+h1, H1
+h2, H2
+h3, H3
+h4, H4
+h5, H5
+h6, H6
+head, HEAD
+header, HEADER
+hr, HR
+html, HTML
+i, I
+iframe, IFRAME
+image, IMAGE
+img, IMG
+input, INPUT
+isindex, ISINDEX
+li, LI
+link, LINK
+listing, LISTING
+main, MAIN
+malignmark, MALIGNMARK
+marquee, MARQUEE
+math, MATH
+menu, MENU
+meta, META
+mglyph, MGLYPH
+mi, MI
+mn, MN
+mo, MO
+ms, MS
+mtext, MTEXT
+nav, NAV
+nobr, NOBR
+noembed, NOEMBED
+noframes, NOFRAMES
+noscript, NOSCRIPT
+object, OBJECT
+ol, OL
+optgroup, OPTGROUP
+option, OPTION
+output, OUTPUT
+p, P
+param, PARAM
+plaintext, PLAINTEXT
+pre, PRE
+s, S
+script, SCRIPT
+section, SECTION
+select, SELECT
+small, SMALL
+spacer, SPACER
+strike, STRIKE
+strong, STRONG
+style, STYLE
+summary, SUMMARY
+svg, SVG
+table, TABLE
+tbody, TBODY
+td, TD
+textarea, TEXTAREA
+tfoot, TFOOT
+th, TH
+thead, THEAD
+title, TITLE
+tr, TR
+tt, TT
+u, U
+ul, UL
+wbr, WBR
+xmp, XMP
diff --git a/src/treebuilder/element-type.h b/src/treebuilder/element-type.h
new file mode 100644
index 0000000..75612fd
--- /dev/null
+++ b/src/treebuilder/element-type.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_element_type_h_
+#define hubbub_treebuilder_element_type_h_
+
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+typedef enum
+{
+/* Special */
+ ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
+ BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
+ DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER,
+ FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME,
+ IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV,
+ NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT,
+ PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA,
+ TFOOT, THEAD, TITLE, TR, UL, WBR,
+/* Scoping */
+ APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
+/* Formatting */
+ A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
+/* Phrasing */
+ /**< \todo Enumerate phrasing elements */
+ LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
+/* MathML */
+ MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML,
+/* SVG */
+ SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */
+ DESC,
+ UNKNOWN
+} element_type;
+
+struct element_type_map {
+ const char *name;
+ element_type type;
+};
+
+/**
+ * Convert an element name into an element type
+ *
+ * \param treebuilder The treebuilder instance
+ * \param tag_name The tag name to consider
+ * \return The corresponding element type
+ */
+element_type element_type_from_name(
+ hubbub_treebuilder *treebuilder,
+ const hubbub_string *tag_name);
+
+/**
+ * Convert an element type to a name
+ *
+ * \param type The element type
+ * \return Pointer to name
+ */
+const char *element_type_to_name(element_type type);
+
+#endif
+
diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c
index 5157e66..0e4184c 100644
--- a/src/treebuilder/in_body.c
+++ b/src/treebuilder/in_body.c
@@ -267,9 +267,10 @@ hubbub_error process_start_tag(hubbub_treebuilder *treebuilder,
type == DATAGRID || type == DETAILS ||
type == DIALOG || type == DIR ||
type == DIV || type == DL || type == FIELDSET ||
- type == FIGURE || type == FOOTER ||
- type == HEADER || type == MENU || type == NAV ||
- type == OL || type == P || type == SECTION ||
+ type == FIGCAPTION || type == FIGURE ||
+ type == FOOTER || type == HEADER || type == MAIN ||
+ type == MENU || type == NAV || type == OL ||
+ type == P || type == SECTION || type == SUMMARY ||
type == UL) {
err = process_container_in_body(treebuilder, token);
} else if (type == H1 || type == H2 || type == H3 ||
@@ -430,12 +431,14 @@ hubbub_error process_end_tag(hubbub_treebuilder *treebuilder,
* that wasn't ignored, reprocess this token */
err = process_0body_in_body(treebuilder);
} else if (type == ADDRESS || type == ARTICLE || type == ASIDE ||
- type == BLOCKQUOTE || type == CENTER || type == DIR ||
- type == DATAGRID || type == DIV || type == DL ||
- type == FIELDSET || type == FOOTER || type == HEADER ||
- type == LISTING || type == MENU || type == NAV ||
- type == OL || type == PRE || type == SECTION ||
- type == UL) {
+ type == BLOCKQUOTE || type == CENTER ||
+ type == DETAILS || type == DIALOG || type == DIR ||
+ type == DATAGRID || type == DIV || type == DL ||
+ type == FIELDSET || type == FIGCAPTION ||
+ type == FIGURE || type == FOOTER || type == HEADER ||
+ type == LISTING || type == MAIN|| type == MENU ||
+ type == NAV || type == OL || type == PRE ||
+ type == SECTION || type == SUMMARY || type == UL) {
err = process_0container_in_body(treebuilder, type);
} else if (type == FORM) {
err = process_0form_in_body(treebuilder);
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 58c21d6..debc33e 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -9,32 +9,7 @@
#define hubbub_treebuilder_internal_h_
#include "treebuilder/treebuilder.h"
-
-typedef enum
-{
-/* Special */
- ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
- BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
- DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGURE, FOOTER, FORM, FRAME,
- FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME, IMAGE, IMG,
- INPUT, ISINDEX, LI, LINK, LISTING, MENU, META, NAV, NOEMBED, NOFRAMES,
- NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT,
- SECTION, SELECT, SPACER, STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE,
- TR, UL, WBR,
-/* Scoping */
- APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
-/* Formatting */
- A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
-/* Phrasing */
- /**< \todo Enumerate phrasing elements */
- LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
-/* MathML */
- MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML,
-/* SVG */
- SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */
- DESC,
- UNKNOWN
-} element_type;
+#include "treebuilder/element-type.h"
/**
* Item on the element stack
@@ -153,9 +128,6 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder,
const hubbub_string *string);
hubbub_error complete_script(hubbub_treebuilder *treebuilder);
-element_type element_type_from_name(hubbub_treebuilder *treebuilder,
- const hubbub_string *tag_name);
-
bool is_special_element(element_type type);
bool is_scoping_element(element_type type);
bool is_formatting_element(element_type type);
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index b67aa53..f11875e 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -17,69 +17,6 @@
#include "utils/utils.h"
#include "utils/string.h"
-
-#define S(x) x, SLEN(x)
-
-static const struct {
- const char *name;
- size_t len;
- element_type type;
-} name_type_map[] = {
- { S("address"), ADDRESS }, { S("area"), AREA },
- { S("base"), BASE }, { S("basefont"), BASEFONT },
- { S("bgsound"), BGSOUND }, { S("blockquote"), BLOCKQUOTE },
- { S("body"), BODY }, { S("br"), BR },
- { S("center"), CENTER }, { S("col"), COL },
- { S("colgroup"), COLGROUP }, { S("dd"), DD },
- { S("dir"), DIR }, { S("div"), DIV },
- { S("dl"), DL }, { S("dt"), DT },
- { S("embed"), EMBED }, { S("fieldset"), FIELDSET },
- { S("form"), FORM }, { S("frame"), FRAME },
- { S("frameset"), FRAMESET }, { S("h1"), H1 },
- { S("h2"), H2 }, { S("h3"), H3 },
- { S("h4"), H4 }, { S("h5"), H5 },
- { S("h6"), H6 }, { S("head"), HEAD },
- { S("hr"), HR }, { S("iframe"), IFRAME },
- { S("image"), IMAGE }, { S("img"), IMG },
- { S("input"), INPUT }, { S("isindex"), ISINDEX },
- { S("li"), LI }, { S("link"), LINK },
- { S("listing"), LISTING },
- { S("menu"), MENU },
- { S("meta"), META }, { S("noembed"), NOEMBED },
- { S("noframes"), NOFRAMES }, { S("noscript"), NOSCRIPT },
- { S("ol"), OL }, { S("optgroup"), OPTGROUP },
- { S("option"), OPTION }, { S("output"), OUTPUT },
- { S("p"), P }, { S("param"), PARAM },
- { S("plaintext"), PLAINTEXT }, { S("pre"), PRE },
- { S("script"), SCRIPT }, { S("select"), SELECT },
- { S("spacer"), SPACER }, { S("style"), STYLE },
- { S("tbody"), TBODY }, { S("textarea"), TEXTAREA },
- { S("tfoot"), TFOOT }, { S("thead"), THEAD },
- { S("title"), TITLE }, { S("tr"), TR },
- { S("ul"), UL }, { S("wbr"), WBR },
- { S("applet"), APPLET }, { S("button"), BUTTON },
- { S("caption"), CAPTION }, { S("html"), HTML },
- { S("marquee"), MARQUEE }, { S("object"), OBJECT },
- { S("table"), TABLE }, { S("td"), TD },
- { S("th"), TH },
- { S("a"), A }, { S("b"), B },
- { S("big"), BIG }, { S("em"), EM },
- { S("font"), FONT }, { S("i"), I },
- { S("nobr"), NOBR }, { S("s"), S },
- { S("small"), SMALL }, { S("strike"), STRIKE },
- { S("strong"), STRONG }, { S("tt"), TT },
- { S("u"), U }, { S("xmp"), XMP },
-
- { S("math"), MATH }, { S("mglyph"), MGLYPH },
- { S("malignmark"), MALIGNMARK },
- { S("mi"), MI }, { S("mo"), MO },
- { S("mn"), MN }, { S("ms"), MS },
- { S("mtext"), MTEXT }, { S("annotation-xml"), ANNOTATION_XML },
-
- { S("svg"), SVG }, { S("desc"), DESC },
- { S("foreignobject"), FOREIGNOBJECT },
-};
-
static bool is_form_associated(element_type type);
/**
@@ -978,36 +915,6 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder,
}
/**
- * Convert an element name into an element type
- *
- * \param treebuilder The treebuilder instance
- * \param tag_name The tag name to consider
- * \return The corresponding element type
- */
-element_type element_type_from_name(hubbub_treebuilder *treebuilder,
- const hubbub_string *tag_name)
-{
- const uint8_t *name = tag_name->ptr;
- size_t len = tag_name->len;
- uint32_t i;
-
- UNUSED(treebuilder);
-
- /** \todo optimise this */
-
- for (i = 0; i < N_ELEMENTS(name_type_map); i++) {
- if (name_type_map[i].len != len)
- continue;
-
- if (strncasecmp(name_type_map[i].name,
- (const char *) name, len) == 0)
- return name_type_map[i].type;
- }
-
- return UNKNOWN;
-}
-
-/**
* Determine if a node is a special element
*
* \param type Node type to consider
@@ -1391,15 +1298,21 @@ hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder,
*node = entry->details.node;
*stack_index = entry->stack_index;
- if (entry->prev == NULL)
+ if (entry->prev == NULL) {
+ assert(treebuilder->context.formatting_list == entry);
treebuilder->context.formatting_list = entry->next;
- else
+ } else {
+ assert(treebuilder->context.formatting_list != entry);
entry->prev->next = entry->next;
+ }
- if (entry->next == NULL)
+ if (entry->next == NULL) {
+ assert(treebuilder->context.formatting_list_end == entry);
treebuilder->context.formatting_list_end = entry->prev;
- else
+ } else {
+ assert(treebuilder->context.formatting_list_end != entry);
entry->next->prev = entry->prev;
+ }
free(entry);
@@ -1484,24 +1397,5 @@ void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp)
}
}
-/**
- * Convert an element type to a name
- *
- * \param type The element type
- * \return Pointer to name
- */
-const char *element_type_to_name(element_type type)
-{
- size_t i;
-
- for (i = 0;
- i < sizeof(name_type_map) / sizeof(name_type_map[0]);
- i++) {
- if (name_type_map[i].type == type)
- return name_type_map[i].name;
- }
-
- return "UNKNOWN";
-}
#endif
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index c8ab9c0..f468d1c 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -48,7 +48,7 @@ int main(int argc, char **argv)
}
json = json_object_from_file(argv[1]);
- assert(!is_error(json));
+ assert(json != NULL);
assert(strcmp((char *) ((json_object_get_object(json)->head)->k),
"tests") == 0);
@@ -57,7 +57,7 @@ int main(int argc, char **argv)
tests = json_object_get_array((struct json_object *)
(json_object_get_object(json)->head)->v);
- for (i = 0; i < array_list_length(tests); i++) {
+ for (i = 0; i < (int)array_list_length(tests); i++) {
/* Get test */
struct json_object *test =
(struct json_object *) array_list_get_idx(tests, i);
@@ -216,7 +216,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
struct json_object *obj = NULL;
struct array_list *items;
- for (; ctx->output_index < array_list_length(ctx->output);
+ for (; ctx->output_index < (int)array_list_length(ctx->output);
ctx->output_index++) {
/* Get object for index */
obj = (struct json_object *)
@@ -236,11 +236,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
* produced more tokens than expected. We allow for the generation
* of a terminating EOF token, however. */
assert("too many tokens" &&
- (ctx->output_index < array_list_length(ctx->output) ||
+ (ctx->output_index < (int)array_list_length(ctx->output) ||
token->type == HUBBUB_TOKEN_EOF));
/* Got a terminating EOF -- no error */
- if (ctx->output_index >= array_list_length(ctx->output))
+ if (ctx->output_index >= (int)array_list_length(ctx->output))
return HUBBUB_OK;
/* Now increment the output index so we don't re-expect this token */
@@ -438,7 +438,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
/* Expected token only contained part of the data
* Calculate how much is left, then try again with
* the next expected token */
- hubbub_token t;
+ hubbub_token t = *token;
t.type = HUBBUB_TOKEN_CHARACTER;
t.data.character.ptr += len;
@@ -446,7 +446,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
ctx->char_off = 0;
- token_handler(&t, pw);
+ return token_handler(&t, pw);
} else if (strlen(expstr + ctx->char_off) >
token->data.character.len) {
/* Tokeniser output only contained part of the data
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index e33d018..eb921ce 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
}
json = json_object_from_file(argv[1]);
- assert(!is_error(json));
+ assert(json != NULL);
assert(strcmp((char *) ((json_object_get_object(json)->head)->k),
"tests") == 0);
@@ -55,7 +55,7 @@ int main(int argc, char **argv)
tests = json_object_get_array((struct json_object *)
(json_object_get_object(json)->head)->v);
- for (i = 0; i < array_list_length(tests); i++) {
+ for (i = 0; i < (int)array_list_length(tests); i++) {
/* Get test */
struct json_object *test =
(struct json_object *) array_list_get_idx(tests, i);
@@ -221,7 +221,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
struct json_object *obj = NULL;
struct array_list *items;
- for (; ctx->output_index < array_list_length(ctx->output);
+ for (; ctx->output_index < (int)array_list_length(ctx->output);
ctx->output_index++) {
/* Get object for index */
obj = (struct json_object *)
@@ -241,11 +241,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
* produced more tokens than expected. We allow for the generation
* of a terminating EOF token, however. */
assert("too many tokens" &&
- (ctx->output_index < array_list_length(ctx->output) ||
+ (ctx->output_index < (int)array_list_length(ctx->output) ||
token->type == HUBBUB_TOKEN_EOF));
/* Got a terminating EOF -- no error */
- if (ctx->output_index >= array_list_length(ctx->output))
+ if (ctx->output_index >= (int)array_list_length(ctx->output))
return HUBBUB_OK;
/* Now increment the output index so we don't re-expect this token */
@@ -447,7 +447,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
/* Expected token only contained part of the data
* Calculate how much is left, then try again with
* the next expected token */
- hubbub_token t;
+ hubbub_token t = *token;
t.type = HUBBUB_TOKEN_CHARACTER;
t.data.character.ptr += len;
@@ -455,7 +455,7 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
ctx->char_off = 0;
- token_handler(&t, pw);
+ return token_handler(&t, pw);
} else if (strlen(expstr + ctx->char_off) >
token->data.character.len) {
/* Tokeniser output only contained part of the data