summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-03-11 22:03:28 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-03-11 22:03:28 +0000
commit04e5f38a5e8e471db41dd010e2c7c0ae9b016601 (patch)
tree68fae9d93497ab8a697b0b5dc056d40928c96426
parent5b37e1f0bfd10da6720a6587e39d3dee45f77e8d (diff)
downloadlibhubbub-04e5f38a5e8e471db41dd010e2c7c0ae9b016601.tar.gz
libhubbub-04e5f38a5e8e471db41dd010e2c7c0ae9b016601.tar.bz2
More treebuilder (8.2.4.8)
Make tree_handler a pointer rather than a value. Check for tree_handler's presence in hubbub_treebuilder_token_handler rather than scattering checks all over the treebuilder code. Add test driver (doesn't actually build a tree but will exercise the core code correctly and verify that the treebuilder code releases all the node references it gains). Enhance quirks mode reporting to distinguish between standards, limited, and full quirks modes. svn path=/trunk/hubbub/; revision=3939
-rw-r--r--include/hubbub/functypes.h2
-rw-r--r--include/hubbub/parser.h2
-rw-r--r--include/hubbub/types.h9
-rw-r--r--src/treebuilder/treebuilder.c363
-rw-r--r--src/treebuilder/treebuilder.h2
-rw-r--r--test/INDEX3
-rw-r--r--test/Makefile3
-rw-r--r--test/tree.c325
8 files changed, 596 insertions, 113 deletions
diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h
index ee8c429..ce95303 100644
--- a/include/hubbub/functypes.h
+++ b/include/hubbub/functypes.h
@@ -95,7 +95,7 @@ typedef int (*hubbub_tree_clone_node)(void *ctx, void *node, bool deep,
/**
* Type of tree quirks mode notification function
*/
-typedef int (*hubbub_tree_set_quirks_mode)(void *ctx, bool quirky);
+typedef int (*hubbub_tree_set_quirks_mode)(void *ctx, hubbub_quirks_mode mode);
#endif
diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
index 5565f63..f7d8e1e 100644
--- a/include/hubbub/parser.h
+++ b/include/hubbub/parser.h
@@ -52,7 +52,7 @@ typedef union hubbub_parser_optparams {
hubbub_content_model model;
} content_model;
- hubbub_tree_handler tree_handler;
+ hubbub_tree_handler *tree_handler;
void *document_node;
} hubbub_parser_optparams;
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index 57518ae..922bdbb 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -33,6 +33,15 @@ typedef enum hubbub_content_model {
} hubbub_content_model;
/**
+ * Quirks mode flag
+ */
+typedef enum hubbub_quirks_mode {
+ HUBBUB_QUIRKS_MODE_NONE, /**< Standards mode (no quirks) */
+ HUBBUB_QUIRKS_MODE_LIMITED, /**< Limited quirks mode */
+ HUBBUB_QUIRKS_MODE_FULL /**< Full quirks mode */
+} hubbub_quirks_mode;
+
+/**
* Type of an emitted token
*/
typedef enum hubbub_token_type {
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 6c92bd9..deb2493 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -5,6 +5,7 @@
* Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
*/
+#include <assert.h>
#include <string.h>
#include "treebuilder/treebuilder.h"
@@ -13,7 +14,7 @@
typedef enum
{
INITIAL,
- BEFORE_DOCTYPE,
+ BEFORE_HTML,
BEFORE_HEAD,
IN_HEAD,
IN_HEAD_NOSCRIPT,
@@ -99,7 +100,7 @@ struct hubbub_treebuilder
hubbub_treebuilder_context context;
- hubbub_tree_handler tree_handler;
+ hubbub_tree_handler *tree_handler;
hubbub_buffer_handler buffer_handler;
void *buffer_pw;
@@ -116,6 +117,9 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
static void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw);
+static bool handle_initial(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
/** \todo Uncomment the static keyword here once these functions are actually used */
/*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder,
@@ -178,7 +182,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
tb->input_buffer = NULL;
tb->input_buffer_len = 0;
- memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler));
+ tb->tree_handler = NULL;
memset(&tb->context, 0, sizeof(hubbub_treebuilder_context));
tb->context.mode = INITIAL;
@@ -191,6 +195,10 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
return NULL;
}
tb->context.stack_alloc = ELEMENT_STACK_CHUNK;
+ /* We rely on HTML not being equal to zero to determine
+ * if the first item in the stack is in use. Assert this here. */
+ assert(HTML != 0);
+ tb->context.element_stack[0].type = 0;
tb->buffer_handler = NULL;
tb->buffer_pw = NULL;
@@ -250,34 +258,36 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
/* Clean up context */
- if (treebuilder->tree_handler.unref_node != NULL) {
+ if (treebuilder->tree_handler != NULL) {
if (treebuilder->context.head_element != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.head_element);
}
if (treebuilder->context.form_element != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.form_element);
}
if (treebuilder->context.document != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.document);
}
for (uint32_t n = treebuilder->context.current_node;
n > 0; n--) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[n].node);
}
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ if (treebuilder->context.element_stack[0].type == HTML) {
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[0].node);
+ }
}
treebuilder->alloc(treebuilder->context.element_stack, 0,
treebuilder->alloc_pw);
@@ -287,9 +297,9 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
entry = next) {
next = entry->next;
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ if (treebuilder->tree_handler != NULL) {
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
entry->details.node);
}
@@ -370,14 +380,167 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw)
{
hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw;
+ bool reprocess = true;
- UNUSED(token);
-
- /* Do nothing if we have no document node */
- if (treebuilder->context.document == NULL)
+ /* Do nothing if we have no document node or there's no tree handler */
+ if (treebuilder->context.document == NULL ||
+ treebuilder->tree_handler == NULL)
return;
- /** \todo implement this */
+ while (reprocess == true) {
+ switch (treebuilder->context.mode) {
+ case INITIAL:
+ reprocess = handle_initial(treebuilder, token);
+ break;
+ case BEFORE_HTML:
+ case BEFORE_HEAD:
+ case IN_HEAD:
+ case IN_HEAD_NOSCRIPT:
+ case AFTER_HEAD:
+ case IN_BODY:
+ case IN_TABLE:
+ case IN_CAPTION:
+ case IN_COLUMN_GROUP:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_CELL:
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ case AFTER_BODY:
+ case IN_FRAMESET:
+ case AFTER_FRAMESET:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ reprocess = false;
+ break;
+ }
+ }
+}
+
+/**
+ * Handle token in initial insertion mode
+ *
+ * Whitespace-only text is ignored; comments and doctypes are appended to the
+ * document; anything else selects full quirks mode and is reprocessed.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
+{
+	bool reprocess = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+	{
+		const uint8_t *data = treebuilder->input_buffer +
+				token->data.character.data_off;
+		size_t len = token->data.character.len;
+		size_t c;
+
+		/** \todo UTF-16 */
+		for (c = 0; c < len; c++) {
+			if (data[c] != 0x09 && data[c] != 0x0A &&
+					data[c] != 0x0B && data[c] != 0x0C &&
+					data[c] != 0x20)
+				break;
+		}
+		/* Non-whitespace characters in token, so reprocess */
+		if (c != len) {
+			/* Skip the c leading whitespace bytes of the token */
+			((hubbub_token *) token)->data.character.data_off += c;
+			((hubbub_token *) token)->data.character.len -= c;
+
+			/** \todo parse error */
+			treebuilder->tree_handler->set_quirks_mode(
+					treebuilder->tree_handler->ctx,
+					HUBBUB_QUIRKS_MODE_FULL);
+
+			treebuilder->context.mode = BEFORE_HTML;
+			reprocess = true;
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+	{
+		int success;
+		void *comment, *appended;
+
+		success = treebuilder->tree_handler->create_comment(
+				treebuilder->tree_handler->ctx,
+				&token->data.comment, &comment);
+		if (success != 0) {
+			/** \todo errors */
+			break; /* No node was created: nothing to append */
+		}
+
+		/* Append to Document node */
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.document,
+				comment, &appended);
+		if (success == 0) {
+			/* appended is only set (and referenced) on success */
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					appended);
+		}
+		/** \todo report append errors */
+		/* Release the reference gained from create_comment */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, comment);
+	}
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+	{
+		int success;
+		void *doctype, *appended;
+
+		/* The mode changes whether or not the node can be built */
+		treebuilder->context.mode = BEFORE_HTML;
+
+		/** \todo need public and system ids from tokeniser */
+		success = treebuilder->tree_handler->create_doctype(
+				treebuilder->tree_handler->ctx,
+				&token->data.doctype.name,
+				NULL, NULL, &doctype);
+		if (success != 0) {
+			/** \todo errors */
+			break; /* No node was created: nothing to append */
+		}
+
+		/* Append to Document node */
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.document,
+				doctype, &appended);
+		if (success == 0) {
+			/* appended is only set (and referenced) on success */
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					appended);
+		}
+		/** \todo report append errors */
+		/** \todo doctype processing */
+		/* Release the reference gained from create_doctype */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, doctype);
+	}
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+		treebuilder->tree_handler->set_quirks_mode(
+				treebuilder->tree_handler->ctx,
+				HUBBUB_QUIRKS_MODE_FULL);
+		treebuilder->context.mode = BEFORE_HTML;
+		reprocess = true;
+		break;
+	}
+
+	return reprocess;
+}
/**
@@ -446,72 +609,66 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
}
}
- if (treebuilder->tree_handler.clone_node != NULL &&
- treebuilder->tree_handler.append_child != NULL &&
- treebuilder->tree_handler.unref_node != NULL) {
- while (1) {
- int success;
- void *clone, *appended;
- element_type prev_type;
- void *prev_node;
- uint32_t prev_stack_index;
-
- success = treebuilder->tree_handler.clone_node(
- treebuilder->tree_handler.ctx,
- entry->details.node,
- false,
- &clone);
- if (success != 0) {
- /** \todo handle errors */
- return;
- }
-
- success = treebuilder->tree_handler.append_child(
- treebuilder->tree_handler.ctx,
- treebuilder->context.element_stack[
- treebuilder->context.
- current_node].node,
- clone,
- &appended);
- if (success != 0) {
- /** \todo handle errors */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- return;
- }
-
- if (element_stack_push(treebuilder,
- entry->details.type,
- appended) == false) {
- /** \todo handle memory exhaustion */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- appended);
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- }
-
- if (formatting_list_replace(treebuilder,
- entry,
- entry->details.type, clone,
- treebuilder->context.current_node,
- &prev_type, &prev_node,
- &prev_stack_index) == false) {
- /** \todo handle errors */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- }
-
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- prev_node);
-
- if (entry->next != NULL)
- entry = entry->next;
+ while (1) {
+ int success;
+ void *clone, *appended;
+ element_type prev_type;
+ void *prev_node;
+ uint32_t prev_stack_index;
+
+ success = treebuilder->tree_handler->clone_node(
+ treebuilder->tree_handler->ctx,
+ entry->details.node,
+ false,
+ &clone);
+ if (success != 0) {
+ /** \todo handle errors */
+ return;
}
+
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node,
+ clone,
+ &appended);
+ if (success != 0) {
+ /** \todo handle errors */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ return;
+ }
+
+ if (element_stack_push(treebuilder,
+ entry->details.type,
+ appended) == false) {
+ /** \todo handle memory exhaustion */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ appended);
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ }
+
+ if (formatting_list_replace(treebuilder, entry,
+ entry->details.type, clone,
+ treebuilder->context.current_node,
+ &prev_type, &prev_node,
+ &prev_stack_index) == false) {
+ /** \todo handle errors */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ }
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ prev_node);
+
+ if (entry->next != NULL)
+ entry = entry->next;
}
}
@@ -538,11 +695,9 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder)
/** \todo handle errors */
}
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- node);
- }
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ node);
if (done == true)
break;
@@ -560,20 +715,14 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag)
int success;
void *node, *appended;
- if (treebuilder->tree_handler.create_element == NULL ||
- treebuilder->tree_handler.append_child == NULL ||
- treebuilder->tree_handler.unref_node == NULL) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler.create_element(
- treebuilder->tree_handler.ctx, tag, &node);
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx, tag, &node);
if (success != 0) {
/** \todo errors */
}
- success = treebuilder->tree_handler.append_child(
- treebuilder->tree_handler.ctx,
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
node, &appended);
@@ -581,7 +730,7 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag)
/** \todo errors */
}
- treebuilder->tree_handler.unref_node(treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
appended);
if (element_stack_push(treebuilder,
@@ -616,11 +765,9 @@ void close_implied_end_tags(hubbub_treebuilder *treebuilder,
/** \todo errors */
}
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- node);
- }
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ node);
type = treebuilder->context.element_stack[
treebuilder->context.current_node].type;
diff --git a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h
index 839f901..9d690b1 100644
--- a/src/treebuilder/treebuilder.h
+++ b/src/treebuilder/treebuilder.h
@@ -44,7 +44,7 @@ typedef union hubbub_treebuilder_optparams {
void *pw;
} error_handler;
- hubbub_tree_handler tree_handler;
+ hubbub_tree_handler *tree_handler;
void *document_node;
} hubbub_treebuilder_optparams;
diff --git a/test/INDEX b/test/INDEX
index d43829b..fc45511 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -13,4 +13,5 @@ inputstream Buffered input stream html
parser Public parser API html
parser-utf16 Public parser API (utf-16 internally) html
tokeniser HTML tokeniser html
-tokeniser2 HTML tokeniser (again) tokeniser2 \ No newline at end of file
+tokeniser2 HTML tokeniser (again) tokeniser2
+tree Treebuilding API html
diff --git a/test/Makefile b/test/Makefile
index bf4670c..6df42d7 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -32,7 +32,8 @@ DEBUG =
# Objects
OBJS = aliases cscodec csdetect dict entities filter hubbub \
- inputstream parser parser-utf16 tokeniser tokeniser2
+ inputstream parser parser-utf16 tokeniser tokeniser2 \
+ tree
OBJS += regression/cscodec-segv regression/filter-segv
.PHONY: clean debug export release setup test
diff --git a/test/tree.c b/test/tree.c
new file mode 100644
index 0000000..76cf438
--- /dev/null
+++ b/test/tree.c
@@ -0,0 +1,325 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hubbub/hubbub.h>
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+#define NODE_REF_CHUNK 1024
+static uint16_t *node_ref;
+static uintptr_t node_ref_alloc;
+static uintptr_t node_counter;
+/* Grow node_ref by NODE_REF_CHUNK entries once node_counter reaches its end */
+#define GROW_REF \
+ if (node_counter >= node_ref_alloc) { \
+ uint16_t *temp = realloc(node_ref, \
+ (node_ref_alloc + NODE_REF_CHUNK) * \
+ sizeof(uint16_t)); \
+ if (temp == NULL) { \
+ printf("FAIL - no memory\n"); \
+ exit(1); \
+ } \
+ node_ref = temp; \
+ node_ref_alloc += NODE_REF_CHUNK; \
+ }
+
+static const uint8_t *pbuffer;
+
+static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
+static int create_comment(void *ctx, const hubbub_string *data, void **result);
+static int create_doctype(void *ctx, const hubbub_string *qname,
+ const hubbub_string *public_id, const hubbub_string *system_id,
+ void **result);
+static int create_element(void *ctx, const hubbub_tag *tag, void **result);
+static int create_text(void *ctx, const hubbub_string *data, void **result);
+static int ref_node(void *ctx, void *node);
+static int unref_node(void *ctx, void *node);
+static int append_child(void *ctx, void *parent, void *child, void **result);
+static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+ void **result);
+static int remove_child(void *ctx, void *parent, void *child, void **result);
+static int clone_node(void *ctx, void *node, bool deep, void **result);
+static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+
+static hubbub_tree_handler tree_handler = { /* positional: order must match the struct */
+ create_comment,
+ create_doctype,
+ create_element,
+ create_text,
+ ref_node,
+ unref_node,
+ append_child,
+ insert_before,
+ remove_child,
+ clone_node,
+ set_quirks_mode,
+ NULL /* presumably the ctx handed to every callback -- confirm against hubbub_tree_handler */
+};
+/* Allocator for hubbub; len 0 means free, as realloc(ptr, 0) is not portable */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+	if (len == 0) { free(ptr); return NULL; }
+	return realloc(ptr, len);
+}
+
+int main(int argc, char **argv)
+{
+	/* Parse argv[2] through hubbub using the logging tree handler above,
+	 * then print PASS iff every node reference count is back to zero */
+	hubbub_parser *parser;
+	hubbub_parser_optparams params;
+	FILE *fp;
+	size_t len;
+#define CHUNK_SIZE (4096)
+	uint8_t buf[CHUNK_SIZE];
+	const char *charset;
+	hubbub_charset_source cssource;
+	uint8_t *buffer;
+	bool passed = true;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	node_ref = calloc(NODE_REF_CHUNK, sizeof(uint16_t));
+	if (node_ref == NULL) {
+		printf("Failed allocating node_ref\n");
+		return 1;
+	}
+	node_ref_alloc = NODE_REF_CHUNK;
+
+	/* Initialise library */
+	assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
+
+	parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
+	assert(parser != NULL);
+
+	params.buffer_handler.handler = buffer_handler;
+	params.buffer_handler.pw = NULL;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
+			&params) == HUBBUB_OK);
+
+	params.tree_handler = &tree_handler;
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
+			&params) == HUBBUB_OK);
+
+	params.document_node = (void *) ++node_counter;
+	ref_node(NULL, (void *) node_counter);
+	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE,
+			&params) == HUBBUB_OK);
+
+	fp = fopen(argv[2], "rb");
+	if (fp == NULL) {
+		printf("Failed opening %s\n", argv[2]);
+		return 1;
+	}
+
+	fseek(fp, 0, SEEK_END);
+	len = ftell(fp);
+	fseek(fp, 0, SEEK_SET);
+
+	/* Feed the file to the parser, at most CHUNK_SIZE bytes at a time */
+	while (len > 0) {
+		size_t chunk = len < CHUNK_SIZE ? len : CHUNK_SIZE;
+
+		if (fread(buf, 1, chunk, fp) != chunk) {
+			printf("Failed reading %s\n", argv[2]);
+			fclose(fp);
+			return 1;
+		}
+
+		assert(hubbub_parser_parse_chunk(parser,
+				buf, chunk) == HUBBUB_OK);
+
+		len -= chunk;
+	}
+
+	/* Signal end of input whatever the file size, including empty files
+	 * and exact multiples of CHUNK_SIZE */
+	assert(hubbub_parser_completed(parser) == HUBBUB_OK);
+
+	fclose(fp);
+
+	charset = hubbub_parser_read_charset(parser, &cssource);
+	printf("Charset: %s (from %d)\n", charset, cssource);
+
+	assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
+			HUBBUB_OK);
+	free(buffer);
+
+	hubbub_parser_destroy(parser);
+
+	assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
+
+	/* Ensure that all nodes have been released by the treebuilder */
+	for (uintptr_t n = 1; n <= node_counter; n++) {
+		if (node_ref[n] != 0) {
+			printf("%" PRIuPTR " still referenced (=%u)\n",
+					n, (unsigned) node_ref[n]);
+			passed = false;
+		}
+	}
+
+	free(node_ref);
+
+	printf("%s\n", passed ? "PASS" : "FAIL");
+	return 0;
+}
+
+void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
+{
+ UNUSED(len);
+ UNUSED(pw);
+ /* Remember the buffer base; the create_* callbacks add data_off to it */
+ pbuffer = buffer;
+}
+
+int create_comment(void *ctx, const hubbub_string *data, void **result)
+{
+	/* Nodes are plain integer ids, used as indices into node_ref */
+	printf("Creating (%" PRIuPTR ") [comment '%.*s']\n", ++node_counter,
+			(int) data->len, pbuffer + data->data_off);
+
+	GROW_REF
+	node_ref[node_counter] = 0;
+	/* The new node is returned holding a single reference */
+	ref_node(ctx, (void *) node_counter);
+
+	*result = (void *) node_counter;
+	return 0;
+}
+
+int create_doctype(void *ctx, const hubbub_string *qname,
+		const hubbub_string *public_id, const hubbub_string *system_id,
+		void **result)
+{
+	UNUSED(public_id);
+	UNUSED(system_id);
+
+	/* Allocate the next node id and log the doctype's name */
+	printf("Creating (%" PRIuPTR ") [doctype '%.*s']\n", ++node_counter,
+			(int) qname->len, pbuffer + qname->data_off);
+
+	GROW_REF
+	node_ref[node_counter] = 0;
+	/* The new node is returned holding a single reference */
+	ref_node(ctx, (void *) node_counter);
+
+	*result = (void *) node_counter;
+	return 0;
+}
+
+int create_element(void *ctx, const hubbub_tag *tag, void **result)
+{
+	/* Allocate the next node id and log the element's tag name */
+	printf("Creating (%" PRIuPTR ") [element '%.*s']\n", ++node_counter,
+			(int) tag->name.len, pbuffer + tag->name.data_off);
+
+	GROW_REF
+	node_ref[node_counter] = 0;
+	/* The new node is returned holding a single reference */
+	ref_node(ctx, (void *) node_counter);
+
+	*result = (void *) node_counter;
+	return 0;
+}
+
+int create_text(void *ctx, const hubbub_string *data, void **result)
+{
+	/* Allocate the next node id and log the text content */
+	printf("Creating (%" PRIuPTR ") [text '%.*s']\n", ++node_counter,
+			(int) data->len, pbuffer + data->data_off);
+
+	GROW_REF
+	node_ref[node_counter] = 0;
+	/* The new node is returned holding a single reference */
+	ref_node(ctx, (void *) node_counter);
+
+	*result = (void *) node_counter;
+	return 0;
+}
+
+int ref_node(void *ctx, void *node)
+{
+	UNUSED(ctx);
+	/* Bump the count for this node id and log the new value */
+	printf("Referencing %" PRIuPTR " (=%u)\n", (uintptr_t) node,
+			(unsigned) ++node_ref[(uintptr_t) node]);
+
+	return 0;
+}
+
+int unref_node(void *ctx, void *node)
+{
+	UNUSED(ctx);
+	/* Drop one reference from this node id and log the new count */
+	printf("Unreferencing %" PRIuPTR " (=%u)\n", (uintptr_t) node,
+			(unsigned) --node_ref[(uintptr_t) node]);
+
+	return 0;
+}
+
+int append_child(void *ctx, void *parent, void *child, void **result)
+{
+	printf("Appending %" PRIuPTR " to %" PRIuPTR "\n",
+			(uintptr_t) child, (uintptr_t) parent);
+	/* The node handed back through result carries its own reference */
+	ref_node(ctx, child);
+	*result = (void *) child;
+	return 0;
+}
+
+int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+		void **result)
+{
+	printf("Inserting %" PRIuPTR " in %" PRIuPTR " before %" PRIuPTR "\n",
+			(uintptr_t) child, (uintptr_t) parent,
+			(uintptr_t) ref_child);
+	/* The node handed back through result carries its own reference */
+	ref_node(ctx, child);
+	*result = (void *) child;
+	return 0;
+}
+
+int remove_child(void *ctx, void *parent, void *child, void **result)
+{
+	printf("Removing %" PRIuPTR " from %" PRIuPTR "\n",
+			(uintptr_t) child, (uintptr_t) parent);
+	/* The node handed back through result carries its own reference */
+	ref_node(ctx, child);
+	*result = (void *) child;
+	return 0;
+}
+
+int clone_node(void *ctx, void *node, bool deep, void **result)
+{
+	/* The clone is simply a fresh node id; nothing is actually copied */
+	printf("%sCloning %" PRIuPTR " -> %" PRIuPTR "\n",
+			deep ? "Deep-" : "", (uintptr_t) node, ++node_counter);
+
+	GROW_REF
+	node_ref[node_counter] = 0;
+	/* The clone is returned holding a single reference */
+	ref_node(ctx, (void *) node_counter);
+
+	*result = (void *) node_counter;
+	return 0;
+}
+
+int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
+{
+	UNUSED(ctx);
+	/* Only log the mode, printed as its numeric enum value */
+	printf("Quirks mode = %u\n", (unsigned) mode);
+
+	return 0;
+}
+