summaryrefslogtreecommitdiff
path: root/src/treebuilder
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2008-03-11 22:03:28 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2008-03-11 22:03:28 +0000
commit: 04e5f38a5e8e471db41dd010e2c7c0ae9b016601 (patch)
tree: 68fae9d93497ab8a697b0b5dc056d40928c96426 /src/treebuilder
parent: 5b37e1f0bfd10da6720a6587e39d3dee45f77e8d (diff)
download: libhubbub-04e5f38a5e8e471db41dd010e2c7c0ae9b016601.tar.gz
libhubbub-04e5f38a5e8e471db41dd010e2c7c0ae9b016601.tar.bz2
More treebuilder (8.2.4.8)
Make tree_handler a pointer rather than a value.
Check for tree_handler's presence in hubbub_treebuilder_token_handler rather than scattering checks all over the treebuilder code.
Add a test driver (it doesn't actually build a tree, but it exercises the core code and verifies that the treebuilder code releases all the node references it gains).
Enhance quirks mode reporting to distinguish between standards, limited, and full quirks modes.
svn path=/trunk/hubbub/; revision=3939
Diffstat (limited to 'src/treebuilder')
-rw-r--r-- src/treebuilder/treebuilder.c 363
-rw-r--r-- src/treebuilder/treebuilder.h 2
2 files changed, 256 insertions, 109 deletions
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 6c92bd9..deb2493 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -5,6 +5,7 @@
* Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
*/
+#include <assert.h>
#include <string.h>
#include "treebuilder/treebuilder.h"
@@ -13,7 +14,7 @@
typedef enum
{
INITIAL,
- BEFORE_DOCTYPE,
+ BEFORE_HTML,
BEFORE_HEAD,
IN_HEAD,
IN_HEAD_NOSCRIPT,
@@ -99,7 +100,7 @@ struct hubbub_treebuilder
hubbub_treebuilder_context context;
- hubbub_tree_handler tree_handler;
+ hubbub_tree_handler *tree_handler;
hubbub_buffer_handler buffer_handler;
void *buffer_pw;
@@ -116,6 +117,9 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
static void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw);
+static bool handle_initial(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
/** \todo Uncomment the static keyword here once these functions are actually used */
/*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder,
@@ -178,7 +182,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
tb->input_buffer = NULL;
tb->input_buffer_len = 0;
- memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler));
+ tb->tree_handler = NULL;
memset(&tb->context, 0, sizeof(hubbub_treebuilder_context));
tb->context.mode = INITIAL;
@@ -191,6 +195,10 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
return NULL;
}
tb->context.stack_alloc = ELEMENT_STACK_CHUNK;
+ /* We rely on HTML not being equal to zero to determine
+ * if the first item in the stack is in use. Assert this here. */
+ assert(HTML != 0);
+ tb->context.element_stack[0].type = 0;
tb->buffer_handler = NULL;
tb->buffer_pw = NULL;
@@ -250,34 +258,36 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
/* Clean up context */
- if (treebuilder->tree_handler.unref_node != NULL) {
+ if (treebuilder->tree_handler != NULL) {
if (treebuilder->context.head_element != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.head_element);
}
if (treebuilder->context.form_element != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.form_element);
}
if (treebuilder->context.document != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.document);
}
for (uint32_t n = treebuilder->context.current_node;
n > 0; n--) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[n].node);
}
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ if (treebuilder->context.element_stack[0].type == HTML) {
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[0].node);
+ }
}
treebuilder->alloc(treebuilder->context.element_stack, 0,
treebuilder->alloc_pw);
@@ -287,9 +297,9 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
entry = next) {
next = entry->next;
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
+ if (treebuilder->tree_handler != NULL) {
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
entry->details.node);
}
@@ -370,14 +380,167 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw)
{
hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw;
+ bool reprocess = true;
- UNUSED(token);
-
- /* Do nothing if we have no document node */
- if (treebuilder->context.document == NULL)
+ /* Do nothing if we have no document node or there's no tree handler */
+ if (treebuilder->context.document == NULL ||
+ treebuilder->tree_handler == NULL)
return;
- /** \todo implement this */
+ while (reprocess == true) {
+ switch (treebuilder->context.mode) {
+ case INITIAL:
+ reprocess = handle_initial(treebuilder, token);
+ break;
+ case BEFORE_HTML:
+ case BEFORE_HEAD:
+ case IN_HEAD:
+ case IN_HEAD_NOSCRIPT:
+ case AFTER_HEAD:
+ case IN_BODY:
+ case IN_TABLE:
+ case IN_CAPTION:
+ case IN_COLUMN_GROUP:
+ case IN_TABLE_BODY:
+ case IN_ROW:
+ case IN_CELL:
+ case IN_SELECT:
+ case IN_SELECT_IN_TABLE:
+ case AFTER_BODY:
+ case IN_FRAMESET:
+ case AFTER_FRAMESET:
+ case AFTER_AFTER_BODY:
+ case AFTER_AFTER_FRAMESET:
+ reprocess = false;
+ break;
+ }
+ }
+}
+
+/**
+ * Handle token in initial insertion mode
+ *
+ * Character tokens consisting solely of whitespace are ignored. Comment
+ * and doctype tokens are turned into nodes and appended to the document
+ * node. Any other token (or a character token containing non-whitespace)
+ * puts the document into full quirks mode, switches to the BEFORE_HTML
+ * insertion mode and asks the caller to reprocess the token there.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
+{
+	bool reprocess = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+	{
+		const uint8_t *data = treebuilder->input_buffer +
+				token->data.character.data_off;
+		size_t len = token->data.character.len;
+		size_t c;
+
+		/** \todo UTF-16 */
+
+		/* Count the leading whitespace bytes */
+		for (c = 0; c < len; c++) {
+			if (data[c] != 0x09 && data[c] != 0x0A &&
+					data[c] != 0x0B && data[c] != 0x0C &&
+					data[c] != 0x20)
+				break;
+		}
+		/* Non-whitespace characters in token, so reprocess */
+		if (c != len) {
+			/* Update token data to strip leading whitespace:
+			 * c bytes were skipped, so the offset moves forward
+			 * by c and the length shrinks by the same amount. */
+			((hubbub_token *) token)->data.character.data_off += c;
+			((hubbub_token *) token)->data.character.len -= c;
+
+			/** \todo parse error */
+
+			treebuilder->tree_handler->set_quirks_mode(
+					treebuilder->tree_handler->ctx,
+					HUBBUB_QUIRKS_MODE_FULL);
+
+			treebuilder->context.mode = BEFORE_HTML;
+			reprocess = true;
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+	{
+		int success;
+		void *comment = NULL, *appended = NULL;
+
+		success = treebuilder->tree_handler->create_comment(
+				treebuilder->tree_handler->ctx,
+				&token->data.comment, &comment);
+		if (success != 0) {
+			/** \todo errors */
+			/* No node was created, so there is nothing to
+			 * append or release. */
+			break;
+		}
+
+		/* Append to Document node */
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.document,
+				comment, &appended);
+		if (success != 0) {
+			/** \todo errors */
+		} else {
+			/* Only release the appended node if the append
+			 * actually produced one. */
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					appended);
+		}
+
+		/* Release the created node exactly once on every path */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, comment);
+	}
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+	{
+		int success;
+		void *doctype = NULL, *appended = NULL;
+
+		/** \todo need public and system ids from tokeniser */
+		success = treebuilder->tree_handler->create_doctype(
+				treebuilder->tree_handler->ctx,
+				&token->data.doctype.name,
+				NULL, NULL, &doctype);
+		if (success != 0) {
+			/** \todo errors */
+		} else {
+			/* Append to Document node */
+			success = treebuilder->tree_handler->append_child(
+					treebuilder->tree_handler->ctx,
+					treebuilder->context.document,
+					doctype, &appended);
+			if (success != 0) {
+				/** \todo errors */
+			} else {
+				/* Only release the appended node if the
+				 * append actually produced one. */
+				treebuilder->tree_handler->unref_node(
+						treebuilder->tree_handler->ctx,
+						appended);
+			}
+
+			/** \todo doctype processing */
+
+			/* Release the created node exactly once */
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					doctype);
+		}
+
+		/* As before, the mode changes whether or not the doctype
+		 * node could be created and appended. */
+		treebuilder->context.mode = BEFORE_HTML;
+	}
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+		treebuilder->tree_handler->set_quirks_mode(
+				treebuilder->tree_handler->ctx,
+				HUBBUB_QUIRKS_MODE_FULL);
+		treebuilder->context.mode = BEFORE_HTML;
+		reprocess = true;
+		break;
+	}
+
+	return reprocess;
}
/**
@@ -446,72 +609,66 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
}
}
- if (treebuilder->tree_handler.clone_node != NULL &&
- treebuilder->tree_handler.append_child != NULL &&
- treebuilder->tree_handler.unref_node != NULL) {
- while (1) {
- int success;
- void *clone, *appended;
- element_type prev_type;
- void *prev_node;
- uint32_t prev_stack_index;
-
- success = treebuilder->tree_handler.clone_node(
- treebuilder->tree_handler.ctx,
- entry->details.node,
- false,
- &clone);
- if (success != 0) {
- /** \todo handle errors */
- return;
- }
-
- success = treebuilder->tree_handler.append_child(
- treebuilder->tree_handler.ctx,
- treebuilder->context.element_stack[
- treebuilder->context.
- current_node].node,
- clone,
- &appended);
- if (success != 0) {
- /** \todo handle errors */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- return;
- }
-
- if (element_stack_push(treebuilder,
- entry->details.type,
- appended) == false) {
- /** \todo handle memory exhaustion */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- appended);
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- }
-
- if (formatting_list_replace(treebuilder,
- entry,
- entry->details.type, clone,
- treebuilder->context.current_node,
- &prev_type, &prev_node,
- &prev_stack_index) == false) {
- /** \todo handle errors */
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- clone);
- }
-
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- prev_node);
-
- if (entry->next != NULL)
- entry = entry->next;
+ while (1) {
+ int success;
+ void *clone, *appended;
+ element_type prev_type;
+ void *prev_node;
+ uint32_t prev_stack_index;
+
+ success = treebuilder->tree_handler->clone_node(
+ treebuilder->tree_handler->ctx,
+ entry->details.node,
+ false,
+ &clone);
+ if (success != 0) {
+ /** \todo handle errors */
+ return;
}
+
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node,
+ clone,
+ &appended);
+ if (success != 0) {
+ /** \todo handle errors */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ return;
+ }
+
+ if (element_stack_push(treebuilder,
+ entry->details.type,
+ appended) == false) {
+ /** \todo handle memory exhaustion */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ appended);
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ }
+
+ if (formatting_list_replace(treebuilder, entry,
+ entry->details.type, clone,
+ treebuilder->context.current_node,
+ &prev_type, &prev_node,
+ &prev_stack_index) == false) {
+ /** \todo handle errors */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ clone);
+ }
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ prev_node);
+
+ if (entry->next != NULL)
+ entry = entry->next;
}
}
@@ -538,11 +695,9 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder)
/** \todo handle errors */
}
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- node);
- }
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ node);
if (done == true)
break;
@@ -560,20 +715,14 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag)
int success;
void *node, *appended;
- if (treebuilder->tree_handler.create_element == NULL ||
- treebuilder->tree_handler.append_child == NULL ||
- treebuilder->tree_handler.unref_node == NULL) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler.create_element(
- treebuilder->tree_handler.ctx, tag, &node);
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx, tag, &node);
if (success != 0) {
/** \todo errors */
}
- success = treebuilder->tree_handler.append_child(
- treebuilder->tree_handler.ctx,
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[
treebuilder->context.current_node].node,
node, &appended);
@@ -581,7 +730,7 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag)
/** \todo errors */
}
- treebuilder->tree_handler.unref_node(treebuilder->tree_handler.ctx,
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
appended);
if (element_stack_push(treebuilder,
@@ -616,11 +765,9 @@ void close_implied_end_tags(hubbub_treebuilder *treebuilder,
/** \todo errors */
}
- if (treebuilder->tree_handler.unref_node != NULL) {
- treebuilder->tree_handler.unref_node(
- treebuilder->tree_handler.ctx,
- node);
- }
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ node);
type = treebuilder->context.element_stack[
treebuilder->context.current_node].type;
diff --git a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h
index 839f901..9d690b1 100644
--- a/src/treebuilder/treebuilder.h
+++ b/src/treebuilder/treebuilder.h
@@ -44,7 +44,7 @@ typedef union hubbub_treebuilder_optparams {
void *pw;
} error_handler;
- hubbub_tree_handler tree_handler;
+ hubbub_tree_handler *tree_handler;
void *document_node;
} hubbub_treebuilder_optparams;