From 04e5f38a5e8e471db41dd010e2c7c0ae9b016601 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 11 Mar 2008 22:03:28 +0000 Subject: More treebuilder (8.2.4.8) Make tree_handler a pointer rather than value. Check for tree_handler's presence in hubbub_treebuilder_token_handler rather than scattering checks all over the treebuilder code. Add test driver (doesn't actually build a tree but will exercise the core code correctly and verify that the treebuilder code releases all the node references it gains) Enhance quirks mode reporting to distinguish between standards, limited, and full quirks modes. svn path=/trunk/hubbub/; revision=3939 --- src/treebuilder/treebuilder.c | 363 +++++++++++++++++++++++++++++------------- 1 file changed, 255 insertions(+), 108 deletions(-) (limited to 'src/treebuilder/treebuilder.c') diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 6c92bd9..deb2493 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -5,6 +5,7 @@ * Copyright 2008 John-Mark Bell */ +#include #include #include "treebuilder/treebuilder.h" @@ -13,7 +14,7 @@ typedef enum { INITIAL, - BEFORE_DOCTYPE, + BEFORE_HTML, BEFORE_HEAD, IN_HEAD, IN_HEAD_NOSCRIPT, @@ -99,7 +100,7 @@ struct hubbub_treebuilder hubbub_treebuilder_context context; - hubbub_tree_handler tree_handler; + hubbub_tree_handler *tree_handler; hubbub_buffer_handler buffer_handler; void *buffer_pw; @@ -116,6 +117,9 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data, static void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw); +static bool handle_initial(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + /** \todo Uncomment the static keyword here once these functions are actually used */ /*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder, @@ -178,7 +182,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, tb->input_buffer = NULL; tb->input_buffer_len = 0; - memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler)); + tb->tree_handler = NULL; memset(&tb->context, 0, sizeof(hubbub_treebuilder_context)); tb->context.mode = INITIAL; @@ -191,6 +195,10 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, return NULL; } tb->context.stack_alloc = ELEMENT_STACK_CHUNK; + /* We rely on HTML not being equal to zero to determine + * if the first item in the stack is in use. Assert this here. */ + assert(HTML != 0); + tb->context.element_stack[0].type = 0; tb->buffer_handler = NULL; tb->buffer_pw = NULL; @@ -250,34 +258,36 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams); /* Clean up context */ - if (treebuilder->tree_handler.unref_node != NULL) { + if (treebuilder->tree_handler != NULL) { if (treebuilder->context.head_element != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, treebuilder->context.head_element); } if (treebuilder->context.form_element != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, treebuilder->context.form_element); } if (treebuilder->context.document != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, treebuilder->context.document); } for (uint32_t n = treebuilder->context.current_node; n > 0; n--) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, treebuilder->context.element_stack[n].node); } - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + if (treebuilder->context.element_stack[0].type == HTML) { + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, treebuilder->context.element_stack[0].node); + } } treebuilder->alloc(treebuilder->context.element_stack, 0, treebuilder->alloc_pw); @@ -287,9 +297,9 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) entry = next) { next = entry->next; - if (treebuilder->tree_handler.unref_node != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, + if (treebuilder->tree_handler != NULL) { + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, entry->details.node); } @@ -370,14 +380,167 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw) { hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw; + bool reprocess = true; - UNUSED(token); - - /* Do nothing if we have no document node */ - if (treebuilder->context.document == NULL) + /* Do nothing if we have no document node or there's no tree handler */ + if (treebuilder->context.document == NULL || + treebuilder->tree_handler == NULL) return; - /** \todo implement this */ + while (reprocess == true) { + switch (treebuilder->context.mode) { + case INITIAL: + reprocess = handle_initial(treebuilder, token); + break; + case BEFORE_HTML: + case BEFORE_HEAD: + case IN_HEAD: + case IN_HEAD_NOSCRIPT: + case AFTER_HEAD: + case IN_BODY: + case IN_TABLE: + case IN_CAPTION: + case IN_COLUMN_GROUP: + case IN_TABLE_BODY: + case IN_ROW: + case IN_CELL: + case IN_SELECT: + case IN_SELECT_IN_TABLE: + case AFTER_BODY: + case IN_FRAMESET: + case AFTER_FRAMESET: + case AFTER_AFTER_BODY: + case AFTER_AFTER_FRAMESET: + reprocess = false; + break; + } + } +} + +/** + * Handle token in initial insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to handle + * \return True to reprocess token, false otherwise + */ +bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) +{ + bool reprocess = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + { + const uint8_t *data = treebuilder->input_buffer + + token->data.character.data_off; + size_t len = token->data.character.len; + size_t c; + + /** \todo UTF-16 */ + + for (c = 0; c < len; c++) { + if (data[c] != 0x09 && data[c] != 0x0A && + data[c] != 0x0B && data[c] != 0x0C && + data[c] != 0x20) + break; + } + /* Non-whitespace characters in token, so reprocess */ + if (c != len) { + /* Update token data to strip leading whitespace */ + ((hubbub_token *) token)->data.character.data_off += + len - c; + ((hubbub_token *) token)->data.character.len -= c; + + /** \todo parse error */ + + treebuilder->tree_handler->set_quirks_mode( + treebuilder->tree_handler->ctx, + HUBBUB_QUIRKS_MODE_FULL); + + treebuilder->context.mode = BEFORE_HTML; + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_COMMENT: + { + int success; + void *comment, *appended; + + success = treebuilder->tree_handler->create_comment( + treebuilder->tree_handler->ctx, + &token->data.comment, &comment); + if (success != 0) { + /** \todo errors */ + } + + /* Append to Document node */ + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.document, + comment, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + comment); + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, comment); + } + break; + case HUBBUB_TOKEN_DOCTYPE: + { + int success; + void *doctype, *appended; + + /** \todo need public and system ids from tokeniser */ + success = treebuilder->tree_handler->create_doctype( + treebuilder->tree_handler->ctx, + &token->data.doctype.name, + NULL, NULL, &doctype); + if (success != 0) { + /** \todo errors */ + } + + /* Append to Document node */ + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.document, + doctype, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + doctype); + } + + /** \todo doctype processing */ + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, doctype); + + treebuilder->context.mode = BEFORE_HTML; + } + break; + case HUBBUB_TOKEN_START_TAG: + case HUBBUB_TOKEN_END_TAG: + case HUBBUB_TOKEN_EOF: + /** \todo parse error */ + treebuilder->tree_handler->set_quirks_mode( + treebuilder->tree_handler->ctx, + HUBBUB_QUIRKS_MODE_FULL); + treebuilder->context.mode = BEFORE_HTML; + reprocess = true; + break; + } + + return reprocess; } /** @@ -446,72 +609,66 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) } } - if (treebuilder->tree_handler.clone_node != NULL && - treebuilder->tree_handler.append_child != NULL && - treebuilder->tree_handler.unref_node != NULL) { - while (1) { - int success; - void *clone, *appended; - element_type prev_type; - void *prev_node; - uint32_t prev_stack_index; - - success = treebuilder->tree_handler.clone_node( - treebuilder->tree_handler.ctx, - entry->details.node, - false, - &clone); - if (success != 0) { - /** \todo handle errors */ - return; - } - - success = treebuilder->tree_handler.append_child( - treebuilder->tree_handler.ctx, - treebuilder->context.element_stack[ - treebuilder->context. - current_node].node, - clone, - &appended); - if (success != 0) { - /** \todo handle errors */ - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - clone); - return; - } - - if (element_stack_push(treebuilder, - entry->details.type, - appended) == false) { - /** \todo handle memory exhaustion */ - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - appended); - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - clone); - } - - if (formatting_list_replace(treebuilder, - entry, - entry->details.type, clone, - treebuilder->context.current_node, - &prev_type, &prev_node, - &prev_stack_index) == false) { - /** \todo handle errors */ - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - clone); - } - - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - prev_node); - - if (entry->next != NULL) - entry = entry->next; + while (1) { + int success; + void *clone, *appended; + element_type prev_type; + void *prev_node; + uint32_t prev_stack_index; + + success = treebuilder->tree_handler->clone_node( + treebuilder->tree_handler->ctx, + entry->details.node, + false, + &clone); + if (success != 0) { + /** \todo handle errors */ + return; } + + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + clone, + &appended); + if (success != 0) { + /** \todo handle errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + clone); + return; + } + + if (element_stack_push(treebuilder, + entry->details.type, + appended) == false) { + /** \todo handle memory exhaustion */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + clone); + } + + if (formatting_list_replace(treebuilder, entry, + entry->details.type, clone, + treebuilder->context.current_node, + &prev_type, &prev_node, + &prev_stack_index) == false) { + /** \todo handle errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + clone); + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + prev_node); + + if (entry->next != NULL) + entry = entry->next; } } @@ -538,11 +695,9 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder) /** \todo handle errors */ } - if (treebuilder->tree_handler.unref_node != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - node); - } + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); if (done == true) break; @@ -560,20 +715,14 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) int success; void *node, *appended; - if (treebuilder->tree_handler.create_element == NULL || - treebuilder->tree_handler.append_child == NULL || - treebuilder->tree_handler.unref_node == NULL) { - /** \todo errors */ - } - - success = treebuilder->tree_handler.create_element( - treebuilder->tree_handler.ctx, tag, &node); + success = treebuilder->tree_handler->create_element( + treebuilder->tree_handler->ctx, tag, &node); if (success != 0) { /** \todo errors */ } - success = treebuilder->tree_handler.append_child( - treebuilder->tree_handler.ctx, + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ treebuilder->context.current_node].node, node, &appended); @@ -581,7 +730,7 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) /** \todo errors */ } - treebuilder->tree_handler.unref_node(treebuilder->tree_handler.ctx, + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, appended); if (element_stack_push(treebuilder, @@ -616,11 +765,9 @@ void close_implied_end_tags(hubbub_treebuilder *treebuilder, /** \todo errors */ } - if (treebuilder->tree_handler.unref_node != NULL) { - treebuilder->tree_handler.unref_node( - treebuilder->tree_handler.ctx, - node); - } + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); type = treebuilder->context.element_stack[ treebuilder->context.current_node].type; -- cgit v1.2.3