From 5b37e1f0bfd10da6720a6587e39d3dee45f77e8d Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 11 Mar 2008 16:54:30 +0000 Subject: More treebuilder (up to 8.2.4.7) Loads of issues still outstanding, including a distinct lack of error handling Change tree handler API to allow (de)referencing of nodes rather than explicit destruction. Change create_element handler to take an entire hubbub_tag rather than just the tag name -- the DOM binding can deal with the issue of attaching attributes to the created element node. svn path=/trunk/hubbub/; revision=3932 --- include/hubbub/functypes.h | 13 +- include/hubbub/parser.h | 3 + include/hubbub/tree.h | 3 +- src/parser.c | 7 + src/treebuilder/treebuilder.c | 806 +++++++++++++++++++++++++++++++++++++++++- src/treebuilder/treebuilder.h | 3 + 6 files changed, 829 insertions(+), 6 deletions(-) diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h index 8d7f199..ee8c429 100644 --- a/include/hubbub/functypes.h +++ b/include/hubbub/functypes.h @@ -49,8 +49,8 @@ typedef int (*hubbub_tree_create_doctype)(void *ctx, const hubbub_string *qname, /** * Type of tree element node creation function */ -typedef int (*hubbub_tree_create_element)(void *ctx, - const hubbub_string *tag_name, void **result); +typedef int (*hubbub_tree_create_element)(void *ctx, const hubbub_tag *tag, + void **result); /** * Type of tree text node creation function @@ -59,9 +59,14 @@ typedef int (*hubbub_tree_create_text)(void *ctx, const hubbub_string *data, void **result); /** - * Type of tree node destruction function + * Type of tree node reference function + */ +typedef int (*hubbub_tree_ref_node)(void *ctx, void *node); + +/** + * Type of tree node dereference function */ -typedef int (*hubbub_tree_free_node)(void *ctx, void *node); +typedef int (*hubbub_tree_unref_node)(void *ctx, void *node); /** * Type of tree node appending function diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h index 134f4b7..5565f63 100644 --- a/include/hubbub/parser.h +++ b/include/hubbub/parser.h @@ -26,6 +26,7 @@ typedef enum hubbub_parser_opttype { HUBBUB_PARSER_ERROR_HANDLER, HUBBUB_PARSER_CONTENT_MODEL, HUBBUB_PARSER_TREE_HANDLER, + HUBBUB_PARSER_DOCUMENT_NODE, } hubbub_parser_opttype; /** @@ -52,6 +53,8 @@ typedef union hubbub_parser_optparams { } content_model; hubbub_tree_handler tree_handler; + + void *document_node; } hubbub_parser_optparams; /* Create a hubbub parser */ diff --git a/include/hubbub/tree.h b/include/hubbub/tree.h index a883f1a..5b82592 100644 --- a/include/hubbub/tree.h +++ b/include/hubbub/tree.h @@ -18,7 +18,8 @@ typedef struct hubbub_tree_handler { hubbub_tree_create_doctype create_doctype; hubbub_tree_create_element create_element; hubbub_tree_create_text create_text; - hubbub_tree_free_node free_node; + hubbub_tree_ref_node ref_node; + hubbub_tree_unref_node unref_node; hubbub_tree_append_child append_child; hubbub_tree_insert_before insert_before; hubbub_tree_remove_child remove_child; diff --git a/src/parser.c b/src/parser.c index 23702e1..1ae498a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -158,6 +158,13 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + case HUBBUB_PARSER_DOCUMENT_NODE: + if (parser->tb != NULL) { + result = hubbub_treebuilder_setopt(parser->tb, + HUBBUB_TREEBUILDER_DOCUMENT_NODE, + (hubbub_treebuilder_optparams *) params); + } + break; default: result = HUBBUB_INVALID; } diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 529cd08..6c92bd9 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -10,6 +10,86 @@ #include "treebuilder/treebuilder.h" #include "utils/utils.h" +typedef enum +{ + INITIAL, + BEFORE_DOCTYPE, + BEFORE_HEAD, + IN_HEAD, + IN_HEAD_NOSCRIPT, + AFTER_HEAD, + IN_BODY, + IN_TABLE, + IN_CAPTION, + IN_COLUMN_GROUP, + IN_TABLE_BODY, + IN_ROW, + IN_CELL, + IN_SELECT, + IN_SELECT_IN_TABLE, + AFTER_BODY, + IN_FRAMESET, + AFTER_FRAMESET, + AFTER_AFTER_BODY, + AFTER_AFTER_FRAMESET, +} insertion_mode; + +typedef enum +{ +/* Special */ + ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER, + COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME, + FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT, + ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT, + OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER, + STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR, +/* Scoping */ + APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, +/* Formatting */ + A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, +/* Phrasing */ + /**< \todo Enumerate phrasing elements */ +} element_type; + +typedef struct element_context +{ + element_type type; + void *node; +} element_context; + +typedef struct formatting_list_entry +{ + element_context details; /**< Entry details */ + + uint32_t stack_index; /**< Index into element stack */ + + struct formatting_list_entry *prev; /**< Previous in list */ + struct formatting_list_entry *next; /**< Next in list */ +} formatting_list_entry; + +typedef struct hubbub_treebuilder_context +{ + insertion_mode mode; /**< The current insertion mode */ + +#define ELEMENT_STACK_CHUNK 128 + element_context *element_stack; /**< Stack of open elements */ + uint32_t stack_alloc; /**< Number of stack slots allocated */ + uint32_t current_node; /**< Index of current node in stack */ + uint32_t current_table; /**< Index of current table in stack */ + + formatting_list_entry *formatting_list; /**< List of active formatting + * elements */ + formatting_list_entry *formatting_list_end; /**< End of active + * formatting list */ + + void *head_element; /**< Pointer to HEAD element */ + + void *form_element; /**< Pointer to most recently + * opened FORM element */ + + void *document; /**< Pointer to the document node */ +} hubbub_treebuilder_context; + struct hubbub_treebuilder { hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */ @@ -17,6 +97,8 @@ struct hubbub_treebuilder const uint8_t *input_buffer; /**< Start of tokeniser's buffer */ size_t input_buffer_len; /**< Length of input buffer */ + hubbub_treebuilder_context context; + hubbub_tree_handler tree_handler; hubbub_buffer_handler buffer_handler; @@ -34,6 +116,42 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data, static void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw); +/** \todo Uncomment the static keyword here once these functions are actually used */ + +/*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder, + element_type type, bool in_table); +/*static*/ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder); +/*static*/ void clear_active_formatting_list_to_marker( + hubbub_treebuilder *treebuilder); +/*static*/ void insert_element(hubbub_treebuilder *treebuilder, + const hubbub_tag *tag_name); +/*static*/ void close_implied_end_tags(hubbub_treebuilder *treebuilder, + element_type except); +/*static*/ void reset_insertion_mode(hubbub_treebuilder *treebuilder); + +static element_type element_type_from_name(hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name); + +static inline bool is_special_element(element_type type); +static inline bool is_scoping_element(element_type type); +static inline bool is_formatting_element(element_type type); +static inline bool is_phrasing_element(element_type type); + +static bool element_stack_push(hubbub_treebuilder *treebuilder, + element_type type, void *node); +static bool element_stack_pop(hubbub_treebuilder *treebuilder, + element_type *type, void **node); + +/*static*/ bool formatting_list_insert(hubbub_treebuilder *treebuilder, + element_type type, void *node, uint32_t stack_index); +static bool formatting_list_remove(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type *type, void **node, uint32_t *stack_index); +static bool formatting_list_replace(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type type, void *node, uint32_t stack_index, + element_type *otype, void **onode, uint32_t *ostack_index); + /** * Create a hubbub treebuilder * @@ -62,6 +180,18 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler)); + memset(&tb->context, 0, sizeof(hubbub_treebuilder_context)); + tb->context.mode = INITIAL; + + tb->context.element_stack = alloc(NULL, + ELEMENT_STACK_CHUNK * sizeof(element_context), + pw); + if (tb->context.element_stack == NULL) { + alloc(tb, 0, pw); + return NULL; + } + tb->context.stack_alloc = ELEMENT_STACK_CHUNK; + tb->buffer_handler = NULL; tb->buffer_pw = NULL; @@ -76,6 +206,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams) != HUBBUB_OK) { + alloc(tb->context.element_stack, 0, pw); alloc(tb, 0, pw); return NULL; } @@ -85,6 +216,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER, &tokparams) != HUBBUB_OK) { + alloc(tb->context.element_stack, 0, pw); alloc(tb, 0, pw); return NULL; } @@ -99,6 +231,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, */ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) { + formatting_list_entry *entry, *next; hubbub_tokeniser_optparams tokparams; if (treebuilder == NULL) @@ -116,6 +249,53 @@ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) hubbub_tokeniser_setopt(treebuilder->tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams); + /* Clean up context */ + if (treebuilder->tree_handler.unref_node != NULL) { + if (treebuilder->context.head_element != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + treebuilder->context.head_element); + } + + if (treebuilder->context.form_element != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + treebuilder->context.form_element); + } + + if (treebuilder->context.document != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + treebuilder->context.document); + } + + for (uint32_t n = treebuilder->context.current_node; + n > 0; n--) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + treebuilder->context.element_stack[n].node); + } + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + treebuilder->context.element_stack[0].node); + } + treebuilder->alloc(treebuilder->context.element_stack, 0, + treebuilder->alloc_pw); + treebuilder->context.element_stack = NULL; + + for (entry = treebuilder->context.formatting_list; entry != NULL; + entry = next) { + next = entry->next; + + if (treebuilder->tree_handler.unref_node != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + entry->details.node); + } + + treebuilder->alloc(entry, 0, treebuilder->alloc_pw); + } + treebuilder->alloc(treebuilder, 0, treebuilder->alloc_pw); } @@ -149,6 +329,9 @@ hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder, case HUBBUB_TREEBUILDER_TREE_HANDLER: treebuilder->tree_handler = params->tree_handler; break; + case HUBBUB_TREEBUILDER_DOCUMENT_NODE: + treebuilder->context.document = params->document_node; + break; } return HUBBUB_OK; @@ -188,9 +371,630 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token, { hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw; - UNUSED(treebuilder); UNUSED(token); + /* Do nothing if we have no document node */ + if (treebuilder->context.document == NULL) + return; + /** \todo implement this */ } +/** + * Determine if an element is in (table) scope + * + * \param treebuilder Treebuilder to look in + * \param type Element type to find + * \param in_table Whether we're looking in table scope + * \return True iff element is in scope, false otherwise + */ +bool element_in_scope(hubbub_treebuilder *treebuilder, + element_type type, bool in_table) +{ + uint32_t node; + + if (treebuilder->context.element_stack == NULL) + return false; + + for (node = treebuilder->context.current_node; node > 0; node --) { + element_type node_type = + treebuilder->context.element_stack[node].type; + + if (node_type == type) + return true; + + if (node_type == TABLE) + break; + + /* The list of element types given in the spec here are the + * scoping elements excluding TABLE and HTML. TABLE is handled + * in the previous conditional and HTML should only occur + * as the first node in the stack, which is never processed + * in this loop. */ + if (!in_table && is_scoping_element(node_type)) + break; + } + + return false; +} + +/** + * Reconstruct the list of active formatting elements + * + * \param treebuilder Treebuilder instance containing list + */ +void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) +{ + formatting_list_entry *entry; + + if (treebuilder->context.formatting_list == NULL) + return; + + entry = treebuilder->context.formatting_list_end; + + /* Assumption: HTML and TABLE elements are not inserted into the list */ + if (is_scoping_element(entry->details.type) || entry->stack_index != 0) + return; + + while (entry->prev != NULL) { + entry = entry->prev; + + if (is_scoping_element(entry->details.type) || + entry->stack_index != 0) { + entry = entry->next; + break; + } + } + + if (treebuilder->tree_handler.clone_node != NULL && + treebuilder->tree_handler.append_child != NULL && + treebuilder->tree_handler.unref_node != NULL) { + while (1) { + int success; + void *clone, *appended; + element_type prev_type; + void *prev_node; + uint32_t prev_stack_index; + + success = treebuilder->tree_handler.clone_node( + treebuilder->tree_handler.ctx, + entry->details.node, + false, + &clone); + if (success != 0) { + /** \todo handle errors */ + return; + } + + success = treebuilder->tree_handler.append_child( + treebuilder->tree_handler.ctx, + treebuilder->context.element_stack[ + treebuilder->context. + current_node].node, + clone, + &appended); + if (success != 0) { + /** \todo handle errors */ + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + clone); + return; + } + + if (element_stack_push(treebuilder, + entry->details.type, + appended) == false) { + /** \todo handle memory exhaustion */ + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + appended); + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + clone); + } + + if (formatting_list_replace(treebuilder, + entry, + entry->details.type, clone, + treebuilder->context.current_node, + &prev_type, &prev_node, + &prev_stack_index) == false) { + /** \todo handle errors */ + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + clone); + } + + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + prev_node); + + if (entry->next != NULL) + entry = entry->next; + } + } +} + +/** + * Clear the list of active formatting elements up to the last marker + * + * \param treebuilder The treebuilder instance containing the list + */ +void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder) +{ + formatting_list_entry *entry; + bool done = false; + + while ((entry = treebuilder->context.formatting_list_end) != NULL) { + element_type type; + void *node; + uint32_t stack_index; + + if (is_scoping_element(entry->details.type)) + done = true; + + if (formatting_list_remove(treebuilder, entry, + &type, &node, &stack_index) == false) { + /** \todo handle errors */ + } + + if (treebuilder->tree_handler.unref_node != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + node); + } + + if (done == true) + break; + } +} + +/** + * Create element and insert it into the DOM, pushing it on the stack + * + * \param treebuilder The treebuilder instance + * \param tag The element to insert + */ +void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) +{ + int success; + void *node, *appended; + + if (treebuilder->tree_handler.create_element == NULL || + treebuilder->tree_handler.append_child == NULL || + treebuilder->tree_handler.unref_node == NULL) { + /** \todo errors */ + } + + success = treebuilder->tree_handler.create_element( + treebuilder->tree_handler.ctx, tag, &node); + if (success != 0) { + /** \todo errors */ + } + + success = treebuilder->tree_handler.append_child( + treebuilder->tree_handler.ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + node, &appended); + if (success != 0) { + /** \todo errors */ + } + + treebuilder->tree_handler.unref_node(treebuilder->tree_handler.ctx, + appended); + + if (element_stack_push(treebuilder, + element_type_from_name(treebuilder, &tag->name), + node) == false) { + /** \todo errors */ + } +} + +/** + * Close implied end tags + * + * \param treebuilder The treebuilder instance + * \param except Tag type to exclude from processing [DD,DT,LI,P] + */ +void close_implied_end_tags(hubbub_treebuilder *treebuilder, + element_type except) +{ + element_type type; + + type = treebuilder->context.element_stack[ + treebuilder->context.current_node].type; + + while (type == DD || type == DT || type == LI || type == P) { + element_type otype; + void *node; + + if (type == except) + break; + + if (element_stack_pop(treebuilder, &otype, &node) == false) { + /** \todo errors */ + } + + if (treebuilder->tree_handler.unref_node != NULL) { + treebuilder->tree_handler.unref_node( + treebuilder->tree_handler.ctx, + node); + } + + type = treebuilder->context.element_stack[ + treebuilder->context.current_node].type; + } +} + +/** + * Reset the insertion mode + * + * \param treebuilder The treebuilder to reset + */ +void reset_insertion_mode(hubbub_treebuilder *treebuilder) +{ + uint32_t node; + element_context *stack = treebuilder->context.element_stack; + + /** \todo fragment parsing algorithm */ + + for (node = treebuilder->context.current_node; node > 0; node--) { + switch (stack[node].type) { + case SELECT: + /* fragment case */ + break; + case TD: + case TH: + treebuilder->context.mode = IN_CELL; + return; + case TR: + treebuilder->context.mode = IN_ROW; + return; + case TBODY: + case TFOOT: + case THEAD: + treebuilder->context.mode = IN_TABLE_BODY; + return; + case CAPTION: + treebuilder->context.mode = IN_CAPTION; + return; + case COLGROUP: + /* fragment case */ + break; + case TABLE: + treebuilder->context.mode = IN_TABLE; + return; + case HEAD: + /* fragment case */ + break; + case BODY: + treebuilder->context.mode = IN_BODY; + return; + case FRAMESET: + /* fragment case */ + break; + case HTML: + /* fragment case */ + break; + default: + break; + } + } +} + +/** + * Convert an element name into an element type + * + * \param treebuilder The treebuilder instance + * \param tag_name The tag name to consider + * \return The corresponding element type + */ +element_type element_type_from_name(hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name) +{ + static const struct { + const char *name; + element_type type; + } name_type_map[] = { + { "ADDRESS", ADDRESS }, { "AREA", AREA }, + { "BASE", BASE }, { "BASEFONT", BASEFONT }, + { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE }, + { "BODY", BODY }, { "BR", BR }, + { "CENTER", CENTER }, { "COL", COL }, + { "COLGROUP", COLGROUP }, { "DD", DD }, + { "DIR", DIR }, { "DIV", DIV }, + { "DL", DL }, { "DT", DT }, + { "EMBED", EMBED }, { "FIELDSET", FIELDSET }, + { "FORM", FORM }, { "FRAME", FRAME }, + { "FRAMESET", FRAMESET }, { "H1", H1 }, + { "H2", H2 }, { "H3", H3 }, + { "H4", H4 }, { "H5", H5 }, + { "H6", H6 }, { "HEAD", HEAD }, + { "HR", HR }, { "IFRAME", IFRAME }, + { "IMAGE", IMAGE }, { "IMG", IMG }, + { "INPUT", INPUT }, { "ISINDEX", ISINDEX }, + { "LI", LI }, { "LINK", LINK }, + { "LISTING", LISTING }, { "MENU", MENU }, + { "META", META }, { "NOEMBED", NOEMBED }, + { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT }, + { "OL", OL }, { "OPTGROUP", OPTGROUP }, + { "OPTION", OPTION }, { "P", P }, + { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT }, + { "PRE", PRE }, { "SCRIPT", SCRIPT }, + { "SELECT", SELECT }, { "SPACER", SPACER }, + { "STYLE", STYLE }, { "TBODY", TBODY }, + { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT }, + { "THEAD", THEAD }, { "TITLE", TITLE }, + { "TR", TR }, { "UL", UL }, + { "WBR", WBR }, + { "APPLET", APPLET }, { "BUTTON", BUTTON }, + { "CAPTION", CAPTION }, { "HTML", HTML }, + { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT }, + { "TABLE", TABLE }, { "TD", TD }, + { "TH", TH }, + { "A", A }, { "B", B }, + { "BIG", BIG }, { "EM", EM }, + { "FONT", FONT }, { "I", I }, + { "NOBR", NOBR }, { "S", S }, + { "SMALL", SMALL }, { "STRIKE", STRIKE }, + { "STRONG", STRONG }, { "TT", TT }, + { "U", U }, + }; + const char *data = + (const char *) treebuilder->input_buffer + tag_name->data_off; + + /** \todo UTF-16 support */ + /** \todo optimise this */ + + for (uint32_t i = 0; + i < sizeof(name_type_map) / sizeof(name_type_map[0]); + i++) { + if (strlen(name_type_map[i].name) != tag_name->len) + continue; + + if (strncasecmp(name_type_map[i].name, + data, tag_name->len) == 0) + return name_type_map[i].type; + } + + /** \todo produce type values for unknown tags */ + return U + 1; +} + +/** + * Determine if a node is a special element + * + * \param type Node type to consider + * \return True iff node is a special element + */ +inline bool is_special_element(element_type type) +{ + return (type <= WBR); +} + +/** + * Determine if a node is a scoping element + * + * \param type Node type to consider + * \return True iff node is a scoping element + */ +inline bool is_scoping_element(element_type type) +{ + return (type >= APPLET && type <= TH); +} + +/** + * Determine if a node is a formatting element + * + * \param type Node type to consider + * \return True iff node is a formatting element + */ +inline bool is_formatting_element(element_type type) +{ + return (type >= A && type <= U); +} + +/** + * Determine if a node is a phrasing element + * + * \param type Node type to consider + * \return True iff node is a phrasing element + */ +inline bool is_phrasing_element(element_type type) +{ + return (type > U); +} + +/** + * Push an element onto the stack of open elements + * + * \param treebuilder The treebuilder instance containing the stack + * \param type The type of element being pushed + * \param node The node to push + * \return True on success, false on memory exhaustion + */ +bool element_stack_push(hubbub_treebuilder *treebuilder, + element_type type, void *node) +{ + uint32_t slot = treebuilder->context.current_node + 1; + + if (slot > treebuilder->context.stack_alloc) { + element_context *temp = treebuilder->alloc( + treebuilder->context.element_stack, + (treebuilder->context.stack_alloc + + ELEMENT_STACK_CHUNK) * + sizeof(element_context), + treebuilder->alloc_pw); + + if (temp == NULL) + return false; + + treebuilder->context.element_stack = temp; + treebuilder->context.stack_alloc += ELEMENT_STACK_CHUNK; + } + + treebuilder->context.element_stack[slot].type = type; + treebuilder->context.element_stack[slot].node = node; + + treebuilder->context.current_node = slot; + + /* Update current table index */ + if (type == TABLE) + treebuilder->context.current_table = slot; + + return true; +} + +/** + * Pop an element off the stack of open elements + * + * \param treebuilder The treebuilder instance containing the stack + * \param type Pointer to location to receive element type + * \param node Pointer to location to receive node + * \return True on success, false on memory exhaustion. + */ +bool element_stack_pop(hubbub_treebuilder *treebuilder, + element_type *type, void **node) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t slot = treebuilder->context.current_node; + formatting_list_entry *entry; + + /* We're popping a table, find previous */ + if (stack[slot].type == TABLE) { + uint32_t t; + for (t = slot - 1; t > 0; t--) { + if (stack[t].type == TABLE) + break; + } + + treebuilder->context.current_table = t; + } + + if (is_formatting_element(stack[slot].type) || + (is_scoping_element(stack[slot].type) && + stack[slot].type != HTML && + stack[slot].type != TABLE)) { + /* Find occurrences of the node we're about to pop in the list + * of active formatting elements. We need to invalidate their + * stack index information. */ + for (entry = treebuilder->context.formatting_list_end; + entry != NULL; entry = entry->prev) { + /** \todo Can we optimise this? + * (i.e. by not traversing the entire list) */ + if (entry->stack_index == slot) + entry->stack_index = 0; + } + } + + *type = stack[slot].type; + *node = stack[slot].node; + + /** \todo reduce allocated stack size once there's enough free */ + + treebuilder->context.current_node = slot - 1; + + return true; +} + +/** + * Insert an element into the list of active formatting elements + * + * \param treebuilder Treebuilder instance containing list + * \param type Type of node being inserted + * \param node Node being inserted + * \param stack_index Index into stack of open elements + * \return True on success, false on memory exhaustion + */ +bool formatting_list_insert(hubbub_treebuilder *treebuilder, + element_type type, void *node, uint32_t stack_index) +{ + formatting_list_entry *entry; + + entry = treebuilder->alloc(NULL, sizeof(formatting_list_entry), + treebuilder->alloc_pw); + if (entry == NULL) + return false; + + entry->details.type = type; + entry->details.node = node; + entry->stack_index = stack_index; + + entry->prev = treebuilder->context.formatting_list_end; + entry->next = NULL; + + if (entry->prev != NULL) + entry->prev->next = entry; + else + treebuilder->context.formatting_list = entry; + + treebuilder->context.formatting_list_end = entry; + + return true; +} + +/** + * Remove an element from the list of active formatting elements + * + * \param treebuilder Treebuilder instance containing list + * \param entry The item to remove + * \param type Pointer to location to receive type of node + * \param node Pointer to location to receive node + * \param stack_index Pointer to location to receive stack index + * \return True on success, false on memory exhaustion + */ +bool formatting_list_remove(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type *type, void **node, uint32_t *stack_index) +{ + *type = entry->details.type; + *node = entry->details.node; + *stack_index = entry->stack_index; + + if (entry->prev == NULL) + treebuilder->context.formatting_list = entry->next; + else + entry->prev->next = entry->next; + + if (entry->next == NULL) + treebuilder->context.formatting_list_end = entry->prev; + else + entry->next->prev = entry->prev; + + treebuilder->alloc(entry, 0, treebuilder->alloc_pw); + + return true; +} + +/** + * Remove an element from the list of active formatting elements + * + * \param treebuilder Treebuilder instance containing list + * \param entry The item to replace + * \param type Replacement node type + * \param node Replacement node + * \param stack_index Replacement stack index + * \param otype Pointer to location to receive old type + * \param onode Pointer to location to receive old node + * \param ostack_index Pointer to location to receive old stack index + * \return True on success, false on memory exhaustion + */ +bool formatting_list_replace(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type type, void *node, uint32_t stack_index, + element_type *otype, void **onode, uint32_t *ostack_index) +{ + UNUSED(treebuilder); + + *otype = entry->details.type; + *onode = entry->details.node; + *ostack_index = entry->stack_index; + + entry->details.type = type; + entry->details.node = node; + entry->stack_index = stack_index; + + return true; +} + diff --git a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h index 0792e99..839f901 100644 --- a/src/treebuilder/treebuilder.h +++ b/src/treebuilder/treebuilder.h @@ -27,6 +27,7 @@ typedef enum hubbub_treebuilder_opttype { HUBBUB_TREEBUILDER_BUFFER_HANDLER, HUBBUB_TREEBUILDER_ERROR_HANDLER, HUBBUB_TREEBUILDER_TREE_HANDLER, + HUBBUB_TREEBUILDER_DOCUMENT_NODE, } hubbub_treebuilder_opttype; /** @@ -44,6 +45,8 @@ typedef union hubbub_treebuilder_optparams { } error_handler; hubbub_tree_handler tree_handler; + + void *document_node; } hubbub_treebuilder_optparams; /* Create a hubbub treebuilder */ -- cgit v1.2.3