From 427ce60a0cf055347b2fd7ac4a37bec59d65c3ac Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 7 Apr 2008 02:04:05 +0000 Subject: Implement "in body" insertion mode. Modify treebuilder test driver to bring it in line with API changes. A few minimal bits of testdata for various bits of in body. Proper testing will come once we're actually building a tree. svn path=/trunk/hubbub/; revision=4076 --- src/treebuilder/Makefile | 2 +- src/treebuilder/in_body.c | 1898 +++++++++++++++++++++++++++++++++++++++++ src/treebuilder/in_body.h | 18 + src/treebuilder/internal.h | 190 +++++ src/treebuilder/treebuilder.c | 801 ++++++++--------- 5 files changed, 2477 insertions(+), 432 deletions(-) create mode 100644 src/treebuilder/in_body.c create mode 100644 src/treebuilder/in_body.h create mode 100644 src/treebuilder/internal.h (limited to 'src') diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile index d63a7a3..3353a26 100644 --- a/src/treebuilder/Makefile +++ b/src/treebuilder/Makefile @@ -22,7 +22,7 @@ CFLAGS += -I$(CURDIR) # Objects -OBJS = treebuilder +OBJS = in_body treebuilder .PHONY: clean debug distclean export release setup test diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c new file mode 100644 index 0000000..7fefdfd --- /dev/null +++ b/src/treebuilder/in_body.c @@ -0,0 +1,1898 @@ +/* + * This file is part of Hubbub. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include <assert.h> +#include <string.h> + +#include "treebuilder/in_body.h" +#include "utils/utils.h" + +#undef DEBUG_IN_BODY + +typedef struct bookmark { + formatting_list_entry *prev; + formatting_list_entry *next; +} bookmark; + +static void process_character(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static bool process_start_tag(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static bool process_end_tag(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +static void process_html_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_body_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_container_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_form_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type); +static void process_plaintext_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_a_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_presentational_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type); +static void process_nobr_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_button_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_applet_marquee_object_in_body( + hubbub_treebuilder *treebuilder, const hubbub_token *token, + element_type type); +static void process_hr_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_image_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void 
process_input_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_isindex_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_textarea_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_select_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +static void process_phrasing_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +static bool process_0body_in_body(hubbub_treebuilder *treebuilder); +static void process_0container_in_body(hubbub_treebuilder *treebuilder, + element_type type); +static void process_0p_in_body(hubbub_treebuilder *treebuilder); +static void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder, + element_type type); +static void process_0h_in_body(hubbub_treebuilder *treebuilder, + element_type type); +static void process_0presentational_in_body(hubbub_treebuilder *treebuilder, + element_type type); +static void process_0applet_button_marquee_object_in_body( + hubbub_treebuilder *treebuilder, element_type type); +static void process_0br_in_body(hubbub_treebuilder *treebuilder); +static void process_0generic_in_body(hubbub_treebuilder *treebuilder, + element_type type); + +static bool aa_find_and_validate_formatting_element( + hubbub_treebuilder *treebuilder, element_type type, + formatting_list_entry **element); +static formatting_list_entry *aa_find_formatting_element( + hubbub_treebuilder *treebuilder, element_type type); +static bool aa_find_furthest_block(hubbub_treebuilder *treebuilder, + formatting_list_entry *formatting_element, + uint32_t *furthest_block); +static void aa_remove_from_parent(hubbub_treebuilder *treebuilder, void *node); +static void aa_reparent_node(hubbub_treebuilder *treebuilder, void *node, + void *new_parent); +static void aa_find_bookmark_location_reparenting_misnested( + hubbub_treebuilder *treebuilder, + uint32_t formatting_element, uint32_t 
furthest_block, + bookmark *bookmark, uint32_t *last_node); +static void aa_remove_element_stack_item(hubbub_treebuilder *treebuilder, + uint32_t index, uint32_t limit); +static void aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder, + formatting_list_entry *element); +static void aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder, + void *node); + + +/** + * Handle tokens in "in body" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token, false otherwise + */ +bool handle_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + +#if !defined(NDEBUG) && defined(DEBUG_IN_BODY) + fprintf(stdout, "Processing token %d\n", token->type); + element_stack_dump(treebuilder, stdout); + formatting_list_dump(treebuilder, stdout); +#endif + + if (treebuilder->context.strip_leading_lr && + token->type != HUBBUB_TOKEN_CHARACTER) { + /* Reset the LR stripping flag */ + treebuilder->context.strip_leading_lr = false; + } + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + process_character(treebuilder, token); + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + break; + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_START_TAG: + reprocess = process_start_tag(treebuilder, token); + break; + case HUBBUB_TOKEN_END_TAG: + reprocess = process_end_tag(treebuilder, token); + break; + case HUBBUB_TOKEN_EOF: + for (uint32_t i = treebuilder->context.current_node; + i > 0; i--) { + element_type type = + treebuilder->context.element_stack[i].type; + + if (!(type == DD || type == DT || type == LI || + type == P || type == TBODY || + type == TD || type == TFOOT || + type == TH || type == THEAD || + type == TR || type == BODY)) { + /** \todo parse error */ + break; + } + } + break; + } + 
+#if !defined(NDEBUG) && defined(DEBUG_IN_BODY) + fprintf(stdout, "Processed\n"); + element_stack_dump(treebuilder, stdout); + formatting_list_dump(treebuilder, stdout); +#endif + + return reprocess; +} + +/** + * Process a character token + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_character(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + hubbub_string dummy = token->data.character; + + reconstruct_active_formatting_list(treebuilder); + + if (treebuilder->context.strip_leading_lr) { + const uint8_t *str = + treebuilder->input_buffer + dummy.data.off; + + /** \todo UTF-16 */ + if (*str == '\n') { + dummy.data.off++; + dummy.len--; + } + + treebuilder->context.strip_leading_lr = false; + } + + append_text(treebuilder, &dummy); +} + +/** + * Process a tag as if in "in body" mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token + */ +bool process_tag_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + + switch (token->type) + { + case HUBBUB_TOKEN_START_TAG: + reprocess = process_start_tag(treebuilder, token); + break; + case HUBBUB_TOKEN_END_TAG: + reprocess = process_end_tag(treebuilder, token); + break; + case HUBBUB_TOKEN_CHARACTER: + case HUBBUB_TOKEN_COMMENT: + case HUBBUB_TOKEN_DOCTYPE: + case HUBBUB_TOKEN_EOF: + assert(0); + break; + } + + return reprocess; +} + +/** + * Process a start tag + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token + */ +bool process_start_tag(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + process_html_in_body(treebuilder, token); + } else if (type == BASE || type == LINK || type == META) { + 
process_base_link_meta_in_head(treebuilder, + token, type); + } else if (type == SCRIPT) { + process_script_in_head(treebuilder, token); + } else if (type == STYLE) { + parse_generic_rcdata(treebuilder, token, false); + } else if (type == TITLE) { + parse_generic_rcdata(treebuilder, token, true); + } else if (type == BODY) { + process_body_in_body(treebuilder, token); + } else if (type == ADDRESS || type == BLOCKQUOTE || + type == CENTER || type == DIR || + type == DIV || type == DL || + type == FIELDSET || type == H1 || type == H2 || + type == H3 || type == H4 || type == H5 || + type == H6 || type == MENU || type == OL || + type == P || type == UL) { + process_container_in_body(treebuilder, token); + } else if (type == PRE || type == LISTING) { + process_container_in_body(treebuilder, token); + + treebuilder->context.strip_leading_lr = true; + } else if (type == FORM) { + process_form_in_body(treebuilder, token); + } else if (type == DD || type == DT || type == LI) { + process_dd_dt_li_in_body(treebuilder, token, type); + } else if (type == PLAINTEXT) { + process_plaintext_in_body(treebuilder, token); + } else if (type == A) { + process_a_in_body(treebuilder, token); + } else if (type == B || type == BIG || type == EM || + type == FONT || type == I || type == S || + type == SMALL || type == STRIKE || + type == STRONG || type == TT || type == U) { + process_presentational_in_body(treebuilder, + token, type); + } else if (type == NOBR) { + process_nobr_in_body(treebuilder, token); + } else if (type == BUTTON) { + process_button_in_body(treebuilder, token); + } else if (type == APPLET || type == MARQUEE || + type == OBJECT) { + process_applet_marquee_object_in_body(treebuilder, + token, type); + } else if (type == XMP) { + reconstruct_active_formatting_list(treebuilder); + parse_generic_rcdata(treebuilder, token, false); + } else if (type == TABLE) { + process_container_in_body(treebuilder, token); + + if (treebuilder->context.mode == IN_BODY) { + 
treebuilder->context.mode = IN_TABLE; + } + } else if (type == AREA || type == BASEFONT || + type == BGSOUND || type == BR || + type == EMBED || type == IMG || type == PARAM || + type == SPACER || type == WBR) { + reconstruct_active_formatting_list(treebuilder); + insert_element_no_push(treebuilder, &token->data.tag); + } else if (type == HR) { + process_hr_in_body(treebuilder, token); + } else if (type == IMAGE) { + process_image_in_body(treebuilder, token); + } else if (type == INPUT) { + process_input_in_body(treebuilder, token); + } else if (type == ISINDEX) { + process_isindex_in_body(treebuilder, token); + } else if (type == TEXTAREA) { + process_textarea_in_body(treebuilder, token); + } else if (type == IFRAME || type == NOEMBED || + type == NOFRAMES || + (false /* scripting */ && type == NOSCRIPT)) { + parse_generic_rcdata(treebuilder, token, false); + } else if (type == SELECT) { + process_select_in_body(treebuilder, token); + + if (treebuilder->context.mode == IN_BODY) { + treebuilder->context.mode = IN_SELECT; + } else if (treebuilder->context.mode == IN_TABLE || + treebuilder->context.mode == IN_CAPTION || + treebuilder->context.mode == IN_COLUMN_GROUP || + treebuilder->context.mode == IN_TABLE_BODY || + treebuilder->context.mode == IN_ROW || + treebuilder->context.mode == IN_CELL) { + treebuilder->context.mode = IN_SELECT_IN_TABLE; + } + } else if (type == CAPTION || type == COL || type == COLGROUP || + type == FRAME || type == FRAMESET || + type == HEAD || type == OPTION || + type == OPTGROUP || type == TBODY || + type == TD || type == TFOOT || type == TH || + type == THEAD || type == TR) { + /** \todo parse error */ +/* } else if (type == EVENT_SOURCE || type == SECTION || + type == NAV || type == ARTICLE || + type == ASIDE || type == HEADER || + type == FOOTER || type == DATAGRID || + type == COMMAND) { +*/ } else { + process_phrasing_in_body(treebuilder, token); + } + + return reprocess; +} + +/** + * Process an end tag + * + * \param treebuilder 
The treebuilder instance + * \param token The token to process + * \return True to reprocess the token + */ +bool process_end_tag(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == BODY) { + if (process_0body_in_body(treebuilder) && + treebuilder->context.mode == IN_BODY) { + treebuilder->context.mode = AFTER_BODY; + } + } else if (type == HTML) { + /* Act as if </body> has been seen then, if + * that wasn't ignored, reprocess this token */ + if (process_0body_in_body(treebuilder)) { + if (treebuilder->context.mode == IN_BODY) + treebuilder->context.mode = AFTER_BODY; + reprocess = true; + } + } else if (type == ADDRESS || type == BLOCKQUOTE || + type == CENTER || type == DIR || type == DIV || + type == DL || type == FIELDSET || + type == LISTING || type == MENU || + type == OL || type == PRE || type == UL || + type == FORM) { + process_0container_in_body(treebuilder, type); + } else if (type == P) { + process_0p_in_body(treebuilder); + } else if (type == DD || type == DT || type == LI) { + process_0dd_dt_li_in_body(treebuilder, type); + } else if (type == H1 || type == H2 || type == H3 || + type == H4 || type == H5 || type == H6) { + process_0h_in_body(treebuilder, type); + } else if (type == A || type == B || type == BIG || + type == EM || type == FONT || type == I || + type == NOBR || type == S || type == SMALL || + type == STRIKE || type == STRONG || + type == TT || type == U) { + process_0presentational_in_body(treebuilder, type); + } else if (type == APPLET || type == BUTTON || + type == MARQUEE || type == OBJECT) { + process_0applet_button_marquee_object_in_body( + treebuilder, type); + } else if (type == BR) { + process_0br_in_body(treebuilder); + } else if (type == AREA || type == BASEFONT || + type == BGSOUND || type == EMBED || + type == HR || type == IFRAME || + type == IMAGE || type == IMG || + type == INPUT || type == 
ISINDEX || + type == NOEMBED || type == NOFRAMES || + type == PARAM || type == SELECT || + type == SPACER || type == TABLE || + type == TEXTAREA || type == WBR || + (false /* scripting enabled */ && + type == NOSCRIPT)) { + /** \todo parse error */ +/* } else if (type == EVENT_SOURCE || type == SECTION || + type == NAV || type == ARTICLE || + type == ASIDE || type == HEADER || + type == FOOTER || type == DATAGRID || + type == COMMAND) { +*/ } else { + process_0generic_in_body(treebuilder, type); + } + + return reprocess; +} + +/** + * Process a <html> start tag as if in "in body" + * + * The token's attributes are passed to the tree handler's add_attributes + * callback for the node held at index 0 of the element stack. + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_html_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + /** \todo parse error */ + + treebuilder->tree_handler->add_attributes( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[0].node, + token->data.tag.attributes, + token->data.tag.n_attributes); +} + +/** + * Process a <body> start tag as if in "in body" + * + * Does nothing unless the element stack has an entry at index 1 and that + * entry is a BODY element; otherwise the token's attributes are passed to + * add_attributes for that node. + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_body_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + /** \todo parse error */ + + if (treebuilder->context.current_node < 1 || + treebuilder->context.element_stack[1].type != BODY) + return; + + treebuilder->tree_handler->add_attributes( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[1].node, + token->data.tag.attributes, + token->data.tag.n_attributes); +} + +/** + * Process a generic container start tag as if in "in body" + * + * If a P element is in scope it is closed first (as if an end tag for it + * had been seen), then the new element is inserted. + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_container_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + insert_element(treebuilder, &token->data.tag); +} + +/** + * Process a
<form> start tag as if in "in body" + * + * Ignored (apart from the pending parse error) when a form element is + * already recorded in the context. Otherwise an in-scope P is closed, the + * element is inserted, and the new current node is referenced and stored + * as the context's form element. + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_form_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + if (treebuilder->context.form_element != NULL) { + /** \todo parse error */ + } else { + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + insert_element(treebuilder, &token->data.tag); + + /* Claim a reference on the node and + * use it as the current form element */ + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + treebuilder->context.form_element = + treebuilder->context.element_stack[ + treebuilder->context.current_node].node; + } +} + +/** + * Process a <dd>, <dt> or <li> start tag as if in "in body" + * + * Closes an in-scope P, then walks the element stack downwards from the + * current node looking for an open LI (for LI) or DD/DT (for DD/DT). The + * walk stops early at the first entry that is neither a formatting nor a + * phrasing element and is not ADDRESS or DIV. If a matching entry was + * found, everything from the current node down to and including it is + * popped and unreferenced before the new element is inserted. + * + * Note that the `void *node` declared inside the pop loop shadows the + * stack index `node`; the loop condition sits outside that inner block + * and therefore compares against the stack index. + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \param type The element type + */ +void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t node; + + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + /* Find last LI/(DD,DT) on stack, if any */ + for (node = treebuilder->context.current_node; node > 0; node--) { + element_type ntype = stack[node].type; + + if (type == LI && ntype == LI) + break; + + if (((type == DD || type == DT) && + (ntype == DD || ntype == DT))) + break; + + if (!is_formatting_element(ntype) && + !is_phrasing_element(ntype) && + ntype != ADDRESS && + ntype != DIV) + break; + } + + /* If we found one, then pop all nodes up to and including it */ + if (stack[node].type == LI || stack[node].type == DD || + stack[node].type == DT) { + /* Check that we're only popping one node + * and emit a parse error if not */ + if (treebuilder->context.current_node > node) { + /** \todo parse error */ + } + + do { + element_type otype; + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + } while (treebuilder->context.current_node >= node); + } + + insert_element(treebuilder, &token->data.tag); +} + +/** + * Process a <plaintext> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_plaintext_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + hubbub_tokeniser_optparams params; + + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + insert_element(treebuilder, &token->data.tag); + + params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; + + 
hubbub_tokeniser_setopt(treebuilder->tokeniser, + HUBBUB_TOKENISER_CONTENT_MODEL, + &params); +} + +/** + * Process a <a> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_a_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + formatting_list_entry *entry = + aa_find_formatting_element(treebuilder, A); + + if (entry != NULL) { + uint32_t index = entry->stack_index; + void *node = entry->details.node; + formatting_list_entry *entry2; + + /** \todo parse error */ + + /* Act as if </a> were seen */ + process_0presentational_in_body(treebuilder, A); + + entry2 = aa_find_formatting_element(treebuilder, A); + + /* Remove from formatting list, if it's still there */ + if (entry2 == entry && entry2->details.node == node) { + element_type otype; + void *onode; + uint32_t oindex; + + formatting_list_remove(treebuilder, entry, + &otype, &onode, &oindex); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, onode); + + } + + /* Remove from the stack of open elements, if still there */ + if (index <= treebuilder->context.current_node && + treebuilder->context.element_stack[index].node + == node) { + aa_remove_element_stack_item(treebuilder, index, + treebuilder->context.current_node); + treebuilder->context.current_node--; + } + } + + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + formatting_list_append(treebuilder, A, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.current_node); +} + +/** + * Process a <b>, <big>, <em>, <font>, <i>, <s>, <small>, + * <strike>, <strong>, <tt>, or <u> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The 
token to process + * \param type The element type + */ +void process_presentational_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type) +{ + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + formatting_list_append(treebuilder, type, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.current_node); +} + +/** + * Process a <nobr> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_nobr_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + reconstruct_active_formatting_list(treebuilder); + + if (element_in_scope(treebuilder, NOBR, false)) { + /** \todo parse error */ + + /* Act as if </nobr> were seen */ + process_0presentational_in_body(treebuilder, NOBR); + + /* Yes, again */ + reconstruct_active_formatting_list(treebuilder); + } + + insert_element(treebuilder, &token->data.tag); + + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + formatting_list_append(treebuilder, NOBR, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.current_node); +} + +/** + * Process a <button> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_button_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + if (element_in_scope(treebuilder, BUTTON, false)) { + /** \todo parse error */ + + /* Act as if </button> has been seen */ + process_0applet_button_marquee_object_in_body(treebuilder, + BUTTON); + } + + 
reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + if (treebuilder->context.form_element != NULL) { + treebuilder->tree_handler->form_associate( + treebuilder->tree_handler->ctx, + treebuilder->context.form_element, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + } + + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + formatting_list_append(treebuilder, BUTTON, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.current_node); +} + +/** + * Process an <applet>, <marquee> or <object> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \param type The element type + */ +void process_applet_marquee_object_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type) +{ + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + formatting_list_append(treebuilder, type, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.current_node); +} + +/** + * Process an <hr> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_hr_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + insert_element_no_push(treebuilder, &token->data.tag); +} + +/** + * Process an <image> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void 
process_image_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + hubbub_tag tag; + + /** \todo UTF-16 */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "img"; + tag.name.len = SLEN("img"); + + tag.n_attributes = token->data.tag.n_attributes; + tag.attributes = token->data.tag.attributes; + + reconstruct_active_formatting_list(treebuilder); + + insert_element_no_push(treebuilder, &tag); +} + +/** + * Process an <input> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_input_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + element_type otype; + void *node; + + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + if (treebuilder->context.form_element != NULL) { + treebuilder->tree_handler->form_associate( + treebuilder->tree_handler->ctx, + treebuilder->context.form_element, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + } + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + node); +} + +/** + * Process an <isindex> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_isindex_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + hubbub_token dummy; + hubbub_attribute *action = NULL; + hubbub_attribute *prompt = NULL; + hubbub_attribute *attrs = NULL; + size_t n_attrs = 0; + + /** \todo parse error */ + + if (treebuilder->context.form_element != NULL) + return; + + /* First up, clone the token's attributes */ + if (token->data.tag.n_attributes > 0) { + attrs = treebuilder->alloc(NULL, + token->data.tag.n_attributes * sizeof(hubbub_attribute), + treebuilder->alloc_pw); + if (attrs == NULL) { + /** \todo error 
handling */ + return; + } + + for (uint32_t i = 0; i < token->data.tag.n_attributes; i++) { + hubbub_attribute *attr = &token->data.tag.attributes[i]; + const uint8_t *name = treebuilder->input_buffer + + attr->name.data.off; + + /* The name must match in full: comparing only the first + * attr->name.len bytes would also accept any prefix of + * "action"/"prompt", including an empty name */ + if (attr->name.len == SLEN("action") && + strncmp((const char *) name, "action", + SLEN("action")) == 0) { + action = attr; + } else if (attr->name.len == SLEN("prompt") && + strncmp((const char *) name, "prompt", + SLEN("prompt")) == 0) { + prompt = attr; + } else { + attrs[n_attrs++] = *attr; + } + } + } + + /* isindex algorithm */ + + /* Set up dummy as a start tag token */ + dummy.type = HUBBUB_TOKEN_START_TAG; + dummy.data.tag.name.type = HUBBUB_STRING_PTR; + + /* Act as if <form> were seen */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "form"; + dummy.data.tag.name.len = SLEN("form"); + + dummy.data.tag.n_attributes = action != NULL ? 1 : 0; + dummy.data.tag.attributes = action; + + process_form_in_body(treebuilder, &dummy); + + /* Act as if <hr> were seen */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "hr"; + dummy.data.tag.name.len = SLEN("hr"); + dummy.data.tag.n_attributes = 0; + dummy.data.tag.attributes = NULL; + + process_hr_in_body(treebuilder, &dummy); + + /* Act as if <p> were seen */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "p"; + dummy.data.tag.name.len = SLEN("p"); + dummy.data.tag.n_attributes = 0; + dummy.data.tag.attributes = NULL; + + process_container_in_body(treebuilder, &dummy); + + /* Act as if <label> were seen */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "label"; + dummy.data.tag.name.len = SLEN("label"); + dummy.data.tag.n_attributes = 0; + dummy.data.tag.attributes = NULL; + + process_phrasing_in_body(treebuilder, &dummy); + + /* Act as if a stream of characters were seen */ + dummy.type = HUBBUB_TOKEN_CHARACTER; + if (prompt != NULL) { + dummy.data.character = prompt->value; + } else { + /** \todo Localisation */ +#define PROMPT "This is a searchable index. 
Insert your search keywords here:" + dummy.data.character.type = HUBBUB_STRING_PTR; + dummy.data.character.data.ptr = (const uint8_t *) PROMPT; + dummy.data.character.len = SLEN(PROMPT); +#undef PROMPT + } + + process_character(treebuilder, &dummy); + + /* Act as if <input> was seen */ + dummy.type = HUBBUB_TOKEN_START_TAG; + dummy.data.tag.name.type = HUBBUB_STRING_PTR; + dummy.data.tag.name.data.ptr = (const uint8_t *) "input"; + dummy.data.tag.name.len = SLEN("input"); + + dummy.data.tag.n_attributes = n_attrs; + dummy.data.tag.attributes = attrs; + + process_input_in_body(treebuilder, &dummy); + + /* Act as if </label> was seen */ + process_0generic_in_body(treebuilder, LABEL); + + /* Act as if </p> was seen */ + process_0p_in_body(treebuilder); + + /* Act as if <hr> was seen */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "hr"; + dummy.data.tag.name.len = SLEN("hr"); + dummy.data.tag.n_attributes = 0; + dummy.data.tag.attributes = NULL; + + process_hr_in_body(treebuilder, &dummy); + + /* Act as if </form> was seen */ + process_0container_in_body(treebuilder, FORM); + + /* Clean up */ + treebuilder->alloc(attrs, 0, treebuilder->alloc_pw); +} + +/** + * Process a <textarea> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_textarea_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + treebuilder->context.strip_leading_lr = true; + parse_generic_rcdata(treebuilder, token, true); +} + +/** + * Process a <select> start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_select_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); + + if (treebuilder->context.form_element != NULL) { + treebuilder->tree_handler->form_associate( + 
treebuilder->tree_handler->ctx, + treebuilder->context.form_element, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + } +} + +/** + * Process a phrasing start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_phrasing_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag); +} + +/** + * Process a </body> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \return True if processed, false otherwise + */ +bool process_0body_in_body(hubbub_treebuilder *treebuilder) +{ + bool processed = true; + + if (!element_in_scope(treebuilder, BODY, false)) { + /** \todo parse error */ + processed = true; + } else { + element_context *stack = treebuilder->context.element_stack; + uint32_t node; + + for (node = treebuilder->context.current_node; + node > 0; node--) { + element_type ntype = stack[node].type; + + if (ntype != DD && ntype != DT && ntype != LI && + ntype != P && ntype != TBODY && + ntype != TD && ntype != TFOOT && + ntype != TH && ntype != THEAD && + ntype != TR && ntype != BODY) { + /** \todo parse error */ + } + } + } + + return processed; +} + +/** + * Process a container end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0container_in_body(hubbub_treebuilder *treebuilder, + element_type type) +{ + if (type == FORM) { + assert(treebuilder->context.form_element != NULL); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.form_element); + treebuilder->context.form_element = NULL; + } + + if (!element_in_scope(treebuilder, type, false)) { + /** \todo parse error */ + } else { + uint32_t popped = 0; + element_type otype; + + close_implied_end_tags(treebuilder, UNKNOWN); + + do { + 
void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + popped++; + } while (otype != type); + + if (popped > 1) { + /** \todo parse error */ + } + } +} + +/** + * Process a </p> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + */ +void process_0p_in_body(hubbub_treebuilder *treebuilder) +{ + uint32_t popped = 0; + + if (treebuilder->context.element_stack[ + treebuilder->context.current_node].type != P) { + /** \todo parse error */ + } + + while(element_in_scope(treebuilder, P, false)) { + element_type type; + void *node; + + if (!element_stack_pop(treebuilder, &type, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, node); + + popped++; + } + + if (popped == 0) { + hubbub_token dummy; + + dummy.type = HUBBUB_TOKEN_START_TAG; + dummy.data.tag.name.type = HUBBUB_STRING_PTR; + /** \todo UTF-16 */ + dummy.data.tag.name.data.ptr = (const uint8_t *) "p"; + dummy.data.tag.name.len = SLEN("p"); + dummy.data.tag.n_attributes = 0; + dummy.data.tag.attributes = NULL; + + process_container_in_body(treebuilder, &dummy); + + /* Reprocess the end tag. 
This is safe as we've just + * inserted a <p> into the current scope */ + process_0p_in_body(treebuilder); + } +} + +/** + * Process a </dd>, </dt>, or </li> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder, + element_type type) +{ + if (!element_in_scope(treebuilder, type, false)) { + /** \todo parse error */ + } else { + uint32_t popped = 0; + element_type otype; + + close_implied_end_tags(treebuilder, type); + + do { + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + popped++; + } while (otype != type); + + if (popped > 1) { + /** \todo parse error */ + } + } +} + +/** + * Process a </h1>, </h2>, </h3>, </h4>, + * </h5>, or </h6> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0h_in_body(hubbub_treebuilder *treebuilder, + element_type type) +{ + UNUSED(type); + + /** \todo optimise this */ + if (element_in_scope(treebuilder, H1, false) || + element_in_scope(treebuilder, H2, false) || + element_in_scope(treebuilder, H3, false) || + element_in_scope(treebuilder, H4, false) || + element_in_scope(treebuilder, H5, false) || + element_in_scope(treebuilder, H6, false)) { + uint32_t popped = 0; + element_type otype; + + close_implied_end_tags(treebuilder, UNKNOWN); + + do { + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + popped++; + } while (otype != H1 && otype != H2 && + otype != H3 && otype != H4 && + otype != H5 && otype != H6); + + if (popped > 1) { + /** \todo parse error */ + } + } else { + /** \todo parse error */ + } +} + +/** + * Process a presentational end tag as if in "in 
body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0presentational_in_body(hubbub_treebuilder *treebuilder, + element_type type) +{ + /* Welcome to the adoption agency */ + + while (true) { + element_context *stack = treebuilder->context.element_stack; + + /* 1 */ + formatting_list_entry *entry; + uint32_t formatting_element; + + if (!aa_find_and_validate_formatting_element(treebuilder, + type, &entry)) + return; + + /* Take a copy of the stack index for use + * during stack manipulation */ + formatting_element = entry->stack_index; + + /* 2 & 3 */ + uint32_t furthest_block; + + if (!aa_find_furthest_block(treebuilder, + entry, &furthest_block)) + return; + + /* 4 */ + uint32_t common_ancestor = formatting_element - 1; + + /* 5 */ + aa_remove_from_parent(treebuilder, stack[furthest_block].node); + + /* 6 */ + bookmark bookmark; + + bookmark.prev = entry->prev; + bookmark.next = entry->next; + + /* 7 */ + uint32_t last_node; + + aa_find_bookmark_location_reparenting_misnested(treebuilder, + formatting_element, furthest_block, + &bookmark, &last_node); + + /* 8 */ + if (stack[common_ancestor].type == TABLE || + stack[common_ancestor].type == TBODY || + stack[common_ancestor].type == TFOOT || + stack[common_ancestor].type == THEAD || + stack[common_ancestor].type == TR) { + aa_insert_into_foster_parent(treebuilder, + stack[last_node].node); + } else { + aa_reparent_node(treebuilder, stack[last_node].node, + stack[common_ancestor].node); + } + + /* 9 */ + void *fe_clone = NULL; + + treebuilder->tree_handler->clone_node( + treebuilder->tree_handler->ctx, + entry->details.node, false, &fe_clone); + + /* 10 */ + treebuilder->tree_handler->reparent_children( + treebuilder->tree_handler->ctx, + stack[furthest_block].node, fe_clone); + + /* 11 */ + void *clone_appended = NULL; + + treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + stack[furthest_block].node, fe_clone, + &clone_appended); + + 
/* 12 and 13 are reversed here so that we know the correct + * stack index to use when inserting into the formatting list */ + + /* 13 */ + aa_remove_element_stack_item(treebuilder, formatting_element, + furthest_block); + + /* Fix up furthest block index */ + furthest_block--; + + /* Now, in the gap after furthest block, + * we insert an entry for clone */ + stack[furthest_block + 1].type = entry->details.type; + stack[furthest_block + 1].node = clone_appended; + + /* 12 */ + element_type otype; + void *onode; + uint32_t oindex; + + formatting_list_remove(treebuilder, entry, + &otype, &onode, &oindex); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, onode); + + formatting_list_insert(treebuilder, + bookmark.prev, bookmark.next, + otype, fe_clone, furthest_block + 1); + + /* 14 */ + } +} + +/** + * Adoption agency: find and validate the formatting element + * + * \param treebuilder The treebuilder instance + * \param type Element type to search for + * \param element Pointer to location to receive list entry + * \return True to continue processing, false to stop + */ +bool aa_find_and_validate_formatting_element(hubbub_treebuilder *treebuilder, + element_type type, formatting_list_entry **element) +{ + formatting_list_entry *entry; + + entry = aa_find_formatting_element(treebuilder, type); + + if (entry == NULL || (entry->stack_index != 0 && + element_in_scope(treebuilder, entry->details.type, + false) != entry->stack_index)) { + /** \todo parse error */ + return false; + } + + if (entry->stack_index == 0) { + /* Not in element stack => remove from formatting list */ + element_type type; + void *node; + uint32_t index; + + /** \todo parse error */ + + if (!formatting_list_remove(treebuilder, entry, + &type, &node, &index)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, node); + + return false; + } + + if (entry->stack_index != treebuilder->context.current_node) { + /** \todo 
parse error */ + } + + *element = entry; + + return true; +} + +/** + * Adoption agency: find formatting element + * + * \param treebuilder The treebuilder instance + * \param type Type of element to search for + * \return Pointer to formatting element, or NULL if none found + */ +formatting_list_entry *aa_find_formatting_element( + hubbub_treebuilder *treebuilder, element_type type) +{ + formatting_list_entry *entry; + + for (entry = treebuilder->context.formatting_list_end; + entry != NULL; entry = entry->prev) { + + /* Assumption: HTML and TABLE elements are not in the list */ + if (is_scoping_element(entry->details.type) || + entry->details.type == type) + break; + } + + /* Check if we stopped on a marker, rather than a formatting element */ + if (entry != NULL && is_scoping_element(entry->details.type)) + entry = NULL; + + return entry; +} + +/** + * Adoption agency: find furthest block + * + * \param treebuilder The treebuilder instance + * \param formatting_element The formatting element + * \param furthest_block Pointer to location to receive furthest block + * \return True to continue processing (::furthest_block filled in). 
+ */ +bool aa_find_furthest_block(hubbub_treebuilder *treebuilder, + formatting_list_entry *formatting_element, + uint32_t *furthest_block) +{ + uint32_t fe_index = formatting_element->stack_index; + uint32_t fb; + + for (fb = fe_index + 1; fb <= treebuilder->context.current_node; fb++) { + element_type type = treebuilder->context.element_stack[fb].type; + + if (!(is_phrasing_element(type) || is_formatting_element(type))) + break; + } + + if (fb > treebuilder->context.current_node) { + element_type type; + void *node; + uint32_t index; + + /* Pop all elements off the stack up to, + * and including, the formatting element */ + do { + if (!element_stack_pop(treebuilder, &type, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + } while (treebuilder->context.current_node >= fe_index); + + /* Remove the formatting element from the list */ + if (!formatting_list_remove(treebuilder, formatting_element, + &type, &node, &index)) { + /* \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, node); + + return false; + } + + *furthest_block = fb; + + return true; +} + +/** + * Adoption agency: remove a node from its parent + * + * \param treebuilder The treebuilder instance + * \param node Node to remove + */ +void aa_remove_from_parent(hubbub_treebuilder *treebuilder, void *node) +{ + /* Get parent */ + void *parent = NULL; + + treebuilder->tree_handler->get_parent(treebuilder->tree_handler->ctx, + node, false, &parent); + + if (parent != NULL) { + void *removed; + + treebuilder->tree_handler->remove_child( + treebuilder->tree_handler->ctx, + parent, node, &removed); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, removed); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, parent); + } +} + +/** + * Adoption agency: reparent a node + * + * \param treebuilder The treebuilder instance + * \param node The node 
to reparent + * \param new_parent The new parent + */ +void aa_reparent_node(hubbub_treebuilder *treebuilder, void *node, + void *new_parent) +{ + void *appended; + + aa_remove_from_parent(treebuilder, node); + + treebuilder->tree_handler->append_child(treebuilder->tree_handler->ctx, + new_parent, node, &appended); + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + appended); +} + +/** + * Adoption agency: this is step 7 + * + * \param treebuilder The treebuilder instance + * \param formatting_element The stack index of the formatting element + * \param furthest_block Index of furthest block in element stack + * \param bookmark Pointer to bookmark (pre-initialised) + * \param last_node Pointer to location to receive index of last node + */ +void aa_find_bookmark_location_reparenting_misnested( + hubbub_treebuilder *treebuilder, + uint32_t formatting_element, uint32_t furthest_block, + bookmark *bookmark, uint32_t *last_node) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t node, last; + formatting_list_entry *node_entry; + + node = last = furthest_block; + + while (true) { + /* i */ + node--; + + /* ii */ + for (node_entry = treebuilder->context.formatting_list_end; + node_entry != NULL; + node_entry = node_entry->next) { + if (node_entry->stack_index == node) + break; + } + + /* Node is not in list of active formatting elements */ + if (node_entry == NULL) { + aa_remove_element_stack_item(treebuilder, + node, treebuilder->context.current_node); + + /* Update furthest block index and the last node index, + * as these are always below node in the stack */ + furthest_block--; + last--; + + /* Fixup the current_node index */ + treebuilder->context.current_node--; + + /* Back to i */ + continue; + } + + /* iii */ + if (node == formatting_element) + break; + + /* iv */ + if (last == furthest_block) { + bookmark->prev = node_entry->prev; + bookmark->next = node_entry->next; + } + + /* v */ + bool children = false; + + 
treebuilder->tree_handler->has_children( + treebuilder->tree_handler->ctx, + node_entry->details.node, &children); + + if (children) { + aa_clone_and_replace_entries(treebuilder, node_entry); + } + + /* vi */ + aa_reparent_node(treebuilder, + stack[last].node, stack[node].node); + + /* vii */ + last = node; + + /* viii */ + } + + *last_node = last; +} + +/** + * Adoption agency: remove an entry from the stack at the given index + * + * \param treebuilder The treebuilder instance + * \param index The index of the item to remove + * \param limit The index of the last item to move + * + * Preconditions: index < limit, limit <= current_node + * Postcondition: stack[limit] is empty + */ +void aa_remove_element_stack_item(hubbub_treebuilder *treebuilder, + uint32_t index, uint32_t limit) +{ + element_context *stack = treebuilder->context.element_stack; + + assert(index < limit); + assert(limit <= treebuilder->context.current_node); + + /* First, scan over subsequent entries in the stack, + * searching for them in the list of active formatting + * entries. 
If found, update the corresponding + * formatting list entry's stack index to match the + * new stack location */ + for (uint32_t n = index + 1; n <= limit; n++) { + if (is_formatting_element(stack[n].type) || + (is_scoping_element(stack[n].type) && + stack[n].type != HTML && + stack[n].type != TABLE)) { + formatting_list_entry *e; + + for (e = treebuilder->context.formatting_list_end; + e != NULL; e = e->prev) { + if (e->stack_index == n) + e->stack_index--; + } + } + } + + /* Reduce node's reference count */ + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + stack[index].node); + + /* Now, shuffle the stack up one, removing node in the process */ + memmove(&stack[index], &stack[index + 1], + (limit - index) * sizeof(element_context)); +} + +/** + * Adoption agency: shallow clone a node and replace its formatting list + * and element stack entries + * + * \param treebuilder The treebuilder instance + * \param element The item in the formatting list containing the node + */ +void aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder, + formatting_list_entry *element) +{ + element_type otype; + uint32_t oindex; + void *clone, *onode; + + /* Shallow clone of node */ + treebuilder->tree_handler->clone_node(treebuilder->tree_handler->ctx, + element->details.node, false, &clone); + + /* Replace formatting list entry for node with clone */ + formatting_list_replace(treebuilder, element, + element->details.type, clone, element->stack_index, + &otype, &onode, &oindex); + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + onode); + + treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx, + clone); + + /* Replace node's stack entry with clone */ + treebuilder->context.element_stack[element->stack_index].node = clone; + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + onode); +} + +/** + * Adoption agency: locate foster parent and insert node into it + * + * \param treebuilder The 
treebuilder instance + * \param node The node to insert + */ +void aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder, void *node) +{ + element_context *stack = treebuilder->context.element_stack; + void *foster_parent = NULL; + bool insert = false; + void *inserted; + + if (treebuilder->context.current_table == 0) { + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + stack[0].node); + + foster_parent = stack[0].node; + } else { + void *t_parent = NULL; + + treebuilder->tree_handler->get_parent( + treebuilder->tree_handler->ctx, + stack[treebuilder->context.current_table].node, + true, &t_parent); + + if (t_parent != NULL) { + foster_parent = t_parent; + insert = true; + } else { + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + stack[treebuilder->context. + current_table - 1].node); + foster_parent = stack[treebuilder->context. + current_table - 1].node; + } + } + + if (insert) { + treebuilder->tree_handler->insert_before( + treebuilder->tree_handler->ctx, + foster_parent, node, + stack[treebuilder->context.current_table].node, + &inserted); + } else { + treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + foster_parent, node, + &inserted); + } + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + inserted); + + treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, + foster_parent); +} + + +/** + * Process an </applet>, <button>, <marquee>, + * or <object> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0applet_button_marquee_object_in_body( + hubbub_treebuilder *treebuilder, element_type type) +{ + if (!element_in_scope(treebuilder, type, false)) { + /** \todo parse error */ + } else { + uint32_t popped = 0; + element_type otype; + + close_implied_end_tags(treebuilder, UNKNOWN); + + do { + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** 
\todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + popped++; + } while (otype != type); + + if (popped > 1) { + /** \todo parse error */ + } + + clear_active_formatting_list_to_marker(treebuilder); + } +} + +/** + * Process a </br> end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + */ +void process_0br_in_body(hubbub_treebuilder *treebuilder) +{ + hubbub_tag tag; + + /** \todo parse error */ + + /* Act as if <br> has been seen. */ + + /** \todo UTF-16 */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "br"; + tag.name.len = SLEN("br"); + + tag.n_attributes = 0; + tag.attributes = NULL; + + reconstruct_active_formatting_list(treebuilder); + + insert_element_no_push(treebuilder, &tag); +} + +/** + * Process a generic end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param type The element type + */ +void process_0generic_in_body(hubbub_treebuilder *treebuilder, + element_type type) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t node = treebuilder->context.current_node; + + do { + if (stack[node].type == type) { + uint32_t popped = 0; + element_type otype; + + close_implied_end_tags(treebuilder, UNKNOWN); + + do { + void *node; + + if (!element_stack_pop(treebuilder, + &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + popped++; + } while (otype != type); + + if (popped > 1) { + /** \todo parse error */ + } + + break; + } else if (!is_formatting_element(stack[node].type) && + !is_phrasing_element(stack[node].type)) { + /** \todo parse error */ + break; + } + } while (--node > 0); +} + diff --git a/src/treebuilder/in_body.h b/src/treebuilder/in_body.h new file mode 100644 index 0000000..7d1154e --- /dev/null +++ b/src/treebuilder/in_body.h @@ -0,0 +1,18 @@ +/* + * This file is part of Hubbub. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org> + */ + +#ifndef hubbub_treebuilder_in_body_h_ +#define hubbub_treebuilder_in_body_h_ + +#include "treebuilder/internal.h" + +bool handle_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); +bool process_tag_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +#endif + diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h new file mode 100644 index 0000000..e5410f5 --- /dev/null +++ b/src/treebuilder/internal.h @@ -0,0 +1,190 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org> + */ + +#ifndef hubbub_treebuilder_internal_h_ +#define hubbub_treebuilder_internal_h_ + +#include "treebuilder/treebuilder.h" + +typedef enum +{ + INITIAL, + BEFORE_HTML, + BEFORE_HEAD, + IN_HEAD, + IN_HEAD_NOSCRIPT, + AFTER_HEAD, + IN_BODY, + IN_TABLE, + IN_CAPTION, + IN_COLUMN_GROUP, + IN_TABLE_BODY, + IN_ROW, + IN_CELL, + IN_SELECT, + IN_SELECT_IN_TABLE, + AFTER_BODY, + IN_FRAMESET, + AFTER_FRAMESET, + AFTER_AFTER_BODY, + AFTER_AFTER_FRAMESET, + GENERIC_RCDATA, + SCRIPT_COLLECT_CHARACTERS, +} insertion_mode; + +typedef enum +{ +/* Special */ + ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER, + COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME, + FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT, + ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT, + OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER, + STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR, +/* Scoping */ + APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, +/* Formatting */ + A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, +/* Phrasing */ + /**< \todo Enumerate 
phrasing elements */ + XMP, LABEL, + UNKNOWN, +} element_type; + +typedef struct element_context +{ + element_type type; + void *node; +} element_context; + +typedef struct formatting_list_entry +{ + element_context details; /**< Entry details */ + + uint32_t stack_index; /**< Index into element stack */ + + struct formatting_list_entry *prev; /**< Previous in list */ + struct formatting_list_entry *next; /**< Next in list */ +} formatting_list_entry; + +typedef struct hubbub_treebuilder_context +{ + insertion_mode mode; /**< The current insertion mode */ + +#define ELEMENT_STACK_CHUNK 128 + element_context *element_stack; /**< Stack of open elements */ + uint32_t stack_alloc; /**< Number of stack slots allocated */ + uint32_t current_node; /**< Index of current node in stack */ + uint32_t current_table; /**< Index of current table in stack */ + + formatting_list_entry *formatting_list; /**< List of active formatting + * elements */ + formatting_list_entry *formatting_list_end; /**< End of active + * formatting list */ + + void *head_element; /**< Pointer to HEAD element */ + + void *form_element; /**< Pointer to most recently + * opened FORM element */ + + void *document; /**< Pointer to the document node */ + + struct { + insertion_mode mode; /**< Insertion mode to return to */ + void *node; /**< Node to attach Text child to */ + element_type type; /**< Type of node */ + hubbub_string string; /**< Text data */ + } collect; /**< Context for character collecting */ + + bool strip_leading_lr; /**< Whether to strip a LR from the + * start of the next character sequence + * received */ +} hubbub_treebuilder_context; + +struct hubbub_treebuilder +{ + hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */ + + const uint8_t *input_buffer; /**< Start of tokeniser's buffer */ + size_t input_buffer_len; /**< Length of input buffer */ + + hubbub_treebuilder_context context; + + hubbub_tree_handler *tree_handler; + + hubbub_buffer_handler buffer_handler; + void *buffer_pw; 
+ + hubbub_error_handler error_handler; + void *error_pw; + + hubbub_alloc alloc; /**< Memory (de)allocation function */ + void *alloc_pw; /**< Client private data */ +}; + +bool process_characters_expect_whitespace( + hubbub_treebuilder *treebuilder, const hubbub_token *token, + bool insert_into_current_node); +void process_comment_append(hubbub_treebuilder *treebuilder, + const hubbub_token *token, void *parent); +void parse_generic_rcdata(hubbub_treebuilder *treebuilder, + const hubbub_token *token, bool rcdata); +void process_base_link_meta_in_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token, element_type type); +void process_script_in_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +uint32_t element_in_scope(hubbub_treebuilder *treebuilder, + element_type type, bool in_table); +void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder); +void clear_active_formatting_list_to_marker( + hubbub_treebuilder *treebuilder); +void insert_element(hubbub_treebuilder *treebuilder, + const hubbub_tag *tag_name); +void insert_element_no_push(hubbub_treebuilder *treebuilder, + const hubbub_tag *tag_name); +void close_implied_end_tags(hubbub_treebuilder *treebuilder, + element_type except); +void reset_insertion_mode(hubbub_treebuilder *treebuilder); +void append_text(hubbub_treebuilder *treebuilder, + const hubbub_string *string); + +element_type element_type_from_name(hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name); + +bool is_special_element(element_type type); +bool is_scoping_element(element_type type); +bool is_formatting_element(element_type type); +bool is_phrasing_element(element_type type); + +bool element_stack_push(hubbub_treebuilder *treebuilder, + element_type type, void *node); +bool element_stack_pop(hubbub_treebuilder *treebuilder, + element_type *type, void **node); + +bool formatting_list_append(hubbub_treebuilder *treebuilder, + element_type type, void *node, uint32_t stack_index); 
+bool formatting_list_insert(hubbub_treebuilder *treebuilder, + formatting_list_entry *prev, formatting_list_entry *next, + element_type type, void *node, uint32_t stack_index); +bool formatting_list_remove(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type *type, void **node, uint32_t *stack_index); +bool formatting_list_replace(hubbub_treebuilder *treebuilder, + formatting_list_entry *entry, + element_type type, void *node, uint32_t stack_index, + element_type *otype, void **onode, uint32_t *ostack_index); + +#ifndef NDEBUG +#include <stdio.h> + +void element_stack_dump(hubbub_treebuilder *treebuilder, FILE *fp); +void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp); +#endif + +#endif + diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 01e31e4..90cca11 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -8,119 +8,61 @@ #include <assert.h> #include <string.h> +#include "treebuilder/in_body.h" +#include "treebuilder/internal.h" #include "treebuilder/treebuilder.h" #include "utils/utils.h" -typedef enum -{ - INITIAL, - BEFORE_HTML, - BEFORE_HEAD, - IN_HEAD, - IN_HEAD_NOSCRIPT, - AFTER_HEAD, - IN_BODY, - IN_TABLE, - IN_CAPTION, - IN_COLUMN_GROUP, - IN_TABLE_BODY, - IN_ROW, - IN_CELL, - IN_SELECT, - IN_SELECT_IN_TABLE, - AFTER_BODY, - IN_FRAMESET, - AFTER_FRAMESET, - AFTER_AFTER_BODY, - AFTER_AFTER_FRAMESET, - GENERIC_RCDATA, - SCRIPT_COLLECT_CHARACTERS, -} insertion_mode; - -typedef enum -{ -/* Special */ - ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER, - COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME, - FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT, - ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT, - OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER, - STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR, -/* Scoping */ - APPLET, BUTTON, CAPTION, HTML, 
MARQUEE, OBJECT, TABLE, TD, TH, -/* Formatting */ - A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, -/* Phrasing */ - /**< \todo Enumerate phrasing elements */ -} element_type; - -typedef struct element_context -{ +static const struct { + const char *name; element_type type; - void *node; -} element_context; - -typedef struct formatting_list_entry -{ - element_context details; /**< Entry details */ - - uint32_t stack_index; /**< Index into element stack */ - - struct formatting_list_entry *prev; /**< Previous in list */ - struct formatting_list_entry *next; /**< Next in list */ -} formatting_list_entry; - -typedef struct hubbub_treebuilder_context -{ - insertion_mode mode; /**< The current insertion mode */ - -#define ELEMENT_STACK_CHUNK 128 - element_context *element_stack; /**< Stack of open elements */ - uint32_t stack_alloc; /**< Number of stack slots allocated */ - uint32_t current_node; /**< Index of current node in stack */ - uint32_t current_table; /**< Index of current table in stack */ - - formatting_list_entry *formatting_list; /**< List of active formatting - * elements */ - formatting_list_entry *formatting_list_end; /**< End of active - * formatting list */ - - void *head_element; /**< Pointer to HEAD element */ - - void *form_element; /**< Pointer to most recently - * opened FORM element */ - - void *document; /**< Pointer to the document node */ - - struct { - insertion_mode mode; /**< Insertion mode to return to */ - void *node; /**< Node to attach Text child to */ - element_type type; /**< Type of node */ - hubbub_string string; /**< Text data */ - } collect; /**< Context for character collecting */ -} hubbub_treebuilder_context; - -struct hubbub_treebuilder -{ - hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */ - - const uint8_t *input_buffer; /**< Start of tokeniser's buffer */ - size_t input_buffer_len; /**< Length of input buffer */ - - hubbub_treebuilder_context context; - - hubbub_tree_handler *tree_handler; - - 
hubbub_buffer_handler buffer_handler; - void *buffer_pw; - - hubbub_error_handler error_handler; - void *error_pw; - - hubbub_alloc alloc; /**< Memory (de)allocation function */ - void *alloc_pw; /**< Client private data */ +} name_type_map[] = { + { "ADDRESS", ADDRESS }, { "AREA", AREA }, + { "BASE", BASE }, { "BASEFONT", BASEFONT }, + { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE }, + { "BODY", BODY }, { "BR", BR }, + { "CENTER", CENTER }, { "COL", COL }, + { "COLGROUP", COLGROUP }, { "DD", DD }, + { "DIR", DIR }, { "DIV", DIV }, + { "DL", DL }, { "DT", DT }, + { "EMBED", EMBED }, { "FIELDSET", FIELDSET }, + { "FORM", FORM }, { "FRAME", FRAME }, + { "FRAMESET", FRAMESET }, { "H1", H1 }, + { "H2", H2 }, { "H3", H3 }, + { "H4", H4 }, { "H5", H5 }, + { "H6", H6 }, { "HEAD", HEAD }, + { "HR", HR }, { "IFRAME", IFRAME }, + { "IMAGE", IMAGE }, { "IMG", IMG }, + { "INPUT", INPUT }, { "ISINDEX", ISINDEX }, + { "LI", LI }, { "LINK", LINK }, + { "LISTING", LISTING }, { "MENU", MENU }, + { "META", META }, { "NOEMBED", NOEMBED }, + { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT }, + { "OL", OL }, { "OPTGROUP", OPTGROUP }, + { "OPTION", OPTION }, { "P", P }, + { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT }, + { "PRE", PRE }, { "SCRIPT", SCRIPT }, + { "SELECT", SELECT }, { "SPACER", SPACER }, + { "STYLE", STYLE }, { "TBODY", TBODY }, + { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT }, + { "THEAD", THEAD }, { "TITLE", TITLE }, + { "TR", TR }, { "UL", UL }, + { "WBR", WBR }, + { "APPLET", APPLET }, { "BUTTON", BUTTON }, + { "CAPTION", CAPTION }, { "HTML", HTML }, + { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT }, + { "TABLE", TABLE }, { "TD", TD }, + { "TH", TH }, + { "A", A }, { "B", B }, + { "BIG", BIG }, { "EM", EM }, + { "FONT", FONT }, { "I", I }, + { "NOBR", NOBR }, { "S", S }, + { "SMALL", SMALL }, { "STRIKE", STRIKE }, + { "STRONG", STRONG }, { "TT", TT }, + { "U", U }, }; + static void hubbub_treebuilder_buffer_handler(const uint8_t *data, size_t len, void *pw); static 
void hubbub_treebuilder_token_handler(const hubbub_token *token, @@ -143,59 +85,6 @@ static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, const hubbub_token *token); -static bool process_characters_expect_whitespace( - hubbub_treebuilder *treebuilder, const hubbub_token *token, - bool insert_into_current_node); -static void process_comment_append(hubbub_treebuilder *treebuilder, - const hubbub_token *token, void *parent); -static void parse_generic_rcdata(hubbub_treebuilder *treebuilder, - const hubbub_token *token, bool rcdata); -static void process_base_link_meta_in_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token, element_type type); -static void process_script_in_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token); - -/** \todo Uncomment the static keyword here once these functions are actually used */ - -/*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder, - element_type type, bool in_table); -/*static*/ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder); -/*static*/ void clear_active_formatting_list_to_marker( - hubbub_treebuilder *treebuilder); -static void insert_element(hubbub_treebuilder *treebuilder, - const hubbub_tag *tag_name); -static void insert_element_verbatim(hubbub_treebuilder *treebuilder, - const uint8_t *name, size_t len); -static void insert_element_no_push(hubbub_treebuilder *treebuilder, - const hubbub_tag *tag_name); -/*static*/ void close_implied_end_tags(hubbub_treebuilder *treebuilder, - element_type except); -/*static*/ void reset_insertion_mode(hubbub_treebuilder *treebuilder); - -static element_type element_type_from_name(hubbub_treebuilder *treebuilder, - const hubbub_string *tag_name); -static element_type element_type_from_verbatim_name(const uint8_t *name, - size_t len); - -static inline bool is_special_element(element_type type); -static inline bool 
is_scoping_element(element_type type); -static inline bool is_formatting_element(element_type type); -static inline bool is_phrasing_element(element_type type); - -static bool element_stack_push(hubbub_treebuilder *treebuilder, - element_type type, void *node); -static bool element_stack_pop(hubbub_treebuilder *treebuilder, - element_type *type, void **node); - -/*static*/ bool formatting_list_insert(hubbub_treebuilder *treebuilder, - element_type type, void *node, uint32_t stack_index); -static bool formatting_list_remove(hubbub_treebuilder *treebuilder, - formatting_list_entry *entry, - element_type *type, void **node, uint32_t *stack_index); -static bool formatting_list_replace(hubbub_treebuilder *treebuilder, - formatting_list_entry *entry, - element_type type, void *node, uint32_t stack_index, - element_type *otype, void **onode, uint32_t *ostack_index); /** * Create a hubbub treebuilder @@ -243,6 +132,8 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, tb->context.collect.string.type = HUBBUB_STRING_OFF; + tb->context.strip_leading_lr = false; + tb->buffer_handler = NULL; tb->buffer_pw = NULL; @@ -430,7 +321,7 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token, treebuilder->tree_handler == NULL) return; - while (reprocess == true) { + while (reprocess) { switch (treebuilder->context.mode) { case INITIAL: reprocess = handle_initial(treebuilder, token); @@ -451,6 +342,8 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token, reprocess = handle_after_head(treebuilder, token); break; case IN_BODY: + reprocess = handle_in_body(treebuilder, token); + break; case IN_TABLE: case IN_CAPTION: case IN_COLUMN_GROUP: @@ -491,7 +384,7 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) switch (token->type) { case HUBBUB_TOKEN_CHARACTER: if (process_characters_expect_whitespace(treebuilder, token, - false) == true) { + false)) { /** \todo parse error */ 
treebuilder->tree_handler->set_quirks_mode( @@ -552,7 +445,7 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) break; } - if (reprocess == true) { + if (reprocess) { treebuilder->context.mode = BEFORE_HTML; } @@ -570,6 +463,7 @@ bool handle_before_html(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; + bool handled = false; switch (token->type) { case HUBBUB_TOKEN_DOCTYPE: @@ -589,48 +483,7 @@ bool handle_before_html(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HTML) { - int success; - void *html, *appended; - - /* We can't use insert_element() here, as it assumes - * that we're inserting into current_node. There is - * no current_node to insert into at this point so - * we get to do it manually. */ - - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, - &token->data.tag, &html); - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.document, - html, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - html); - } - - /* We can't use element_stack_push() here, as it - * assumes that current_node is pointing at the index - * before the one to insert at. For the first entry in - * the stack, this does not hold so we must insert - * manually. 
*/ - treebuilder->context.element_stack[0].type = HTML; - treebuilder->context.element_stack[0].node = html; - treebuilder->context.current_node = 0; - - /** \todo cache selection algorithm */ - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - appended); - - treebuilder->context.mode = BEFORE_HEAD; + handled = true; } else { reprocess = true; } @@ -642,15 +495,37 @@ bool handle_before_html(hubbub_treebuilder *treebuilder, break; } - if (reprocess == true) { - /* Need to manufacture html element */ + + if (handled || reprocess) { int success; void *html, *appended; - /** \todo UTF-16 */ - success = treebuilder->tree_handler->create_element_verbatim( - treebuilder->tree_handler->ctx, - (const uint8_t *) "html", SLEN("html"), &html); + /* We can't use insert_element() here, as it assumes + * that we're inserting into current_node. There is + * no current_node to insert into at this point so + * we get to do it manually. */ + + if (reprocess) { + /* Need to manufacture html element */ + hubbub_tag tag; + + /** \todo UTF-16 */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "html"; + tag.name.len = SLEN("html"); + + tag.n_attributes = 0; + tag.attributes = NULL; + + success = treebuilder->tree_handler->create_element( + treebuilder->tree_handler->ctx, + &tag, &html); + } else { + success = treebuilder->tree_handler->create_element( + treebuilder->tree_handler->ctx, + &token->data.tag, &html); + } + if (success != 0) { /** \todo errors */ } @@ -698,6 +573,7 @@ bool handle_before_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; + bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: @@ -718,20 +594,10 @@ bool handle_before_head(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HTML) { - /** \todo Process as if "in body" */ + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); } else if (type == HEAD) { - 
insert_element(treebuilder, &token->data.tag); - - treebuilder->tree_handler->ref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - - treebuilder->context.head_element = - treebuilder->context.element_stack[ - treebuilder->context.current_node].node; - - treebuilder->context.mode = IN_HEAD; + handled = true; } else { reprocess = true; } @@ -755,9 +621,31 @@ bool handle_before_head(hubbub_treebuilder *treebuilder, break; } - if (reprocess == true) { - insert_element_verbatim(treebuilder, - (const uint8_t *) "head", SLEN("head")); + if (handled || reprocess) { + hubbub_tag tag; + + if (reprocess) { + /* Manufacture head tag */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "head"; + tag.name.len = SLEN("head"); + + tag.n_attributes = 0; + tag.attributes = NULL; + } else { + tag = token->data.tag; + } + + insert_element(treebuilder, &tag); + + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + treebuilder->context.head_element = + treebuilder->context.element_stack[ + treebuilder->context.current_node].node; treebuilder->context.mode = IN_HEAD; } @@ -776,6 +664,7 @@ bool handle_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; + bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: @@ -796,7 +685,8 @@ bool handle_in_head(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HTML) { - /** \todo Process as if "in body" */ + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); } else if (type == BASE || type == LINK || type == META) { process_base_link_meta_in_head(treebuilder, token, type); @@ -816,6 +706,8 @@ bool handle_in_head(hubbub_treebuilder *treebuilder, process_script_in_head(treebuilder, token); } else if (type == HEAD) { /** \todo parse error */ + } else { 
+ reprocess = true; } } break; @@ -825,19 +717,7 @@ bool handle_in_head(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HEAD) { - element_type otype; - void *node; - - if (element_stack_pop(treebuilder, - &otype, &node) == false) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - - treebuilder->context.mode = AFTER_HEAD; + handled = true; } else if (type == BODY || type == HTML || type == P || type == BR) { reprocess = true; @@ -849,12 +729,11 @@ bool handle_in_head(hubbub_treebuilder *treebuilder, break; } - if (reprocess == true) { + if (handled || reprocess) { element_type otype; void *node; - if (element_stack_pop(treebuilder, - &otype, &node) == false) { + if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } @@ -879,6 +758,7 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; + bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: @@ -899,7 +779,8 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HTML) { - /** \todo Process as "in body" */ + /* Process as "in body" */ + process_tag_in_body(treebuilder, token); } else if (type == LINK || type == META) { process_base_link_meta_in_head(treebuilder, token, type); @@ -919,19 +800,7 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == NOSCRIPT) { - element_type otype; - void *node; - - if (element_stack_pop(treebuilder, - &otype, &node) == false) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - - treebuilder->context.mode = IN_HEAD; + handled = true; } else if (type == P || type == BR) { /** \todo parse error */ reprocess = true; @@ -946,11 +815,11 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, break; } - if (reprocess == true) { + if (handled 
|| reprocess) { element_type otype; void *node; - if (element_stack_pop(treebuilder, &otype, &node) == false) { + if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } @@ -975,6 +844,7 @@ bool handle_after_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; + bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: @@ -995,10 +865,10 @@ bool handle_after_head(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type == HTML) { - /** \todo Process as if "in body" */ + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); } else if (type == BODY) { - insert_element(treebuilder, &token->data.tag); - treebuilder->context.mode = IN_BODY; + handled = true; } else if (type == FRAMESET) { insert_element(treebuilder, &token->data.tag); treebuilder->context.mode = IN_FRAMESET; @@ -1010,10 +880,9 @@ bool handle_after_head(hubbub_treebuilder *treebuilder, /** \todo parse error */ - if (element_stack_push(treebuilder, + if (!element_stack_push(treebuilder, HEAD, - treebuilder->context.head_element) == - false) { + treebuilder->context.head_element)) { /** \todo errors */ } @@ -1028,8 +897,7 @@ bool handle_after_head(hubbub_treebuilder *treebuilder, parse_generic_rcdata(treebuilder, token, true); } - if (element_stack_pop(treebuilder, &otype, &node) == - false) { + if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } @@ -1046,9 +914,22 @@ bool handle_after_head(hubbub_treebuilder *treebuilder, break; } - if (reprocess == true) { - insert_element_verbatim(treebuilder, - (const uint8_t *) "body", SLEN("body")); + if (handled || reprocess) { + hubbub_tag tag; + + if (reprocess) { + /* Manufacture body */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "body"; + tag.name.len = SLEN("body"); + + tag.n_attributes = 0; + tag.attributes = NULL; + } else { + tag = token->data.tag; + } + + insert_element(treebuilder, &tag); 
treebuilder->context.mode = IN_BODY; } @@ -1069,6 +950,12 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, bool reprocess = false; bool done = false; + if (treebuilder->context.strip_leading_lr && + token->type != HUBBUB_TOKEN_CHARACTER) { + /* Reset the LR stripping flag */ + treebuilder->context.strip_leading_lr = false; + } + switch (token->type) { case HUBBUB_TOKEN_CHARACTER: if (treebuilder->context.collect.string.len == 0) { @@ -1077,6 +964,19 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, } treebuilder->context.collect.string.len += token->data.character.len; + + if (treebuilder->context.strip_leading_lr) { + const uint8_t *str = treebuilder->input_buffer + + treebuilder->context.collect.string.data.off; + + /** \todo UTF-16 */ + if (*str == '\n') { + treebuilder->context.collect.string.data.off++; + treebuilder->context.collect.string.len--; + } + + treebuilder->context.strip_leading_lr = false; + } break; case HUBBUB_TOKEN_END_TAG: { @@ -1084,7 +984,7 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, &token->data.tag.name); if (type != treebuilder->context.collect.type) { - assert(0); + /** \todo parse error */ } done = true; @@ -1102,7 +1002,7 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, break; } - if (done == true) { + if (done) { int success; void *text, *appended; @@ -1189,7 +1089,7 @@ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, break; } - if (done == true) { + if (done) { int success; void *text, *appended; @@ -1281,42 +1181,14 @@ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder, } /* Non-whitespace characters in token, so reprocess */ if (c != len) { - if (c > 0 && insert_into_current_node == true) { + if (c > 0 && insert_into_current_node) { hubbub_string temp; - int success; - void *text, *appended; + temp.type = HUBBUB_STRING_OFF; temp.data.off = token->data.character.data.off; temp.len = len - c; - /** \todo Append to pre-existing text 
child, iff - * one exists and it's the last in the child list */ - - success = treebuilder->tree_handler->create_text( - treebuilder->tree_handler->ctx, - &temp, &text); - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - text, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); + append_text(treebuilder, &temp); } /* Update token data to strip leading whitespace */ @@ -1391,6 +1263,13 @@ void parse_generic_rcdata(hubbub_treebuilder *treebuilder, /** \todo errors */ } + /* It's a bit nasty having this code deal with textarea->form + * association, but it avoids having to duplicate the entire rest + * of this function for textarea processing */ + if (type == TEXTAREA && treebuilder->context.form_element != NULL) { + /** \todo associate textarea with form */ + } + success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ @@ -1486,9 +1365,9 @@ void process_script_in_head(hubbub_treebuilder *treebuilder, * \param treebuilder Treebuilder to look in * \param type Element type to find * \param in_table Whether we're looking in table scope - * \return True iff element is in scope, false otherwise + * \return Element stack index, or 0 if not in scope */ -bool element_in_scope(hubbub_treebuilder *treebuilder, +uint32_t element_in_scope(hubbub_treebuilder *treebuilder, element_type type, bool in_table) { uint32_t node; @@ -1496,12 +1375,12 @@ bool element_in_scope(hubbub_treebuilder *treebuilder, if (treebuilder->context.element_stack == NULL) return false; - for (node = 
treebuilder->context.current_node; node > 0; node --) { + for (node = treebuilder->context.current_node; node > 0; node--) { element_type node_type = treebuilder->context.element_stack[node].type; if (node_type == type) - return true; + return node; if (node_type == TABLE) break; @@ -1515,7 +1394,7 @@ bool element_in_scope(hubbub_treebuilder *treebuilder, break; } - return false; + return 0; } /** @@ -1546,7 +1425,7 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) } } - while (1) { + while (entry != NULL) { int success; void *clone, *appended; element_type prev_type; @@ -1577,9 +1456,8 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) return; } - if (element_stack_push(treebuilder, - entry->details.type, - appended) == false) { + if (!element_stack_push(treebuilder, + entry->details.type, appended)) { /** \todo handle memory exhaustion */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, @@ -1589,11 +1467,11 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) clone); } - if (formatting_list_replace(treebuilder, entry, + if (!formatting_list_replace(treebuilder, entry, entry->details.type, clone, treebuilder->context.current_node, &prev_type, &prev_node, - &prev_stack_index) == false) { + &prev_stack_index)) { /** \todo handle errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, @@ -1604,8 +1482,7 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) treebuilder->tree_handler->ctx, prev_node); - if (entry->next != NULL) - entry = entry->next; + entry = entry->next; } } @@ -1627,8 +1504,8 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder) if (is_scoping_element(entry->details.type)) done = true; - if (formatting_list_remove(treebuilder, entry, - &type, &node, &stack_index) == false) { + if (!formatting_list_remove(treebuilder, entry, + &type, &node, &stack_index)) { /** \todo handle errors */ 
} @@ -1636,7 +1513,7 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder) treebuilder->tree_handler->ctx, node); - if (done == true) + if (done) break; } } @@ -1670,47 +1547,9 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, appended); - if (element_stack_push(treebuilder, + if (!element_stack_push(treebuilder, element_type_from_name(treebuilder, &tag->name), - node) == false) { - /** \todo errors */ - } -} - -/** - * Create element and insert it into the DOM, pushing it on the stack - * - * \param treebuilder The treebuilder instance - * \param name Name of element to insert - * \param len Length, in bytes, of ::name - */ -void insert_element_verbatim(hubbub_treebuilder *treebuilder, - const uint8_t *name, size_t len) -{ - int success; - void *node, *appended; - - success = treebuilder->tree_handler->create_element_verbatim( - treebuilder->tree_handler->ctx, name, len, &node); - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - node, &appended); - if (success != 0) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, - appended); - - if (element_stack_push(treebuilder, - element_type_from_verbatim_name(name, len), - node) == false) { + node)) { /** \todo errors */ } } @@ -1752,7 +1591,8 @@ void insert_element_no_push(hubbub_treebuilder *treebuilder, * Close implied end tags * * \param treebuilder The treebuilder instance - * \param except Tag type to exclude from processing [DD,DT,LI,P] + * \param except Tag type to exclude from processing [DD,DT,LI,P], + * or UNKNOWN to exclude nothing */ void close_implied_end_tags(hubbub_treebuilder *treebuilder, element_type except) @@ -1766,10 +1606,10 @@ void 
close_implied_end_tags(hubbub_treebuilder *treebuilder, element_type otype; void *node; - if (type == except) + if (except != UNKNOWN && type == except) break; - if (element_stack_pop(treebuilder, &otype, &node) == false) { + if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } @@ -1839,77 +1679,67 @@ void reset_insertion_mode(hubbub_treebuilder *treebuilder) } /** - * Convert an element name into an element type + * Append text to the current node, inserting into the last child of the + * current node, iff it's a Text node. * * \param treebuilder The treebuilder instance - * \param tag_name The tag name to consider - * \return The corresponding element type + * \param string The string to append */ -element_type element_type_from_name(hubbub_treebuilder *treebuilder, - const hubbub_string *tag_name) +void append_text(hubbub_treebuilder *treebuilder, + const hubbub_string *string) { - const uint8_t *name = treebuilder->input_buffer + tag_name->data.off; + int success; + void *text, *appended; + + /** \todo Append to pre-existing text child, iff + * one exists and it's the last in the child list */ + + success = treebuilder->tree_handler->create_text( + treebuilder->tree_handler->ctx, string, &text); + if (success != 0) { + /** \todo errors */ + } + + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + text, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + text); + } - return element_type_from_verbatim_name(name, tag_name->len); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, text); } /** - * Convert a verbatim element name into an element type + * Convert an element name into an element type * - * \param name The tag name - * \param 
len Length, in bytes, of ::name + * \param treebuilder The treebuilder instance + * \param tag_name The tag name to consider * \return The corresponding element type */ -element_type element_type_from_verbatim_name(const uint8_t *name, size_t len) +element_type element_type_from_name(hubbub_treebuilder *treebuilder, + const hubbub_string *tag_name) { - static const struct { - const char *name; - element_type type; - } name_type_map[] = { - { "ADDRESS", ADDRESS }, { "AREA", AREA }, - { "BASE", BASE }, { "BASEFONT", BASEFONT }, - { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE }, - { "BODY", BODY }, { "BR", BR }, - { "CENTER", CENTER }, { "COL", COL }, - { "COLGROUP", COLGROUP }, { "DD", DD }, - { "DIR", DIR }, { "DIV", DIV }, - { "DL", DL }, { "DT", DT }, - { "EMBED", EMBED }, { "FIELDSET", FIELDSET }, - { "FORM", FORM }, { "FRAME", FRAME }, - { "FRAMESET", FRAMESET }, { "H1", H1 }, - { "H2", H2 }, { "H3", H3 }, - { "H4", H4 }, { "H5", H5 }, - { "H6", H6 }, { "HEAD", HEAD }, - { "HR", HR }, { "IFRAME", IFRAME }, - { "IMAGE", IMAGE }, { "IMG", IMG }, - { "INPUT", INPUT }, { "ISINDEX", ISINDEX }, - { "LI", LI }, { "LINK", LINK }, - { "LISTING", LISTING }, { "MENU", MENU }, - { "META", META }, { "NOEMBED", NOEMBED }, - { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT }, - { "OL", OL }, { "OPTGROUP", OPTGROUP }, - { "OPTION", OPTION }, { "P", P }, - { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT }, - { "PRE", PRE }, { "SCRIPT", SCRIPT }, - { "SELECT", SELECT }, { "SPACER", SPACER }, - { "STYLE", STYLE }, { "TBODY", TBODY }, - { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT }, - { "THEAD", THEAD }, { "TITLE", TITLE }, - { "TR", TR }, { "UL", UL }, - { "WBR", WBR }, - { "APPLET", APPLET }, { "BUTTON", BUTTON }, - { "CAPTION", CAPTION }, { "HTML", HTML }, - { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT }, - { "TABLE", TABLE }, { "TD", TD }, - { "TH", TH }, - { "A", A }, { "B", B }, - { "BIG", BIG }, { "EM", EM }, - { "FONT", FONT }, { "I", I }, - { "NOBR", NOBR }, { "S", S }, - { 
"SMALL", SMALL }, { "STRIKE", STRIKE }, - { "STRONG", STRONG }, { "TT", TT }, - { "U", U }, - }; + const uint8_t *name = NULL; + size_t len = tag_name->len; + + switch (tag_name->type) { + case HUBBUB_STRING_OFF: + name = treebuilder->input_buffer + tag_name->data.off; + break; + case HUBBUB_STRING_PTR: + name = tag_name->data.ptr; + break; + } + /** \todo UTF-16 support */ /** \todo optimise this */ @@ -1925,8 +1755,7 @@ element_type element_type_from_verbatim_name(const uint8_t *name, size_t len) return name_type_map[i].type; } - /** \todo produce type values for unknown tags */ - return U + 1; + return UNKNOWN; } /** @@ -1935,7 +1764,7 @@ element_type element_type_from_verbatim_name(const uint8_t *name, size_t len) * \param type Node type to consider * \return True iff node is a special element */ -inline bool is_special_element(element_type type) +bool is_special_element(element_type type) { return (type <= WBR); } @@ -1946,7 +1775,7 @@ inline bool is_special_element(element_type type) * \param type Node type to consider * \return True iff node is a scoping element */ -inline bool is_scoping_element(element_type type) +bool is_scoping_element(element_type type) { return (type >= APPLET && type <= TH); } @@ -1957,7 +1786,7 @@ inline bool is_scoping_element(element_type type) * \param type Node type to consider * \return True iff node is a formatting element */ -inline bool is_formatting_element(element_type type) +bool is_formatting_element(element_type type) { return (type >= A && type <= U); } @@ -1968,7 +1797,7 @@ inline bool is_formatting_element(element_type type) * \param type Node type to consider * \return True iff node is a phrasing element */ -inline bool is_phrasing_element(element_type type) +bool is_phrasing_element(element_type type) { return (type > U); } @@ -2066,7 +1895,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder, } /** - * Insert an element into the list of active formatting elements + * Append an element to the end of the list 
of active formatting elements * * \param treebuilder Treebuilder instance containing list * \param type Type of node being inserted @@ -2074,7 +1903,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder, * \param stack_index Index into stack of open elements * \return True on success, false on memory exhaustion */ -bool formatting_list_insert(hubbub_treebuilder *treebuilder, +bool formatting_list_append(hubbub_treebuilder *treebuilder, element_type type, void *node, uint32_t stack_index) { formatting_list_entry *entry; @@ -2101,6 +1930,57 @@ bool formatting_list_insert(hubbub_treebuilder *treebuilder, return true; } +/** + * Insert an element into the list of active formatting elements + * + * \param treebuilder Treebuilder instance containing list + * \param prev Previous entry + * \param next Next entry + * \param type Type of node being inserted + * \param node Node being inserted + * \param stack_index Index into stack of open elements + * \return True on success, false on memory exhaustion + */ +bool formatting_list_insert(hubbub_treebuilder *treebuilder, + formatting_list_entry *prev, formatting_list_entry *next, + element_type type, void *node, uint32_t stack_index) +{ + formatting_list_entry *entry; + + if (prev != NULL) { + assert(prev->next == next); + } + + if (next != NULL) { + assert(next->prev == prev); + } + + entry = treebuilder->alloc(NULL, sizeof(formatting_list_entry), + treebuilder->alloc_pw); + if (entry == NULL) + return false; + + entry->details.type = type; + entry->details.node = node; + entry->stack_index = stack_index; + + entry->prev = prev; + entry->next = next; + + if (entry->prev != NULL) + entry->prev->next = entry; + else + treebuilder->context.formatting_list = entry; + + if (entry->next != NULL) + entry->next->prev = entry; + else + treebuilder->context.formatting_list_end = entry; + + return true; +} + + /** * Remove an element from the list of active formatting elements * @@ -2165,3 +2045,62 @@ bool 
formatting_list_replace(hubbub_treebuilder *treebuilder, return true; } +#ifndef NDEBUG +static const char *element_type_to_name(element_type type); + +/** + * Dump an element stack to the given file pointer + * + * \param treebuilder The treebuilder instance + * \param fp The file to dump to + */ +void element_stack_dump(hubbub_treebuilder *treebuilder, FILE *fp) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t i; + + for (i = 0; i <= treebuilder->context.current_node; i++) { + fprintf(fp, "%u: %s %p\n", + i, + element_type_to_name(stack[i].type), + stack[i].node); + } +} + +/** + * Dump a formatting list to the given file pointer + * + * \param treebuilder The treebuilder instance + * \param fp The file to dump to + */ +void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp) +{ + formatting_list_entry *entry; + + for (entry = treebuilder->context.formatting_list; entry != NULL; + entry = entry->next) { + fprintf(fp, "%s %p %u\n", + element_type_to_name(entry->details.type), + entry->details.node, entry->stack_index); + } +} + +/** + * Convert an element type to a name + * + * \param type The element type + * \return Pointer to name + */ +const char *element_type_to_name(element_type type) +{ + for (uint32_t i = 0; + i < sizeof(name_type_map) / sizeof(name_type_map[0]); + i++) { + if (name_type_map[i].type == type) + return name_type_map[i].name; + } + + return "UNKNOWN"; +} +#endif + -- cgit v1.2.3