From 67169ed4d74fcbbeb0177de132d4b1ec7964cba7 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 10 Mar 2009 22:17:58 +0000 Subject: Sync treebuilder with spec svn path=/trunk/hubbub/; revision=6764 --- src/treebuilder/Makefile | 2 +- src/treebuilder/after_head.c | 22 +- src/treebuilder/before_head.c | 5 +- src/treebuilder/before_html.c | 3 - src/treebuilder/generic_rcdata.c | 46 ++--- src/treebuilder/in_body.c | 377 +++++++++++++++++++++++------------ src/treebuilder/in_column_group.c | 2 +- src/treebuilder/in_foreign_content.c | 92 +++++++-- src/treebuilder/in_frameset.c | 4 +- src/treebuilder/in_head.c | 63 +----- src/treebuilder/in_row.c | 2 +- src/treebuilder/in_select.c | 6 +- src/treebuilder/in_table.c | 16 +- src/treebuilder/in_table_body.c | 4 +- src/treebuilder/initial.c | 27 +-- src/treebuilder/internal.h | 26 +-- src/treebuilder/modes.h | 1 - src/treebuilder/script_collect.c | 145 -------------- src/treebuilder/treebuilder.c | 248 +++++++++++------------ 19 files changed, 528 insertions(+), 563 deletions(-) delete mode 100644 src/treebuilder/script_collect.c (limited to 'src') diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile index 8c50b8e..28be92a 100644 --- a/src/treebuilder/Makefile +++ b/src/treebuilder/Makefile @@ -39,7 +39,7 @@ SRCS_$(d) := treebuilder.c \ in_cell.c in_select.c in_select_in_table.c \ in_foreign_content.c after_body.c in_frameset.c \ after_frameset.c after_after_body.c after_after_frameset.c \ - generic_rcdata.c script_collect.c + generic_rcdata.c # Append to sources for component SOURCES += $(addprefix $(d), $(SRCS_$(d))) diff --git a/src/treebuilder/after_head.c b/src/treebuilder/after_head.c index 5ec6ca4..9af0796 100644 --- a/src/treebuilder/after_head.c +++ b/src/treebuilder/after_head.c @@ -51,7 +51,7 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, } else if (type == BODY) { handled = true; } else if (type == FRAMESET) { - insert_element(treebuilder, &token->data.tag); + 
insert_element(treebuilder, &token->data.tag, true); treebuilder->context.mode = IN_FRAMESET; } else if (type == BASE || type == LINK || type == META || type == NOFRAMES || type == SCRIPT || @@ -59,6 +59,7 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, hubbub_ns ns; element_type otype; void *node; + uint32_t index; /** \todo parse error */ @@ -69,11 +70,13 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, /** \todo errors */ } + index = treebuilder->context.current_node; + /* Process as "in head" */ err = handle_in_head(treebuilder, token); - if (!element_stack_pop(treebuilder, &ns, &otype, - &node)) { + if (!element_stack_remove(treebuilder, index, + &ns, &otype, &node)) { /** \todo errors */ } @@ -87,7 +90,16 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, } break; case HUBBUB_TOKEN_END_TAG: - /** \todo parse error */ + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML || type == BODY || type == BR) { + err = HUBBUB_REPROCESS; + } else { + /** \todo parse error */ + } + } break; case HUBBUB_TOKEN_EOF: err = HUBBUB_REPROCESS; @@ -109,7 +121,7 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, tag = token->data.tag; } - insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->context.mode = IN_BODY; } diff --git a/src/treebuilder/before_head.c b/src/treebuilder/before_head.c index a2f4386..19bf800 100644 --- a/src/treebuilder/before_head.c +++ b/src/treebuilder/before_head.c @@ -60,7 +60,8 @@ hubbub_error handle_before_head(hubbub_treebuilder *treebuilder, element_type type = element_type_from_name(treebuilder, &token->data.tag.name); - if (type == HEAD || type == BR) { + if (type == HTML || type == BODY || + type == HEAD || type == BR) { err = HUBBUB_REPROCESS; } else { /** \todo parse error */ @@ -87,7 +88,7 @@ hubbub_error handle_before_head(hubbub_treebuilder *treebuilder, tag = token->data.tag; } - 
insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->tree_handler->ref_node( treebuilder->tree_handler->ctx, diff --git a/src/treebuilder/before_html.c b/src/treebuilder/before_html.c index 003dd37..d5a0d22 100644 --- a/src/treebuilder/before_html.c +++ b/src/treebuilder/before_html.c @@ -97,9 +97,6 @@ hubbub_error handle_before_html(hubbub_treebuilder *treebuilder, html, &appended); if (success != 0) { /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - html); } treebuilder->tree_handler->unref_node( diff --git a/src/treebuilder/generic_rcdata.c b/src/treebuilder/generic_rcdata.c index 1daf80f..627068a 100644 --- a/src/treebuilder/generic_rcdata.c +++ b/src/treebuilder/generic_rcdata.c @@ -37,8 +37,6 @@ hubbub_error handle_generic_rcdata(hubbub_treebuilder *treebuilder, case HUBBUB_TOKEN_CHARACTER: { hubbub_string chars = token->data.character; - int success; - void *text, *appended; if (treebuilder->context.strip_leading_lr) { if (chars.ptr[0] == '\n') { @@ -52,30 +50,7 @@ hubbub_error handle_generic_rcdata(hubbub_treebuilder *treebuilder, if (chars.len == 0) break; - - success = treebuilder->tree_handler->create_text( - treebuilder->tree_handler->ctx, - &chars, - &text); - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node, - text, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, text); + append_text(treebuilder, &chars); } break; case HUBBUB_TOKEN_END_TAG: @@ -87,10 +62,16 @@ hubbub_error handle_generic_rcdata(hubbub_treebuilder *treebuilder, /** \todo parse error */ } + if (type == SCRIPT) { + /** \todo 
script processing and execution */ + } + done = true; } break; case HUBBUB_TOKEN_EOF: + /** \todo if the current node's a script, + * mark it as already executed */ /** \todo parse error */ done = true; err = HUBBUB_REPROCESS; @@ -104,11 +85,18 @@ hubbub_error handle_generic_rcdata(hubbub_treebuilder *treebuilder, } if (done) { - /* Clean up context */ + hubbub_ns ns; + element_type otype; + void *node; + + /* Pop the current node from the stack */ + if (!element_stack_pop(treebuilder, &ns, &otype, &node)) { + /** \todo errors */ + } + treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, - treebuilder->context.collect.node); - treebuilder->context.collect.node = NULL; + node); /* Return to previous insertion mode */ treebuilder->context.mode = treebuilder->context.collect.mode; diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c index 698a485..ce3032e 100644 --- a/src/treebuilder/in_body.c +++ b/src/treebuilder/in_body.c @@ -34,8 +34,12 @@ static void process_html_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); static void process_body_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); +static void process_frameset_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); static void process_container_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); +static void process_hN_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); static void process_form_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); static void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder, @@ -57,20 +61,21 @@ static void process_hr_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); static void process_image_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); -static void process_input_in_body(hubbub_treebuilder *treebuilder, - const hubbub_token *token); static void process_isindex_in_body(hubbub_treebuilder 
*treebuilder, const hubbub_token *token); static void process_textarea_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); static void process_select_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); +static void process_opt_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); static void process_phrasing_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool process_0body_in_body(hubbub_treebuilder *treebuilder); static void process_0container_in_body(hubbub_treebuilder *treebuilder, element_type type); +static void process_0form_in_body(hubbub_treebuilder *treebuilder); static void process_0p_in_body(hubbub_treebuilder *treebuilder); static void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder, element_type type); @@ -185,6 +190,7 @@ void process_character(hubbub_treebuilder *treebuilder, const hubbub_token *token) { hubbub_string dummy = token->data.character; + const uint8_t *p; reconstruct_active_formatting_list(treebuilder); @@ -201,6 +207,16 @@ void process_character(hubbub_treebuilder *treebuilder, if (dummy.len) append_text(treebuilder, &dummy); + + if (treebuilder->context.frameset_ok) { + for (p = dummy.ptr; p < dummy.ptr + dummy.len; p++) { + if (*p != 0x0009 && *p != 0x000a && + *p != 0x000c && *p != 0x0020) { + treebuilder->context.frameset_ok = false; + break; + } + } + } } /** @@ -219,30 +235,34 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, if (type == HTML) { process_html_in_body(treebuilder, token); - } else if (type == BASE || type == COMMAND || - type == EVENTSOURCE || type == LINK || + } else if (type == BASE || type == COMMAND || type == LINK || type == META || type == NOFRAMES || type == SCRIPT || type == STYLE || type == TITLE) { /* Process as "in head" */ err = handle_in_head(treebuilder, token); } else if (type == BODY) { process_body_in_body(treebuilder, token); + } else if (type == FRAMESET) { + process_frameset_in_body(treebuilder, 
token); + treebuilder->context.mode = IN_FRAMESET; } else if (type == ADDRESS || type == ARTICLE || type == ASIDE || type == BLOCKQUOTE || type == CENTER || type == DATAGRID || type == DETAILS || type == DIALOG || type == DIR || type == DIV || type == DL || type == FIELDSET || type == FIGURE || type == FOOTER || - type == H1 || type == H2 || type == H3 || - type == H4 || type == H5 || type == H6 || type == HEADER || type == MENU || type == NAV || type == OL || type == P || type == SECTION || type == UL) { process_container_in_body(treebuilder, token); + } else if (type == H1 || type == H2 || type == H3 || + type == H4 || type == H5 || type == H6) { + process_hN_in_body(treebuilder, token); } else if (type == PRE || type == LISTING) { process_container_in_body(treebuilder, token); treebuilder->context.strip_leading_lr = true; + treebuilder->context.frameset_ok = false; } else if (type == FORM) { process_form_in_body(treebuilder, token); } else if (type == DD || type == DT || type == LI) { @@ -251,7 +271,7 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, process_plaintext_in_body(treebuilder, token); } else if (type == A) { process_a_in_body(treebuilder, token); - } else if (type == B || type == BIG || type == EM || + } else if (type == B || type == BIG || type == CODE || type == EM || type == FONT || type == I || type == S || type == SMALL || type == STRIKE || type == STRONG || type == TT || type == U) { @@ -267,25 +287,27 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, token, type); } else if (type == XMP) { reconstruct_active_formatting_list(treebuilder); + treebuilder->context.frameset_ok = false; parse_generic_rcdata(treebuilder, token, false); } else if (type == TABLE) { process_container_in_body(treebuilder, token); + treebuilder->context.frameset_ok = false; + treebuilder->context.element_stack[current_table(treebuilder)] .tainted = false; treebuilder->context.mode = IN_TABLE; } else if (type == AREA || type == BASEFONT || type == BGSOUND 
|| type == BR || - type == EMBED || type == IMG || type == PARAM || - type == SPACER || type == WBR) { + type == EMBED || type == IMG || type == INPUT || + type == PARAM || type == SPACER || type == WBR) { reconstruct_active_formatting_list(treebuilder); - insert_element_no_push(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, false); + treebuilder->context.frameset_ok = false; } else if (type == HR) { process_hr_in_body(treebuilder, token); } else if (type == IMAGE) { process_image_in_body(treebuilder, token); - } else if (type == INPUT) { - process_input_in_body(treebuilder, token); } else if (type == ISINDEX) { process_isindex_in_body(treebuilder, token); } else if (type == TEXTAREA) { @@ -294,6 +316,8 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, type == NOFRAMES || (treebuilder->context.enable_scripting && type == NOSCRIPT)) { + if (type == IFRAME) + treebuilder->context.frameset_ok = false; parse_generic_rcdata(treebuilder, token, false); } else if (type == SELECT) { process_select_in_body(treebuilder, token); @@ -308,6 +332,8 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, treebuilder->context.mode == IN_CELL) { treebuilder->context.mode = IN_SELECT_IN_TABLE; } + } else if (type == OPTGROUP || type == OPTION) { + process_opt_in_body(treebuilder, token); } else if (type == RP || type == RT) { /** \todo ruby */ } else if (type == MATH || type == SVG) { @@ -320,21 +346,21 @@ bool process_start_tag(hubbub_treebuilder *treebuilder, adjust_svg_attributes(treebuilder, &tag); tag.ns = HUBBUB_NS_SVG; } else { + adjust_mathml_attributes(treebuilder, &tag); tag.ns = HUBBUB_NS_MATHML; } if (token->data.tag.self_closing) { - insert_element_no_push(treebuilder, &tag); + insert_element(treebuilder, &tag, false); /** \todo ack sc flag */ } else { - insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->context.second_mode = treebuilder->context.mode; treebuilder->context.mode = 
IN_FOREIGN_CONTENT; } } else if (type == CAPTION || type == COL || type == COLGROUP || - type == FRAME || type == FRAMESET || - type == HEAD || type == TBODY || + type == FRAME || type == HEAD || type == TBODY || type == TD || type == TFOOT || type == TH || type == THEAD || type == TR) { /** \todo parse error */ @@ -372,13 +398,16 @@ bool process_end_tag(hubbub_treebuilder *treebuilder, treebuilder->context.mode = AFTER_BODY; } err = HUBBUB_REPROCESS; - } else if (type == ADDRESS || type == BLOCKQUOTE || - type == CENTER || type == DIR || type == DIV || - type == DL || type == FIELDSET || - type == LISTING || type == MENU || - type == OL || type == PRE || type == UL || - type == FORM) { + } else if (type == ADDRESS || type == ARTICLE || type == ASIDE || + type == BLOCKQUOTE || type == CENTER || type == DIR || + type == DATAGRID || type == DIV || type == DL || + type == FIELDSET || type == FOOTER || type == HEADER || + type == LISTING || type == MENU || type == NAV || + type == OL || type == PRE || type == SECTION || + type == UL) { process_0container_in_body(treebuilder, type); + } else if (type == FORM) { + process_0form_in_body(treebuilder); } else if (type == P) { process_0p_in_body(treebuilder); } else if (type == DD || type == DT || type == LI) { @@ -386,7 +415,7 @@ bool process_end_tag(hubbub_treebuilder *treebuilder, } else if (type == H1 || type == H2 || type == H3 || type == H4 || type == H5 || type == H6) { process_0h_in_body(treebuilder, type); - } else if (type == A || type == B || type == BIG || + } else if (type == A || type == B || type == BIG || type == CODE || type == EM || type == FONT || type == I || type == NOBR || type == S || type == SMALL || type == STRIKE || type == STRONG || @@ -410,12 +439,7 @@ bool process_end_tag(hubbub_treebuilder *treebuilder, (treebuilder->context.enable_scripting && type == NOSCRIPT)) { /** \todo parse error */ -/* } else if (type == EVENT_SOURCE || type == SECTION || - type == NAV || type == ARTICLE || - type == 
ASIDE || type == HEADER || - type == FOOTER || type == DATAGRID || - type == COMMAND) { -*/ } else { + } else { process_0generic_in_body(treebuilder, type); } @@ -462,6 +486,58 @@ void process_body_in_body(hubbub_treebuilder *treebuilder, token->data.tag.n_attributes); } +/** + * Process a frameset start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_frameset_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + void *parent = NULL; + + /** \todo parse error */ + + if (treebuilder->context.current_node < 1 || + treebuilder->context.element_stack[1].type != BODY) + return; + + if (treebuilder->context.frameset_ok == false) + return; + + if (treebuilder->tree_handler->get_parent( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[1].node, + false, &parent)) { + /** \todo errors */ + } + + if (parent != NULL) { + void *removed; + + if (treebuilder->tree_handler->remove_child( + treebuilder->tree_handler->ctx, + parent, + treebuilder->context.element_stack[1].node, + &removed)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, removed); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, parent); + } + + if (element_stack_pop_until(treebuilder, BODY) == false) { + /** \todo errors */ + } + + insert_element(treebuilder, &token->data.tag, true); +} + /** * Process a generic container start tag as if in "in body" * @@ -475,7 +551,45 @@ void process_container_in_body(hubbub_treebuilder *treebuilder, process_0p_in_body(treebuilder); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); +} + +/** + * Process a hN start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_hN_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) 
+{ + element_type type; + + if (element_in_scope(treebuilder, P, false)) { + process_0p_in_body(treebuilder); + } + + type = treebuilder->context.element_stack[ + treebuilder->context.current_node].type; + + if (type == H1 || type == H2 || type == H3 || type == H4 || + type == H5 || type == H6) { + hubbub_ns ns; + element_type otype; + void *node; + + /** \todo parse error */ + + if (!element_stack_pop(treebuilder, &ns, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + } + + insert_element(treebuilder, &token->data.tag, true); } /** @@ -494,7 +608,7 @@ void process_form_in_body(hubbub_treebuilder *treebuilder, process_0p_in_body(treebuilder); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); /* Claim a reference on the node and * use it as the current form element */ @@ -522,6 +636,8 @@ void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder, element_context *stack = treebuilder->context.element_stack; uint32_t node; + treebuilder->context.frameset_ok = false; + if (element_in_scope(treebuilder, P, false)) { process_0p_in_body(treebuilder); } @@ -569,7 +685,7 @@ void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder, } while (treebuilder->context.current_node >= node); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); } /** @@ -587,7 +703,7 @@ void process_plaintext_in_body(hubbub_treebuilder *treebuilder, process_0p_in_body(treebuilder); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; @@ -639,15 +755,21 @@ void process_a_in_body(hubbub_treebuilder *treebuilder, if (index <= treebuilder->context.current_node && treebuilder->context.element_stack[index].node == node) { - aa_remove_element_stack_item(treebuilder, index, - 
treebuilder->context.current_node); - treebuilder->context.current_node--; + hubbub_ns ns; + element_type otype; + void *onode; + + element_stack_remove(treebuilder, index, &ns, &otype, + &onode); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, onode); } } reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ @@ -672,7 +794,7 @@ void process_presentational_in_body(hubbub_treebuilder *treebuilder, { reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ @@ -705,7 +827,7 @@ void process_nobr_in_body(hubbub_treebuilder *treebuilder, reconstruct_active_formatting_list(treebuilder); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->tree_handler->ref_node( treebuilder->tree_handler->ctx, @@ -737,15 +859,7 @@ void process_button_in_body(hubbub_treebuilder *treebuilder, reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); - - if (treebuilder->context.form_element != NULL) { - treebuilder->tree_handler->form_associate( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - } + insert_element(treebuilder, &token->data.tag, true); treebuilder->tree_handler->ref_node( treebuilder->tree_handler->ctx, @@ -756,6 +870,8 @@ void process_button_in_body(hubbub_treebuilder *treebuilder, treebuilder->context.element_stack[ treebuilder->context.current_node].node, treebuilder->context.current_node); + + treebuilder->context.frameset_ok = false; } /** 
@@ -770,7 +886,7 @@ void process_applet_marquee_object_in_body(hubbub_treebuilder *treebuilder, { reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->tree_handler->ref_node( treebuilder->tree_handler->ctx, @@ -781,6 +897,8 @@ void process_applet_marquee_object_in_body(hubbub_treebuilder *treebuilder, treebuilder->context.element_stack[ treebuilder->context.current_node].node, treebuilder->context.current_node); + + treebuilder->context.frameset_ok = false; } /** @@ -796,7 +914,9 @@ void process_hr_in_body(hubbub_treebuilder *treebuilder, process_0p_in_body(treebuilder); } - insert_element_no_push(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, false); + + treebuilder->context.frameset_ok = false; } /** @@ -819,40 +939,7 @@ void process_image_in_body(hubbub_treebuilder *treebuilder, reconstruct_active_formatting_list(treebuilder); - insert_element_no_push(treebuilder, &tag); -} - -/** - * Process an input start tag as if in "in body" - * - * \param treebuilder The treebuilder instance - * \param token The token to process - */ -void process_input_in_body(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - hubbub_ns ns; - element_type otype; - void *node; - - reconstruct_active_formatting_list(treebuilder); - - insert_element(treebuilder, &token->data.tag); - - if (treebuilder->context.form_element != NULL) { - treebuilder->tree_handler->form_associate( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - } - - if (!element_stack_pop(treebuilder, &ns, &otype, &node)) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx, - node); + insert_element(treebuilder, &tag, false); } /** @@ -974,7 +1061,9 @@ void process_isindex_in_body(hubbub_treebuilder *treebuilder, 
dummy.data.tag.n_attributes = n_attrs; dummy.data.tag.attributes = attrs; - process_input_in_body(treebuilder, &dummy); + reconstruct_active_formatting_list(treebuilder); + insert_element(treebuilder, &dummy.data.tag, false); + treebuilder->context.frameset_ok = false; /* Act as if was seen */ process_0generic_in_body(treebuilder, LABEL); @@ -1007,6 +1096,7 @@ void process_textarea_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token) { treebuilder->context.strip_leading_lr = true; + treebuilder->context.frameset_ok = false; parse_generic_rcdata(treebuilder, token, true); } @@ -1021,15 +1111,27 @@ void process_select_in_body(hubbub_treebuilder *treebuilder, { reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); - if (treebuilder->context.form_element != NULL) { - treebuilder->tree_handler->form_associate( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); + treebuilder->context.frameset_ok = false; +} + +/** + * Process an option or optgroup start tag as if in "in body" + * + * \param treebuilder The treebuilder instance + * \param token The token to process + */ +void process_opt_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + if (element_in_scope(treebuilder, OPTION, false)) { + process_0generic_in_body(treebuilder, OPTION); } + + reconstruct_active_formatting_list(treebuilder); + + insert_element(treebuilder, &token->data.tag, true); } /** @@ -1043,7 +1145,7 @@ void process_phrasing_in_body(hubbub_treebuilder *treebuilder, { reconstruct_active_formatting_list(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); } /** @@ -1068,7 +1170,9 @@ bool process_0body_in_body(hubbub_treebuilder *treebuilder) element_type ntype = stack[node].type; if (ntype != DD && ntype 
!= DT && ntype != LI && - ntype != P && ntype != TBODY && + ntype != OPTGROUP && ntype != OPTION && + ntype != P && ntype != RP && + ntype != RT && ntype != TBODY && ntype != TD && ntype != TFOOT && ntype != TH && ntype != THEAD && ntype != TR && ntype != BODY) { @@ -1089,14 +1193,6 @@ bool process_0body_in_body(hubbub_treebuilder *treebuilder) void process_0container_in_body(hubbub_treebuilder *treebuilder, element_type type) { - if (type == FORM) { - if (treebuilder->context.form_element != NULL) - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element); - treebuilder->context.form_element = NULL; - } - if (!element_in_scope(treebuilder, type, false)) { /** \todo parse error */ } else { @@ -1127,6 +1223,49 @@ void process_0container_in_body(hubbub_treebuilder *treebuilder, } } +/** + * Process a form end tag as if in "in body" + * + * \param treebuilder The treebuilder instance + */ +void process_0form_in_body(hubbub_treebuilder *treebuilder) +{ + void *node = treebuilder->context.form_element; + uint32_t idx = 0; + + if (treebuilder->context.form_element != NULL) + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.form_element); + treebuilder->context.form_element = NULL; + + idx = element_in_scope(treebuilder, FORM, false); + + if (idx == 0 || node == NULL || + treebuilder->context.element_stack[idx].node != node) { + /** \todo parse error */ + } else { + hubbub_ns ns; + element_type otype; + void *node; + + close_implied_end_tags(treebuilder, UNKNOWN); + + if (treebuilder->context.element_stack[ + treebuilder->context.current_node].node != + node) { + /** \todo parse error */ + } + + element_stack_remove(treebuilder, idx, &ns, &otype, &node); + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + } +} + + /** * Process a p end tag as if in "in body" * @@ -1309,18 +1448,15 @@ void process_0presentational_in_body(hubbub_treebuilder 
*treebuilder, common_ancestor = formatting_element - 1; /* 5 */ - aa_remove_from_parent(treebuilder, stack[furthest_block].node); - - /* 6 */ bookmark.prev = entry->prev; bookmark.next = entry->next; - /* 7 */ + /* 6 */ aa_find_bookmark_location_reparenting_misnested(treebuilder, formatting_element, &furthest_block, &bookmark, &last_node); - /* 8 */ + /* 7 */ if (stack[common_ancestor].type == TABLE || stack[common_ancestor].type == TBODY || stack[common_ancestor].type == TFOOT || @@ -1357,17 +1493,17 @@ void process_0presentational_in_body(hubbub_treebuilder *treebuilder, stack[last_node].node = reparented; } - /* 9 */ + /* 8 */ treebuilder->tree_handler->clone_node( treebuilder->tree_handler->ctx, entry->details.node, false, &fe_clone); - /* 10 */ + /* 9 */ treebuilder->tree_handler->reparent_children( treebuilder->tree_handler->ctx, stack[furthest_block].node, fe_clone); - /* 11 */ + /* 10 */ treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, stack[furthest_block].node, fe_clone, @@ -1385,10 +1521,10 @@ void process_0presentational_in_body(hubbub_treebuilder *treebuilder, clone_appended); } - /* 12 and 13 are reversed here so that we know the correct + /* 11 and 12 are reversed here so that we know the correct * stack index to use when inserting into the formatting list */ - /* 13 */ + /* 12 */ aa_remove_element_stack_item(treebuilder, formatting_element, furthest_block); @@ -1400,7 +1536,7 @@ void process_0presentational_in_body(hubbub_treebuilder *treebuilder, stack[furthest_block + 1].type = entry->details.type; stack[furthest_block + 1].node = clone_appended; - /* 12 */ + /* 11 */ formatting_list_remove(treebuilder, entry, &ons, &otype, &onode, &oindex); @@ -1411,7 +1547,7 @@ void process_0presentational_in_body(hubbub_treebuilder *treebuilder, bookmark.prev, bookmark.next, ons, otype, clone_appended, furthest_block + 1); - /* 14 */ + /* 13 */ } } @@ -1626,7 +1762,6 @@ void aa_find_bookmark_location_reparenting_misnested( node = last = 
fb = *furthest_block; while (true) { - bool children = false; void *reparented; /* i */ @@ -1668,13 +1803,7 @@ void aa_find_bookmark_location_reparenting_misnested( } /* v */ - treebuilder->tree_handler->has_children( - treebuilder->tree_handler->ctx, - node_entry->details.node, &children); - - if (children) { - aa_clone_and_replace_entries(treebuilder, node_entry); - } + aa_clone_and_replace_entries(treebuilder, node_entry); /* vi */ reparented = aa_reparent_node(treebuilder, @@ -1842,6 +1971,8 @@ void *aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder, void *node) } } + aa_remove_from_parent(treebuilder, node); + if (insert) { treebuilder->tree_handler->insert_before( treebuilder->tree_handler->ctx, @@ -1928,7 +2059,7 @@ void process_0br_in_body(hubbub_treebuilder *treebuilder) reconstruct_active_formatting_list(treebuilder); - insert_element_no_push(treebuilder, &tag); + insert_element(treebuilder, &tag, false); } /** diff --git a/src/treebuilder/in_column_group.c b/src/treebuilder/in_column_group.c index cd6d589..e489e6b 100644 --- a/src/treebuilder/in_column_group.c +++ b/src/treebuilder/in_column_group.c @@ -51,7 +51,7 @@ hubbub_error handle_in_column_group(hubbub_treebuilder *treebuilder, /* Process as if "in body" */ handle_in_body(treebuilder, token); } else if (type == COL) { - insert_element_no_push(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, false); /** \todo ack sc flag */ } else { diff --git a/src/treebuilder/in_foreign_content.c b/src/treebuilder/in_foreign_content.c index 9b09752..35d8968 100644 --- a/src/treebuilder/in_foreign_content.c +++ b/src/treebuilder/in_foreign_content.c @@ -135,6 +135,30 @@ static const case_changes svg_tagnames[] = { #undef S +/** + * Adjust MathML attributes + * + * \param treebuilder Treebuilder instance + * \param tag Tag to adjust the attributes of + */ +void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, + hubbub_tag *tag) +{ + size_t i; + UNUSED(treebuilder); 
+ + for (i = 0; i < tag->n_attributes; i++) { + hubbub_attribute *attr = &tag->attributes[i]; + const uint8_t *name = attr->name.ptr; + size_t len = attr->name.len; + + if (hubbub_string_match(name, len, + (const uint8_t *) "definitionurl", + SLEN("definitionurl"))) { + attr->name.ptr = (uint8_t *) "definitionURL"; + } + } +} /** * Adjust SVG attributes. @@ -157,10 +181,10 @@ void adjust_svg_attributes(hubbub_treebuilder *treebuilder, for (j = 0; j < N_ELEMENTS(svg_attributes); j++) { if (hubbub_string_match(name, len, - (uint8_t *)svg_attributes[j].attr, + (uint8_t *) svg_attributes[j].attr, svg_attributes[j].len)) { attr->name.ptr = - (uint8_t *)svg_attributes[j].proper; + (uint8_t *) svg_attributes[j].proper; } } } @@ -183,10 +207,9 @@ void adjust_svg_tagname(hubbub_treebuilder *treebuilder, for (i = 0; i < N_ELEMENTS(svg_tagnames); i++) { if (hubbub_string_match(name, len, - (uint8_t *)svg_tagnames[i].attr, + (uint8_t *) svg_tagnames[i].attr, svg_tagnames[i].len)) { - tag->name.ptr = - (uint8_t *)svg_tagnames[i].proper; + tag->name.ptr = (uint8_t *) svg_tagnames[i].proper; } } } @@ -237,7 +260,8 @@ void adjust_foreign_attributes(hubbub_treebuilder *treebuilder, } /* 8 == strlen("xml:base") */ } else if (attr->name.len >= 8 && - strncmp((char *) name, "xml:", SLEN("xml:")) == 0) { + strncmp((char *) name, "xml:", + SLEN("xml:")) == 0) { size_t len = attr->name.len - 4; name += 4; @@ -318,8 +342,7 @@ static void process_as_in_secondary(hubbub_treebuilder *treebuilder, if (treebuilder->context.mode == IN_FOREIGN_CONTENT && !element_in_scope_in_non_html_ns(treebuilder)) { - treebuilder->context.mode = - treebuilder->context.second_mode; + treebuilder->context.mode = treebuilder->context.second_mode; } } @@ -398,20 +421,45 @@ hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, type == BODY || type == BR || type == CENTER || type == CODE || type == DD || type == DIV || type == DL || type == DT || type == EM || - type == EMBED || type == FONT || 
type == H1 || - type == H2 || type == H3 || type == H4 || - type == H5 || type == H6 || type == HEAD || - type == HR || type == I || type == IMG || - type == LI || type == LISTING || - type == MENU || type == META || type == NOBR || - type == OL || type == P || type == PRE || - type == RUBY || type == S || type == SMALL || - type == SPAN || type == STRONG || - type == STRIKE || type == SUB || type == SUP || - type == TABLE || type == TT || type == U || - type == UL || type == VAR) { + type == EMBED || type == H1 || type == H2 || + type == H3 || type == H4 || type == H5 || + type == H6 || type == HEAD || type == HR || + type == I || type == IMG || type == LI || + type == LISTING || type == MENU || + type == META || type == NOBR || type == OL || + type == P || type == PRE || type == RUBY || + type == S || type == SMALL || type == SPAN || + type == STRONG || type == STRIKE || + type == SUB || type == SUP || type == TABLE || + type == TT || type == U || type == UL || + type == VAR) { foreign_break_out(treebuilder); err = HUBBUB_REPROCESS; + } else if (type == FONT) { + const hubbub_tag *tag = &token->data.tag; + size_t i; + + for (i = 0; i < tag->n_attributes; i++) { + hubbub_attribute *attr = &tag->attributes[i]; + const uint8_t *name = attr->name.ptr; + size_t len = attr->name.len; + + if (hubbub_string_match(name, len, + (const uint8_t *) "color", + SLEN("color")) || + hubbub_string_match(name, len, + (const uint8_t *) "face", + SLEN("face")) || + hubbub_string_match(name, len, + (const uint8_t *) "size", + SLEN("size"))) + break; + } + + if (i != tag->n_attributes) { + foreign_break_out(treebuilder); + err = HUBBUB_REPROCESS; + } } else { hubbub_tag tag = token->data.tag; @@ -426,10 +474,10 @@ hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, tag.ns = cur_node_ns; if (token->data.tag.self_closing) { - insert_element_no_push(treebuilder, &tag); + insert_element(treebuilder, &tag, false); /** \todo ack sc flag */ } else { - 
insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); } } } diff --git a/src/treebuilder/in_frameset.c b/src/treebuilder/in_frameset.c index 221313e..10d1a8f 100644 --- a/src/treebuilder/in_frameset.c +++ b/src/treebuilder/in_frameset.c @@ -49,9 +49,9 @@ hubbub_error handle_in_frameset(hubbub_treebuilder *treebuilder, if (type == HTML) { handle_in_body(treebuilder, token); } else if (type == FRAMESET) { - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); } else if (type == FRAME) { - insert_element_no_push(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, false); /** \todo ack sc flag */ } else if (type == NOFRAMES) { err = handle_in_head(treebuilder, token); diff --git a/src/treebuilder/in_head.c b/src/treebuilder/in_head.c index b042e52..32d2ce0 100644 --- a/src/treebuilder/in_head.c +++ b/src/treebuilder/in_head.c @@ -34,7 +34,7 @@ static hubbub_error process_meta_in_head(hubbub_treebuilder *treebuilder, uint16_t content_type_enc = 0; size_t i; - insert_element_no_push(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, false); /** \todo ack sc flag */ @@ -99,52 +99,6 @@ static hubbub_error process_meta_in_head(hubbub_treebuilder *treebuilder, return HUBBUB_OK; } - - -/** - * Process a script start tag as if in "in head" - * - * \param treebuilder The treebuilder instance - * \param token The token to process - */ -static void process_script_in_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - int success; - void *script; - hubbub_tokeniser_optparams params; - - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, - &token->data.tag, &script); - if (success != 0) { - /** \todo errors */ - } - - /** \todo mark script as parser-inserted */ - - /* It would be nice to be able to re-use the generic - * rcdata character collector here. 
Unfortunately, we - can't as we need to do special processing after the - script data has been collected, so we use an almost - identical insertion mode which does the right magic - at the end. */ - params.content_model.model = HUBBUB_CONTENT_MODEL_CDATA; - hubbub_tokeniser_setopt(treebuilder->tokeniser, - HUBBUB_TOKENISER_CONTENT_MODEL, - &params); - - treebuilder->context.collect.mode = treebuilder->context.mode; - treebuilder->context.collect.node = script; - treebuilder->context.collect.type = SCRIPT; - - treebuilder->context.mode = SCRIPT_COLLECT_CHARACTERS; -} - - - - - /** * Handle token in "in head" insertion mode * @@ -179,9 +133,8 @@ hubbub_error handle_in_head(hubbub_treebuilder *treebuilder, if (type == HTML) { /* Process as if "in body" */ handle_in_body(treebuilder, token); - } else if (type == BASE || type == COMMAND || - type == EVENTSOURCE || type == LINK) { - insert_element_no_push(treebuilder, &token->data.tag); + } else if (type == BASE || type == COMMAND || type == LINK) { + insert_element(treebuilder, &token->data.tag, false); /** \todo ack sc flag */ } else if (type == META) { @@ -194,11 +147,15 @@ hubbub_error handle_in_head(hubbub_treebuilder *treebuilder, if (treebuilder->context.enable_scripting) { parse_generic_rcdata(treebuilder, token, false); } else { - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, + true); treebuilder->context.mode = IN_HEAD_NOSCRIPT; } } else if (type == SCRIPT) { - process_script_in_head(treebuilder, token); + /** \todo need to ensure that the client callback + * sets the parser-inserted/already-executed script + * flags.
*/ + parse_generic_rcdata(treebuilder, token, false); } else if (type == HEAD) { /** \todo parse error */ } else { @@ -213,7 +170,7 @@ hubbub_error handle_in_head(hubbub_treebuilder *treebuilder, if (type == HEAD) { handled = true; - } else if (type == BR) { + } else if (type == HTML || type == BODY || type == BR) { err = HUBBUB_REPROCESS; } /** \todo parse error */ } diff --git a/src/treebuilder/in_row.c b/src/treebuilder/in_row.c index b0fd897..79d6f6c 100644 --- a/src/treebuilder/in_row.c +++ b/src/treebuilder/in_row.c @@ -92,7 +92,7 @@ hubbub_error handle_in_row(hubbub_treebuilder *treebuilder, if (type == TH || type == TD) { table_clear_stack(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->context.mode = IN_CELL; /* ref node for formatting list */ diff --git a/src/treebuilder/in_select.c b/src/treebuilder/in_select.c index e8c78f8..06fe287 100644 --- a/src/treebuilder/in_select.c +++ b/src/treebuilder/in_select.c @@ -62,7 +62,7 @@ hubbub_error handle_in_select(hubbub_treebuilder *treebuilder, node); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); } else if (type == OPTGROUP) { if (current_node(treebuilder) == OPTION) { if (!element_stack_pop(treebuilder, &ns, &otype, @@ -86,7 +86,7 @@ hubbub_error handle_in_select(hubbub_treebuilder *treebuilder, node); } - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); } else if (type == SELECT || type == INPUT || type == TEXTAREA) { @@ -99,6 +99,8 @@ hubbub_error handle_in_select(hubbub_treebuilder *treebuilder, } if (type != SELECT) err = HUBBUB_REPROCESS; + } else if (type == SCRIPT) { + handle_in_head(treebuilder, token); } else { /** \todo parse error */ } diff --git a/src/treebuilder/in_table.c b/src/treebuilder/in_table.c index 0c97470..ac5e6c5 100644 --- a/src/treebuilder/in_table.c +++ b/src/treebuilder/in_table.c @@ -56,15 
+56,7 @@ static inline bool process_input_in_table(hubbub_treebuilder *treebuilder, } /** \todo parse error */ - insert_element(treebuilder, &token->data.tag); - - if (treebuilder->context.form_element != NULL) { - treebuilder->tree_handler->form_associate( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - } + insert_element(treebuilder, &token->data.tag, true); return true; } @@ -125,7 +117,7 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder, treebuilder->context.current_node].node, treebuilder->context.current_node); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->context.mode = IN_CAPTION; } else if (type == COLGROUP || type == COL) { hubbub_tag tag = token->data.tag; @@ -141,7 +133,7 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder, } clear_stack_table_context(treebuilder); - insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->context.mode = IN_COLUMN_GROUP; } else if (type == TBODY || type == TFOOT || type == THEAD || type == TD || type == TH || type == TR) { @@ -158,7 +150,7 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder, } clear_stack_table_context(treebuilder); - insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->context.mode = IN_TABLE_BODY; } else if (type == TABLE) { /** \todo parse error */ diff --git a/src/treebuilder/in_table_body.c b/src/treebuilder/in_table_body.c index 24fea5c..985a893 100644 --- a/src/treebuilder/in_table_body.c +++ b/src/treebuilder/in_table_body.c @@ -103,7 +103,7 @@ hubbub_error handle_in_table_body(hubbub_treebuilder *treebuilder, if (type == TR) { table_clear_stack(treebuilder); - insert_element(treebuilder, &token->data.tag); + insert_element(treebuilder, &token->data.tag, true); treebuilder->context.mode = IN_ROW; } else if (type 
== TH || type == TD) { hubbub_tag tag; @@ -119,7 +119,7 @@ hubbub_error handle_in_table_body(hubbub_treebuilder *treebuilder, tag.attributes = NULL; table_clear_stack(treebuilder); - insert_element(treebuilder, &tag); + insert_element(treebuilder, &tag, true); treebuilder->context.mode = IN_ROW; err = HUBBUB_REPROCESS; diff --git a/src/treebuilder/initial.c b/src/treebuilder/initial.c index 1a9731a..6ead570 100644 --- a/src/treebuilder/initial.c +++ b/src/treebuilder/initial.c @@ -139,7 +139,8 @@ static bool lookup_full_quirks(hubbub_treebuilder *treebuilder, return true; /* No public id means not-quirks */ - if (cdoc->public_missing) return false; + if (cdoc->public_missing) + return false; for (i = 0; i < sizeof public_doctypes / sizeof public_doctypes[0]; i++) { @@ -151,21 +152,21 @@ static bool lookup_full_quirks(hubbub_treebuilder *treebuilder, } if (hubbub_string_match_ci(public_id, public_id_len, - S("-//W3O//DTD W3 HTML Strict 3.0//EN//")) || + S("-//W3O//DTD W3 HTML Strict 3.0//EN//")) || hubbub_string_match_ci(public_id, public_id_len, - S("-/W3C/DTD HTML 4.0 Transitional/EN")) || + S("-/W3C/DTD HTML 4.0 Transitional/EN")) || hubbub_string_match_ci(public_id, public_id_len, - S("HTML")) || + S("HTML")) || hubbub_string_match_ci(system_id, system_id_len, - S("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))) { + S("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))) { return true; } if (cdoc->system_missing == true && (starts_with(public_id, public_id_len, - S("-//W3C//DTD HTML 4.01 Frameset//")) || + S("-//W3C//DTD HTML 4.01 Frameset//")) || starts_with(public_id, public_id_len, - S("-//W3C//DTD HTML 4.01 Transitional//")))) { + S("-//W3C//DTD HTML 4.01 Transitional//")))) { return true; } @@ -193,17 +194,17 @@ static bool lookup_limited_quirks(hubbub_treebuilder *treebuilder, #define S(s) (uint8_t *) s, sizeof s if (starts_with(public_id, public_id_len, - S("-//W3C//DTD XHTML 1.0 Frameset//")) || + S("-//W3C//DTD XHTML 1.0 
Frameset//")) || starts_with(public_id, public_id_len, - S("-//W3C//DTD XHTML 1.0 Transitional//"))) { + S("-//W3C//DTD XHTML 1.0 Transitional//"))) { return true; } if (cdoc->system_missing == false && (starts_with(public_id, public_id_len, - S("-//W3C//DTD HTML 4.01 Frameset//")) || + S("-//W3C//DTD HTML 4.01 Frameset//")) || starts_with(public_id, public_id_len, - S("-//W3C//DTD HTML 4.01 Transitional//")))) { + S("-//W3C//DTD HTML 4.01 Transitional//")))) { return true; } @@ -220,7 +221,8 @@ static bool lookup_limited_quirks(hubbub_treebuilder *treebuilder, * \param token The token to handle * \return True to reprocess token, false otherwise */ -hubbub_error handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) +hubbub_error handle_initial(hubbub_treebuilder *treebuilder, + const hubbub_token *token) { hubbub_error err = HUBBUB_OK; @@ -249,7 +251,6 @@ hubbub_error handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token /** \todo parse error */ - /** \todo need public and system ids from tokeniser */ success = treebuilder->tree_handler->create_doctype( treebuilder->tree_handler->ctx, &token->data.doctype, diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 0649f25..823ed01 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -15,19 +15,19 @@ typedef enum /* Special */ ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS, - DIALOG, DIR, DIV, DL, DT, EMBED, EVENTSOURCE, FIELDSET, FIGURE, - FOOTER, FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, - HR, IFRAME, IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MENU, META, - NAV, NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, - PLAINTEXT, PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, TBODY, - TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR, + DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGURE, FOOTER, FORM, FRAME, + FRAMESET, H1, H2, H3, H4, H5, 
H6, HEAD, HEADER, HR, IFRAME, IMAGE, IMG, + INPUT, ISINDEX, LI, LINK, LISTING, MENU, META, NAV, NOEMBED, NOFRAMES, + NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, + SECTION, SELECT, SPACER, STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, + TR, UL, WBR, /* Scoping */ APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, /* Formatting */ - A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, + A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, /* Phrasing */ /**< \todo Enumerate phrasing elements */ - CODE, LABEL, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, + LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, /* MathML */ MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML, /* SVG */ @@ -96,7 +96,6 @@ typedef struct hubbub_treebuilder_context struct { insertion_mode mode; /**< Insertion mode to return to */ - void *node; /**< Node to attach Text child to */ element_type type; /**< Type of node */ } collect; /**< Context for character collecting */ @@ -107,6 +106,8 @@ typedef struct hubbub_treebuilder_context bool in_table_foster; /**< Whether nodes that would be * inserted into the current node should * be foster parented */ + + bool frameset_ok; /**< Whether to process a frameset */ } hubbub_treebuilder_context; /** @@ -144,9 +145,7 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder); void clear_active_formatting_list_to_marker( hubbub_treebuilder *treebuilder); void insert_element(hubbub_treebuilder *treebuilder, - const hubbub_tag *tag_name); -void insert_element_no_push(hubbub_treebuilder *treebuilder, - const hubbub_tag *tag_name); + const hubbub_tag *tag_name, bool push); void close_implied_end_tags(hubbub_treebuilder *treebuilder, element_type except); void reset_insertion_mode(hubbub_treebuilder *treebuilder); @@ -167,6 +166,8 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder, hubbub_ns *ns, element_type *type, void **node); bool 
element_stack_pop_until(hubbub_treebuilder *treebuilder, element_type type); +bool element_stack_remove(hubbub_treebuilder *treebuilder, uint32_t index, + hubbub_ns *ns, element_type *type, void **removed); uint32_t current_table(hubbub_treebuilder *treebuilder); element_type current_node(hubbub_treebuilder *treebuilder); element_type prev_node(hubbub_treebuilder *treebuilder); @@ -190,6 +191,7 @@ bool formatting_list_replace(hubbub_treebuilder *treebuilder, uint32_t *ostack_index); /* in_foreign_content.c */ +void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag); void adjust_svg_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag); void adjust_svg_tagname(hubbub_treebuilder *treebuilder, diff --git a/src/treebuilder/modes.h b/src/treebuilder/modes.h index 225ed4b..2c9f546 100644 --- a/src/treebuilder/modes.h +++ b/src/treebuilder/modes.h @@ -36,7 +36,6 @@ typedef enum AFTER_AFTER_FRAMESET, GENERIC_RCDATA, - SCRIPT_COLLECT_CHARACTERS, } insertion_mode; diff --git a/src/treebuilder/script_collect.c b/src/treebuilder/script_collect.c deleted file mode 100644 index 248001f..0000000 --- a/src/treebuilder/script_collect.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * This file is part of Hubbub. 
- * Licensed under the MIT License, - * http://www.opensource.org/licenses/mit-license.php - * Copyright 2008 John-Mark Bell - */ - -#include -#include - -#include "treebuilder/modes.h" -#include "treebuilder/internal.h" -#include "treebuilder/treebuilder.h" -#include "utils/utils.h" - - -/** - * Handle tokens in "script collect characters" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to process - * \return True to reprocess the token, false otherwise - */ -hubbub_error handle_script_collect_characters(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - hubbub_error err = HUBBUB_OK; - bool done = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - { - int success; - void *text, *appended; - - success = treebuilder->tree_handler->create_text( - treebuilder->tree_handler->ctx, - &token->data.character, - &text); - if (success != 0) { - /** \todo errors */ - } - - /** \todo fragment case -- skip this lot entirely */ - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node, - text, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, text); - } - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type != treebuilder->context.collect.type) { - /** \todo parse error */ - /** \todo Mark script as "already executed" */ - } - - done = true; - } - break; - case HUBBUB_TOKEN_EOF: - case HUBBUB_TOKEN_COMMENT: - case HUBBUB_TOKEN_DOCTYPE: - case HUBBUB_TOKEN_START_TAG: - /** \todo parse error */ - /** \todo Mark script as "already executed" */ - done = true; - err = HUBBUB_REPROCESS; - break; - } - - if 
(done) { - int success; - void *appended; - - /** \todo insertion point manipulation */ - - /* Scripts in "after head" should be inserted into <head> */ - /* See 8.2.5.9 The "after head" insertion mode */ - if (treebuilder->context.collect.mode == AFTER_HEAD) { - if (!element_stack_push(treebuilder, - HUBBUB_NS_HTML, - HEAD, - treebuilder->context.head_element)) { - /** \todo errors */ - } - } - - /* Append script node to current node */ - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - treebuilder->context.collect.node, &appended); - if (success != 0) { - /** \todo errors */ - } - - if (treebuilder->context.collect.mode == AFTER_HEAD) { - hubbub_ns ns; - element_type otype; - void *node; - - if (!element_stack_pop(treebuilder, &ns, &otype, - &node)) { - /** \todo errors */ - } - } - - /** \todo restore insertion point */ - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - appended); - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node); - treebuilder->context.collect.node = NULL; - - /** \todo process any pending script */ - - /* Return to previous insertion mode */ - treebuilder->context.mode = - treebuilder->context.collect.mode; - } - - return err; -} - diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index b7d6720..37de764 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -25,61 +25,61 @@ static const struct { element_type type; } name_type_map[] = { { S("address"), ADDRESS }, { S("area"), AREA }, - { S("base"), BASE }, { S("basefont"), BASEFONT }, + { S("base"), BASE }, { S("basefont"), BASEFONT }, { S("bgsound"), BGSOUND }, { S("blockquote"), BLOCKQUOTE }, - { S("body"), BODY }, { S("br"), BR }, + { S("body"), BODY }, { S("br"), BR }, { S("center"), CENTER }, { S("col"), COL }, { S("colgroup"), COLGROUP
}, { S("dd"), DD }, { S("dir"), DIR }, { S("div"), DIV }, { S("dl"), DL }, { S("dt"), DT }, - { S("embed"), EMBED }, { S("fieldset"), FIELDSET }, - { S("form"), FORM }, { S("frame"), FRAME }, + { S("embed"), EMBED }, { S("fieldset"), FIELDSET }, + { S("form"), FORM }, { S("frame"), FRAME }, { S("frameset"), FRAMESET }, { S("h1"), H1 }, { S("h2"), H2 }, { S("h3"), H3 }, { S("h4"), H4 }, { S("h5"), H5 }, { S("h6"), H6 }, { S("head"), HEAD }, { S("hr"), HR }, { S("iframe"), IFRAME }, - { S("image"), IMAGE }, { S("img"), IMG }, - { S("input"), INPUT }, { S("isindex"), ISINDEX }, + { S("image"), IMAGE }, { S("img"), IMG }, + { S("input"), INPUT }, { S("isindex"), ISINDEX }, { S("li"), LI }, { S("link"), LINK }, { S("listing"), LISTING }, { S("menu"), MENU }, - { S("meta"), META }, { S("noembed"), NOEMBED }, + { S("meta"), META }, { S("noembed"), NOEMBED }, { S("noframes"), NOFRAMES }, { S("noscript"), NOSCRIPT }, { S("ol"), OL }, { S("optgroup"), OPTGROUP }, - { S("option"), OPTION }, { S("p"), P }, - { S("param"), PARAM }, { S("plaintext"), PLAINTEXT }, - { S("pre"), PRE }, { S("script"), SCRIPT }, - { S("select"), SELECT }, { S("spacer"), SPACER }, - { S("style"), STYLE }, { S("tbody"), TBODY }, - { S("textarea"), TEXTAREA }, { S("tfoot"), TFOOT }, - { S("thead"), THEAD }, { S("title"), TITLE }, - { S("tr"), TR }, { S("ul"), UL }, - { S("wbr"), WBR }, + { S("option"), OPTION }, { S("output"), OUTPUT }, + { S("p"), P }, { S("param"), PARAM }, + { S("plaintext"), PLAINTEXT }, { S("pre"), PRE }, + { S("script"), SCRIPT }, { S("select"), SELECT }, + { S("spacer"), SPACER }, { S("style"), STYLE }, + { S("tbody"), TBODY }, { S("textarea"), TEXTAREA }, + { S("tfoot"), TFOOT }, { S("thead"), THEAD }, + { S("title"), TITLE }, { S("tr"), TR }, + { S("ul"), UL }, { S("wbr"), WBR }, { S("applet"), APPLET }, { S("button"), BUTTON }, { S("caption"), CAPTION }, { S("html"), HTML }, { S("marquee"), MARQUEE }, { S("object"), OBJECT }, - { S("table"), TABLE }, { S("td"), TD }, + { 
S("table"), TABLE }, { S("td"), TD }, { S("th"), TH }, - { S("a"), A }, { S("b"), B }, + { S("a"), A }, { S("b"), B }, { S("big"), BIG }, { S("em"), EM }, - { S("font"), FONT }, { S("i"), I }, - { S("nobr"), NOBR }, { S("s"), S }, - { S("small"), SMALL }, { S("strike"), STRIKE }, + { S("font"), FONT }, { S("i"), I }, + { S("nobr"), NOBR }, { S("s"), S }, + { S("small"), SMALL }, { S("strike"), STRIKE }, { S("strong"), STRONG }, { S("tt"), TT }, - { S("u"), U }, { S("xmp"), XMP }, + { S("u"), U }, { S("xmp"), XMP }, - { S("math"), MATH }, { S("mglyph"), MGLYPH }, + { S("math"), MATH }, { S("mglyph"), MGLYPH }, { S("malignmark"), MALIGNMARK }, { S("mi"), MI }, { S("mo"), MO }, { S("mn"), MN }, { S("ms"), MS }, - { S("mtext"), MTEXT }, { S("annotation-xml"), ANNOTATION_XML }, + { S("mtext"), MTEXT }, { S("annotation-xml"), ANNOTATION_XML }, { S("svg"), SVG }, { S("desc"), DESC }, { S("foreignobject"), FOREIGNOBJECT }, }; - +static bool is_form_associated(element_type type); /** * Create a hubbub treebuilder @@ -127,6 +127,7 @@ hubbub_error hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, tb->context.element_stack[0].type = 0; tb->context.strip_leading_lr = false; + tb->context.frameset_ok = true; tb->error_handler = NULL; tb->error_pw = NULL; @@ -192,12 +193,6 @@ hubbub_error hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) treebuilder->context.document); } - if (treebuilder->context.collect.node != NULL) { - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node); - } - for (n = treebuilder->context.current_node; n > 0; n--) { treebuilder->tree_handler->unref_node( @@ -364,10 +359,6 @@ hubbub_error hubbub_treebuilder_token_handler(const hubbub_token *token, mode(GENERIC_RCDATA) err = handle_generic_rcdata(treebuilder, token); break; - mode(SCRIPT_COLLECT_CHARACTERS) - err = handle_script_collect_characters( - treebuilder, token); - break; } } @@ -386,7 +377,8 @@ hubbub_error 
hubbub_treebuilder_token_handler(const hubbub_token *token, * (token data updated to skip any leading whitespace), * false if it contained only whitespace */ -hubbub_error process_characters_expect_whitespace(hubbub_treebuilder *treebuilder, +hubbub_error process_characters_expect_whitespace( + hubbub_treebuilder *treebuilder, const hubbub_token *token, bool insert_into_current_node) { const uint8_t *data = token->data.character.ptr; @@ -451,9 +443,6 @@ void process_comment_append(hubbub_treebuilder *treebuilder, parent, comment, &appended); if (success != 0) { /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - comment); } treebuilder->tree_handler->unref_node( @@ -474,59 +463,12 @@ void process_comment_append(hubbub_treebuilder *treebuilder, void parse_generic_rcdata(hubbub_treebuilder *treebuilder, const hubbub_token *token, bool rcdata) { - int success; - void *node, *appended; element_type type; hubbub_tokeniser_optparams params; type = element_type_from_name(treebuilder, &token->data.tag.name); - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, - &token->data.tag, &node); - if (success != 0) { - /** \todo errors */ - } - - if (treebuilder->context.in_table_foster) { - appended = aa_insert_into_foster_parent(treebuilder, node); - treebuilder->tree_handler->ref_node( - treebuilder->tree_handler->ctx, appended); - } else { - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - node, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - } - if (appended != node) { - /* Transfer the reference we have on node to appended. 
- * We're no longer interested in node */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - treebuilder->tree_handler->ref_node( - treebuilder->tree_handler->ctx, - appended); - } - } - - /* It's a bit nasty having this code deal with textarea->form - * association, but it avoids having to duplicate the entire rest - * of this function for textarea processing */ - if (type == TEXTAREA && treebuilder->context.form_element != NULL) { - treebuilder->tree_handler->form_associate( - treebuilder->tree_handler->ctx, - treebuilder->context.form_element, - appended); - } - - /* Appended node's reference count is 2 */ + insert_element(treebuilder, &token->data.tag, true); params.content_model.model = rcdata ? HUBBUB_CONTENT_MODEL_RCDATA : HUBBUB_CONTENT_MODEL_CDATA; @@ -535,10 +477,6 @@ void parse_generic_rcdata(hubbub_treebuilder *treebuilder, treebuilder->context.collect.mode = treebuilder->context.mode; treebuilder->context.collect.type = type; - treebuilder->context.collect.node = appended; - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); treebuilder->context.mode = GENERIC_RCDATA; } @@ -741,12 +679,15 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder) } /** - * Create element and insert it into the DOM, pushing it on the stack + * Create element and insert it into the DOM, + * potentially pushing it on the stack * * \param treebuilder The treebuilder instance * \param tag The element to insert + * \param push Whether to push the element onto the stack */ -void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) +void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag, + bool push) { element_type type = current_node(treebuilder); int success; @@ -776,53 +717,25 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag) treebuilder->tree_handler->ctx, node); } - if (!element_stack_push(treebuilder, - tag->ns, - 
element_type_from_name(treebuilder, &tag->name), - appended)) { - /** \todo errors */ - } -} - -/** - * Create element and insert it into the DOM, do not push it onto the stack - * - * \param treebuilder The treebuilder instance - * \param tag The element to insert - */ -void insert_element_no_push(hubbub_treebuilder *treebuilder, - const hubbub_tag *tag) -{ - element_type type = current_node(treebuilder); - int success; - void *node, *appended; - - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, tag, &node); - if (success != 0) { - /** \todo errors */ - } - - if (treebuilder->context.in_table_foster && - (type == TABLE || type == TBODY || type == TFOOT || - type == THEAD || type == TR)) { - appended = aa_insert_into_foster_parent(treebuilder, node); - } else { - success = treebuilder->tree_handler->append_child( + type = element_type_from_name(treebuilder, &tag->name); + if (treebuilder->context.form_element != NULL && + is_form_associated(type)) { + /** \todo consider @form, or leave it to the client? 
*/ + treebuilder->tree_handler->form_associate( treebuilder->tree_handler->ctx, + treebuilder->context.form_element, treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - node, &appended); - if (success != 0) { + treebuilder->context.current_node].node); + } + + if (push) { + if (!element_stack_push(treebuilder, tag->ns, type, appended)) { /** \todo errors */ } - + } else { treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, node); + treebuilder->tree_handler->ctx, appended); } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); } /** @@ -1041,6 +954,19 @@ bool is_phrasing_element(element_type type) return (type > U); } +/** + * Determine if a node is form associated + * + * \param type Node type to consider + * \return True iff node is form associated + */ +bool is_form_associated(element_type type) +{ + return type == FIELDSET || type == LABEL || type == INPUT || + type == BUTTON || type == SELECT || type == TEXTAREA || + type == OUTPUT; +} + /** * Push an element onto the stack of open elements * @@ -1159,6 +1085,60 @@ bool element_stack_pop_until(hubbub_treebuilder *treebuilder, return true; } +/** + * Remove a node from the stack of open elements + * + * \param treebuilder The treebuilder instance + * \param index The index of the node to remove + * \param ns Pointer to location to receive namespace + * \param type Pointer to location to receive type + * \param removed Pointer to location to receive removed node + * \return true on success, false on memory exhaustion + */ +bool element_stack_remove(hubbub_treebuilder *treebuilder, uint32_t index, + hubbub_ns *ns, element_type *type, void **removed) +{ + element_context *stack = treebuilder->context.element_stack; + uint32_t n; + + assert(index <= treebuilder->context.current_node); + + /* Scan over subsequent entries in the stack, + * searching for them in the list of active formatting + * entries. 
If found, update the corresponding + * formatting list entry's stack index to match the + * new stack location */ + for (n = index + 1; n <= treebuilder->context.current_node; n++) { + if (is_formatting_element(stack[n].type) || + (is_scoping_element(stack[n].type) && + stack[n].type != HTML && + stack[n].type != TABLE)) { + formatting_list_entry *e; + + for (e = treebuilder->context.formatting_list_end; + e != NULL; e = e->prev) { + if (e->stack_index == n) + e->stack_index--; + } + } + } + + *ns = stack[index].ns; + *type = stack[index].type; + *removed = stack[index].node; + + /* Now, shuffle the stack up one, removing node in the process */ + if (index < treebuilder->context.current_node) { + memmove(&stack[index], &stack[index + 1], + (treebuilder->context.current_node - index) * + sizeof(element_context)); + } + + treebuilder->context.current_node--; + + return true; +} + /** * Find the stack index of the current table. */ -- cgit v1.2.3