/* * This file is part of Hubbub. * Licensed under the MIT License, * http://www.opensource.org/licenses/mit-license.php * Copyright 2008 John-Mark Bell */ #include #include #include "treebuilder/in_body.h" #include "treebuilder/internal.h" #include "treebuilder/treebuilder.h" #include "utils/utils.h" static const struct { const char *name; element_type type; } name_type_map[] = { { "ADDRESS", ADDRESS }, { "AREA", AREA }, { "BASE", BASE }, { "BASEFONT", BASEFONT }, { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE }, { "BODY", BODY }, { "BR", BR }, { "CENTER", CENTER }, { "COL", COL }, { "COLGROUP", COLGROUP }, { "DD", DD }, { "DIR", DIR }, { "DIV", DIV }, { "DL", DL }, { "DT", DT }, { "EMBED", EMBED }, { "FIELDSET", FIELDSET }, { "FORM", FORM }, { "FRAME", FRAME }, { "FRAMESET", FRAMESET }, { "H1", H1 }, { "H2", H2 }, { "H3", H3 }, { "H4", H4 }, { "H5", H5 }, { "H6", H6 }, { "HEAD", HEAD }, { "HR", HR }, { "IFRAME", IFRAME }, { "IMAGE", IMAGE }, { "IMG", IMG }, { "INPUT", INPUT }, { "ISINDEX", ISINDEX }, { "LI", LI }, { "LINK", LINK }, { "LISTING", LISTING }, { "MENU", MENU }, { "META", META }, { "NOEMBED", NOEMBED }, { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT }, { "OL", OL }, { "OPTGROUP", OPTGROUP }, { "OPTION", OPTION }, { "P", P }, { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT }, { "PRE", PRE }, { "SCRIPT", SCRIPT }, { "SELECT", SELECT }, { "SPACER", SPACER }, { "STYLE", STYLE }, { "TBODY", TBODY }, { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT }, { "THEAD", THEAD }, { "TITLE", TITLE }, { "TR", TR }, { "UL", UL }, { "WBR", WBR }, { "APPLET", APPLET }, { "BUTTON", BUTTON }, { "CAPTION", CAPTION }, { "HTML", HTML }, { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT }, { "TABLE", TABLE }, { "TD", TD }, { "TH", TH }, { "A", A }, { "B", B }, { "BIG", BIG }, { "EM", EM }, { "FONT", FONT }, { "I", I }, { "NOBR", NOBR }, { "S", S }, { "SMALL", SMALL }, { "STRIKE", STRIKE }, { "STRONG", STRONG }, { "TT", TT }, { "U", U }, }; static void hubbub_treebuilder_buffer_handler(const uint8_t *data, size_t len, void *pw); static void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw); static bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_before_html(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_before_head(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_after_head(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, const hubbub_token *token); static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, const hubbub_token *token); /** * Create a hubbub treebuilder * * \param tokeniser Underlying tokeniser instance * \param alloc Memory (de)allocation function * \param pw Pointer to client-specific private data * \return Pointer to treebuilder instance, or NULL on error. */ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, hubbub_alloc alloc, void *pw) { hubbub_treebuilder *tb; hubbub_tokeniser_optparams tokparams; if (tokeniser == NULL || alloc == NULL) return NULL; tb = alloc(NULL, sizeof(hubbub_treebuilder), pw); if (tb == NULL) return NULL; tb->tokeniser = tokeniser; tb->input_buffer = NULL; tb->input_buffer_len = 0; tb->tree_handler = NULL; memset(&tb->context, 0, sizeof(hubbub_treebuilder_context)); tb->context.mode = INITIAL; tb->context.element_stack = alloc(NULL, ELEMENT_STACK_CHUNK * sizeof(element_context), pw); if (tb->context.element_stack == NULL) { alloc(tb, 0, pw); return NULL; } tb->context.stack_alloc = ELEMENT_STACK_CHUNK; /* We rely on HTML not being equal to zero to determine * if the first item in the stack is in use. Assert this here. */ assert(HTML != 0); tb->context.element_stack[0].type = 0; tb->context.collect.string.type = HUBBUB_STRING_OFF; tb->context.strip_leading_lr = false; tb->buffer_handler = NULL; tb->buffer_pw = NULL; tb->error_handler = NULL; tb->error_pw = NULL; tb->alloc = alloc; tb->alloc_pw = pw; tokparams.token_handler.handler = hubbub_treebuilder_token_handler; tokparams.token_handler.pw = tb; if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams) != HUBBUB_OK) { alloc(tb->context.element_stack, 0, pw); alloc(tb, 0, pw); return NULL; } tokparams.buffer_handler.handler = hubbub_treebuilder_buffer_handler; tokparams.buffer_handler.pw = tb; if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER, &tokparams) != HUBBUB_OK) { alloc(tb->context.element_stack, 0, pw); alloc(tb, 0, pw); return NULL; } return tb; } /** * Destroy a hubbub treebuilder * * \param treebuilder The treebuilder instance to destroy */ void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder) { formatting_list_entry *entry, *next; hubbub_tokeniser_optparams tokparams; if (treebuilder == NULL) return; tokparams.buffer_handler.handler = treebuilder->buffer_handler; tokparams.buffer_handler.pw = treebuilder->buffer_pw; hubbub_tokeniser_setopt(treebuilder->tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER, &tokparams); tokparams.token_handler.handler = NULL; tokparams.token_handler.pw = NULL; hubbub_tokeniser_setopt(treebuilder->tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams); /* Clean up context */ if (treebuilder->tree_handler != NULL) { if (treebuilder->context.head_element != NULL) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.head_element); } if (treebuilder->context.form_element != NULL) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.form_element); } if (treebuilder->context.document != NULL) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.document); } for (uint32_t n = treebuilder->context.current_node; n > 0; n--) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[n].node); } if (treebuilder->context.element_stack[0].type == HTML) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[0].node); } } treebuilder->alloc(treebuilder->context.element_stack, 0, treebuilder->alloc_pw); treebuilder->context.element_stack = NULL; for (entry = treebuilder->context.formatting_list; entry != NULL; entry = next) { next = entry->next; if (treebuilder->tree_handler != NULL) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, entry->details.node); } treebuilder->alloc(entry, 0, treebuilder->alloc_pw); } treebuilder->alloc(treebuilder, 0, treebuilder->alloc_pw); } /** * Configure a hubbub treebuilder * * \param treebuilder The treebuilder instance to configure * \param type The option type to configure * \param params Pointer to option-specific parameters * \return HUBBUB_OK on success, appropriate error otherwise. */ hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder, hubbub_treebuilder_opttype type, hubbub_treebuilder_optparams *params) { if (treebuilder == NULL || params == NULL) return HUBBUB_BADPARM; switch (type) { case HUBBUB_TREEBUILDER_BUFFER_HANDLER: treebuilder->buffer_handler = params->buffer_handler.handler; treebuilder->buffer_pw = params->buffer_handler.pw; treebuilder->buffer_handler(treebuilder->input_buffer, treebuilder->input_buffer_len, treebuilder->buffer_pw); break; case HUBBUB_TREEBUILDER_ERROR_HANDLER: treebuilder->error_handler = params->error_handler.handler; treebuilder->error_pw = params->error_handler.pw; break; case HUBBUB_TREEBUILDER_TREE_HANDLER: treebuilder->tree_handler = params->tree_handler; break; case HUBBUB_TREEBUILDER_DOCUMENT_NODE: treebuilder->context.document = params->document_node; break; } return HUBBUB_OK; } /** * Handle tokeniser buffer moving * * \param data New location of buffer * \param len Length of buffer in bytes * \param pw Pointer to treebuilder instance */ void hubbub_treebuilder_buffer_handler(const uint8_t *data, size_t len, void *pw) { hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw; treebuilder->input_buffer = data; treebuilder->input_buffer_len = len; /* Inform client buffer handler, too (if there is one) */ if (treebuilder->buffer_handler != NULL) { treebuilder->buffer_handler(treebuilder->input_buffer, treebuilder->input_buffer_len, treebuilder->buffer_pw); } } /** * Handle tokeniser emitting a token * * \param token The emitted token * \param pw Pointer to treebuilder instance */ void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw) { hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw; bool reprocess = true; /* Do nothing if we have no document node or there's no tree handler */ if (treebuilder->context.document == NULL || treebuilder->tree_handler == NULL) return; while (reprocess) { switch (treebuilder->context.mode) { case INITIAL: reprocess = handle_initial(treebuilder, token); break; case BEFORE_HTML: reprocess = handle_before_html(treebuilder, token); break; case BEFORE_HEAD: reprocess = handle_before_head(treebuilder, token); break; case IN_HEAD: reprocess = handle_in_head(treebuilder, token); break; case IN_HEAD_NOSCRIPT: reprocess = handle_in_head_noscript(treebuilder, token); break; case AFTER_HEAD: reprocess = handle_after_head(treebuilder, token); break; case IN_BODY: reprocess = handle_in_body(treebuilder, token); break; case IN_TABLE: case IN_CAPTION: case IN_COLUMN_GROUP: case IN_TABLE_BODY: case IN_ROW: case IN_CELL: case IN_SELECT: case IN_SELECT_IN_TABLE: case AFTER_BODY: case IN_FRAMESET: case AFTER_FRAMESET: case AFTER_AFTER_BODY: case AFTER_AFTER_FRAMESET: reprocess = false; break; case GENERIC_RCDATA: reprocess = handle_generic_rcdata(treebuilder, token); break; case SCRIPT_COLLECT_CHARACTERS: reprocess = handle_script_collect_characters( treebuilder, token); break; } } } /** * Handle token in initial insertion mode * * \param treebuilder The treebuilder instance * \param token The token to handle * \return True to reprocess token, false otherwise */ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: if (process_characters_expect_whitespace(treebuilder, token, false)) { /** \todo parse error */ treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_FULL); reprocess = true; } break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.document); break; case HUBBUB_TOKEN_DOCTYPE: { int success; void *doctype, *appended; /** \todo need public and system ids from tokeniser */ success = treebuilder->tree_handler->create_doctype( treebuilder->tree_handler->ctx, &token->data.doctype.name, NULL, NULL, &doctype); if (success != 0) { /** \todo errors */ } /* Append to Document node */ success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.document, doctype, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, doctype); } /** \todo doctype processing */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, doctype); treebuilder->context.mode = BEFORE_HTML; } break; case HUBBUB_TOKEN_START_TAG: case HUBBUB_TOKEN_END_TAG: case HUBBUB_TOKEN_EOF: /** \todo parse error */ treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_FULL); reprocess = true; break; } if (reprocess) { treebuilder->context.mode = BEFORE_HTML; } return reprocess; } /** * Handle token in "before html" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to handle * \return True to reprocess token, false otherwise */ bool handle_before_html(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.document); break; case HUBBUB_TOKEN_CHARACTER: reprocess = process_characters_expect_whitespace(treebuilder, token, false); break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { handled = true; } else { reprocess = true; } } break; case HUBBUB_TOKEN_END_TAG: case HUBBUB_TOKEN_EOF: reprocess = true; break; } if (handled || reprocess) { int success; void *html, *appended; /* We can't use insert_element() here, as it assumes * that we're inserting into current_node. There is * no current_node to insert into at this point so * we get to do it manually. */ if (reprocess) { /* Need to manufacture html element */ hubbub_tag tag; /** \todo UTF-16 */ tag.name.type = HUBBUB_STRING_PTR; tag.name.data.ptr = (const uint8_t *) "html"; tag.name.len = SLEN("html"); tag.n_attributes = 0; tag.attributes = NULL; success = treebuilder->tree_handler->create_element( treebuilder->tree_handler->ctx, &tag, &html); } else { success = treebuilder->tree_handler->create_element( treebuilder->tree_handler->ctx, &token->data.tag, &html); } if (success != 0) { /** \todo errors */ } success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.document, html, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, html); } /* We can't use element_stack_push() here, as it * assumes that current_node is pointing at the index * before the one to insert at. For the first entry in * the stack, this does not hold so we must insert * manually. */ treebuilder->context.element_stack[0].type = HTML; treebuilder->context.element_stack[0].node = html; treebuilder->context.current_node = 0; /** \todo cache selection algorithm */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->context.mode = BEFORE_HEAD; } return reprocess; } /** * Handle token in "before head" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to handle * \return True to reprocess token, false otherwise */ bool handle_before_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: reprocess = process_characters_expect_whitespace(treebuilder, token, false); break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.element_stack[ treebuilder->context.current_node].node); break; case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { /* Process as if "in body" */ process_tag_in_body(treebuilder, token); } else if (type == HEAD) { handled = true; } else { reprocess = true; } } break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HEAD || type == BODY || type == HTML || type == P || type == BR) { reprocess = true; } else { /** \todo parse error */ } } break; case HUBBUB_TOKEN_EOF: reprocess = true; break; } if (handled || reprocess) { hubbub_tag tag; if (reprocess) { /* Manufacture head tag */ tag.name.type = HUBBUB_STRING_PTR; tag.name.data.ptr = (const uint8_t *) "head"; tag.name.len = SLEN("head"); tag.n_attributes = 0; tag.attributes = NULL; } else { tag = token->data.tag; } insert_element(treebuilder, &tag); treebuilder->tree_handler->ref_node( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ treebuilder->context.current_node].node); treebuilder->context.head_element = treebuilder->context.element_stack[ treebuilder->context.current_node].node; treebuilder->context.mode = IN_HEAD; } return reprocess; } /** * Handle token in "in head" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to handle * \return True to reprocess token, false otherwise */ bool handle_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: reprocess = process_characters_expect_whitespace(treebuilder, token, true); break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.element_stack[ treebuilder->context.current_node].node); break; case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { /* Process as if "in body" */ process_tag_in_body(treebuilder, token); } else if (type == BASE || type == LINK || type == META) { process_base_link_meta_in_head(treebuilder, token, type); } else if (type == TITLE) { parse_generic_rcdata(treebuilder, token, true); } else if (type == NOSCRIPT) { /** \todo determine if scripting is enabled */ if (false /*scripting_is_enabled*/) { parse_generic_rcdata(treebuilder, token, false); } else { insert_element(treebuilder, &token->data.tag); treebuilder->context.mode = IN_HEAD_NOSCRIPT; } } else if (type == STYLE) { parse_generic_rcdata(treebuilder, token, false); } else if (type == SCRIPT) { process_script_in_head(treebuilder, token); } else if (type == HEAD) { /** \todo parse error */ } else { reprocess = true; } } break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HEAD) { handled = true; } else if (type == BODY || type == HTML || type == P || type == BR) { reprocess = true; } } break; case HUBBUB_TOKEN_EOF: reprocess = true; break; } if (handled || reprocess) { element_type otype; void *node; if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, node); treebuilder->context.mode = AFTER_HEAD; } return reprocess; } /** * Handle tokens in "in head noscript" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to process * \return True to reprocess the token, false otherwise */ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: reprocess = process_characters_expect_whitespace(treebuilder, token, true); break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.element_stack[ treebuilder->context.current_node].node); break; case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { /* Process as "in body" */ process_tag_in_body(treebuilder, token); } else if (type == LINK || type == META) { process_base_link_meta_in_head(treebuilder, token, type); } else if (type == STYLE) { parse_generic_rcdata(treebuilder, token, false); } else if (type == HEAD || type == NOSCRIPT) { /** \todo parse error */ } else { /** \todo parse error */ reprocess = true; } } break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == NOSCRIPT) { handled = true; } else if (type == P || type == BR) { /** \todo parse error */ reprocess = true; } else { /** \todo parse error */ } } break; case HUBBUB_TOKEN_EOF: /** \todo parse error */ reprocess = true; break; } if (handled || reprocess) { element_type otype; void *node; if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, node); treebuilder->context.mode = IN_HEAD; } return reprocess; } /** * Handle tokens in "after head" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to process * \return True to reprocess the token, false otherwise */ bool handle_after_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: reprocess = process_characters_expect_whitespace(treebuilder, token, true); break; case HUBBUB_TOKEN_COMMENT: process_comment_append(treebuilder, token, treebuilder->context.element_stack[ treebuilder->context.current_node].node); break; case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { /* Process as if "in body" */ process_tag_in_body(treebuilder, token); } else if (type == BODY) { handled = true; } else if (type == FRAMESET) { insert_element(treebuilder, &token->data.tag); treebuilder->context.mode = IN_FRAMESET; } else if (type == BASE || type == LINK || type == META || type == SCRIPT || type == STYLE || type == TITLE) { element_type otype; void *node; /** \todo parse error */ if (!element_stack_push(treebuilder, HEAD, treebuilder->context.head_element)) { /** \todo errors */ } if (type == BASE || type == LINK || type == META) { process_base_link_meta_in_head(treebuilder, token, type); } else if (type == SCRIPT) { process_script_in_head(treebuilder, token); } else if (type == STYLE) { parse_generic_rcdata(treebuilder, token, false); } else if (type == TITLE) { parse_generic_rcdata(treebuilder, token, true); } if (!element_stack_pop(treebuilder, &otype, &node)) { /** \todo errors */ } /* No need to unref node as we never increased * its reference count when pushing it on the stack */ } else { reprocess = true; } } break; case HUBBUB_TOKEN_END_TAG: case HUBBUB_TOKEN_EOF: reprocess = true; break; } if (handled || reprocess) { hubbub_tag tag; if (reprocess) { /* Manufacture body */ tag.name.type = HUBBUB_STRING_PTR; tag.name.data.ptr = (const uint8_t *) "body"; tag.name.len = SLEN("body"); tag.n_attributes = 0; tag.attributes = NULL; } else { tag = token->data.tag; } insert_element(treebuilder, &tag); treebuilder->context.mode = IN_BODY; } return reprocess; } /** * Handle tokens in "generic rcdata" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to process * \return True to reprocess the token, false otherwise */ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool done = false; if (treebuilder->context.strip_leading_lr && token->type != HUBBUB_TOKEN_CHARACTER) { /* Reset the LR stripping flag */ treebuilder->context.strip_leading_lr = false; } switch (token->type) { case HUBBUB_TOKEN_CHARACTER: if (treebuilder->context.collect.string.len == 0) { treebuilder->context.collect.string.data.off = token->data.character.data.off; } treebuilder->context.collect.string.len += token->data.character.len; if (treebuilder->context.strip_leading_lr) { const uint8_t *str = treebuilder->input_buffer + treebuilder->context.collect.string.data.off; /** \todo UTF-16 */ if (*str == '\n') { treebuilder->context.collect.string.data.off++; treebuilder->context.collect.string.len--; } treebuilder->context.strip_leading_lr = false; } break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type != treebuilder->context.collect.type) { /** \todo parse error */ } done = true; } break; case HUBBUB_TOKEN_EOF: /** \todo parse error */ done = reprocess = true; break; case HUBBUB_TOKEN_COMMENT: case HUBBUB_TOKEN_DOCTYPE: case HUBBUB_TOKEN_START_TAG: /* Should never happen */ assert(0); break; } if (done) { int success; void *text, *appended; success = treebuilder->tree_handler->create_text( treebuilder->tree_handler->ctx, &treebuilder->context.collect.string, &text); if (success != 0) { /** \todo errors */ } success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.collect.node, text, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, text); } treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, text); /* Clean up context */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.collect.node); treebuilder->context.collect.node = NULL; /* Return to previous insertion mode */ treebuilder->context.mode = treebuilder->context.collect.mode; } return reprocess; } /** * Handle tokens in "script collect characters" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to process * \return True to reprocess the token, false otherwise */ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, const hubbub_token *token) { bool reprocess = false; bool done = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: if (treebuilder->context.collect.string.len == 0) { treebuilder->context.collect.string.data.off = token->data.character.data.off; } treebuilder->context.collect.string.len += token->data.character.len; break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type != treebuilder->context.collect.type) { /** \todo parse error */ /** \todo Mark script as "already executed" */ } done = true; } break; case HUBBUB_TOKEN_EOF: case HUBBUB_TOKEN_COMMENT: case HUBBUB_TOKEN_DOCTYPE: case HUBBUB_TOKEN_START_TAG: /** \todo parse error */ /** \todo Mark script as "already executed" */ done = reprocess = true; break; } if (done) { int success; void *text, *appended; /** \todo fragment case -- skip this lot entirely */ success = treebuilder->tree_handler->create_text( treebuilder->tree_handler->ctx, &treebuilder->context.collect.string, &text); if (success != 0) { /** \todo errors */ } success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.collect.node, text, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, text); } treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, text); /** \todo insertion point manipulation */ /* Append script node to current node */ success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ treebuilder->context.current_node].node, treebuilder->context.collect.node, &appended); if (success != 0) { /** \todo errors */ } /** \todo restore insertion point */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, treebuilder->context.collect.node); treebuilder->context.collect.node = NULL; /** \todo process any pending script */ /* Return to previous insertion mode */ treebuilder->context.mode = treebuilder->context.collect.mode; } return reprocess; } /** * Process a character token in cases where we expect only whitespace * * \param treebuilder The treebuilder instance * \param token The character token * \param insert_into_current_node Whether to insert whitespace into * current node * \return True if the token needs reprocessing * (token data updated to skip any leading whitespace), * false if it contained only whitespace */ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder, const hubbub_token *token, bool insert_into_current_node) { const uint8_t *data = treebuilder->input_buffer + token->data.character.data.off; size_t len = token->data.character.len; size_t c; /** \todo UTF-16 */ for (c = 0; c < len; c++) { if (data[c] != 0x09 && data[c] != 0x0A && data[c] != 0x0B && data[c] != 0x0C && data[c] != 0x20) break; } /* Non-whitespace characters in token, so reprocess */ if (c != len) { if (c > 0 && insert_into_current_node) { hubbub_string temp; temp.type = HUBBUB_STRING_OFF; temp.data.off = token->data.character.data.off; temp.len = len - c; append_text(treebuilder, &temp); } /* Update token data to strip leading whitespace */ ((hubbub_token *) token)->data.character.data.off += len - c; ((hubbub_token *) token)->data.character.len -= c; return true; } return false; } /** * Process a comment token, appending it to the given parent * * \param treebuilder The treebuilder instance * \param token The comment token * \param parent The node to append to */ void process_comment_append(hubbub_treebuilder *treebuilder, const hubbub_token *token, void *parent) { int success; void *comment, *appended; success = treebuilder->tree_handler->create_comment( treebuilder->tree_handler->ctx, &token->data.comment, &comment); if (success != 0) { /** \todo errors */ } /* Append to Document node */ success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, parent, comment, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, comment); } treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, comment); } /** * Trigger parsing of generic (R)CDATA * * \param treebuilder The treebuilder instance * \param token The current token * \param rcdata True for RCDATA, false for CDATA */ void parse_generic_rcdata(hubbub_treebuilder *treebuilder, const hubbub_token *token, bool rcdata) { int success; void *node, *appended; element_type type; hubbub_tokeniser_optparams params; type = element_type_from_name(treebuilder, &token->data.tag.name); success = treebuilder->tree_handler->create_element( treebuilder->tree_handler->ctx, &token->data.tag, &node); if (success != 0) { /** \todo errors */ } /* It's a bit nasty having this code deal with textarea->form * association, but it avoids having to duplicate the entire rest * of this function for textarea processing */ if (type == TEXTAREA && treebuilder->context.form_element != NULL) { /** \todo associate textarea with form */ } success = treebuilder->tree_handler->append_child( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[ treebuilder->context.current_node].node, node, &appended); if (success != 0) { /** \todo errors */ treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, node); } params.content_model.model = rcdata ? HUBBUB_CONTENT_MODEL_RCDATA : HUBBUB_CONTENT_MODEL_CDATA; hubbub_tokeniser_setopt(treebuilder->tokeniser, HUBBUB_TOKENISER_CONTENT_MODEL, ¶ms); treebuilder->context.collect.mode = treebuilder->context.mode; treebuilder->context.collect.type = type; treebuilder->context.collect.node = node; treebuilder->context.collect.string.data.off = 0; treebuilder->context.collect.string.len = 0; treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, appended); treebuilder->context.mode = GENERIC_RCDATA; } /** * Process a , , or start tag as if in "in head" * * \param treebuilder The treebuilder instance * \param token The token to process * \param type The type of element (BASE, LINK, or META) */ void process_base_link_meta_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token, element_type type) { insert_element_no_push(treebuilder, &token->data.tag); if (type == META) { /** \todo charset extraction */ } } /** * Process a