summary refs log tree commit diff
path: root/src/treebuilder/treebuilder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/treebuilder/treebuilder.c')
-rw-r--r-- src/treebuilder/treebuilder.c 838
1 files changed, 2 insertions, 836 deletions
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 68f82d8..2b256b4 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -8,11 +8,12 @@
#include <assert.h>
#include <string.h>
-#include "treebuilder/in_body.h"
+#include "treebuilder/modes.h"
#include "treebuilder/internal.h"
#include "treebuilder/treebuilder.h"
#include "utils/utils.h"
+
static const struct {
const char *name;
element_type type;
@@ -68,23 +69,6 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
static void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw);
-static bool handle_initial(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_before_html(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_before_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_after_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-
/**
* Create a hubbub treebuilder
@@ -371,824 +355,6 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
}
}
-/**
- * Handle token in initial insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
-{
- bool reprocess = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (process_characters_expect_whitespace(treebuilder, token,
- false)) {
- /** \todo parse error */
-
- treebuilder->tree_handler->set_quirks_mode(
- treebuilder->tree_handler->ctx,
- HUBBUB_QUIRKS_MODE_FULL);
- treebuilder->context.mode = BEFORE_HTML;
- reprocess = true;
- }
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.document);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- {
- int success;
- void *doctype, *appended;
-
- /** \todo parse error */
-
- /** \todo need public and system ids from tokeniser */
- success = treebuilder->tree_handler->create_doctype(
- treebuilder->tree_handler->ctx,
- &token->data.doctype.name,
- &token->data.doctype.public_id,
- &token->data.doctype.system_id, &doctype);
- if (success != 0) {
- /** \todo errors */
- }
-
- /* Append to Document node */
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.document,
- doctype, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- doctype);
- }
-
- /* \todo look up the doctype in a catalog */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, doctype);
-
- treebuilder->context.mode = BEFORE_HTML;
- }
- break;
- case HUBBUB_TOKEN_START_TAG:
- case HUBBUB_TOKEN_END_TAG:
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- treebuilder->tree_handler->set_quirks_mode(
- treebuilder->tree_handler->ctx,
- HUBBUB_QUIRKS_MODE_FULL);
- reprocess = true;
- break;
- }
-
- if (reprocess) {
- treebuilder->context.mode = BEFORE_HTML;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "before html" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_before_html(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.document);
- break;
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, false);
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- handled = true;
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
-
- if (handled || reprocess) {
- int success;
- void *html, *appended;
-
- /* We can't use insert_element() here, as it assumes
- * that we're inserting into current_node. There is
- * no current_node to insert into at this point so
- * we get to do it manually. */
-
- if (reprocess) {
- /* Need to manufacture html element */
- hubbub_tag tag;
-
- /** \todo UTF-16 */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "html";
- tag.name.len = SLEN("html");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
-
- success = treebuilder->tree_handler->create_element(
- treebuilder->tree_handler->ctx,
- &tag, &html);
- } else {
- success = treebuilder->tree_handler->create_element(
- treebuilder->tree_handler->ctx,
- &token->data.tag, &html);
- }
-
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.document,
- html, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- html);
- }
-
- /* We can't use element_stack_push() here, as it
- * assumes that current_node is pointing at the index
- * before the one to insert at. For the first entry in
- * the stack, this does not hold so we must insert
- * manually. */
- treebuilder->context.element_stack[0].type = HTML;
- treebuilder->context.element_stack[0].node = html;
- treebuilder->context.current_node = 0;
-
- /** \todo cache selection algorithm */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
-
- treebuilder->context.mode = BEFORE_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "before head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_before_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, false);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == HEAD) {
- handled = true;
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HEAD || type == BR) {
- reprocess = true;
- } else {
- /** \todo parse error */
- }
- }
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- hubbub_tag tag;
-
- if (reprocess) {
- /* Manufacture head tag */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "head";
- tag.name.len = SLEN("head");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
- } else {
- tag = token->data.tag;
- }
-
- insert_element(treebuilder, &tag);
-
- treebuilder->tree_handler->ref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
-
- treebuilder->context.head_element =
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node;
-
- treebuilder->context.mode = IN_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "in head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, true);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == BASE || type == COMMAND ||
- type == EVENT_SOURCE || type == LINK) {
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
- } else if (type == META) {
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
-
- /** \todo detect charset */
- } else if (type == TITLE) {
- parse_generic_rcdata(treebuilder, token, true);
- } else if (type == NOFRAMES || type == STYLE) {
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == NOSCRIPT) {
- /** \todo determine if scripting is enabled */
- if (false /*scripting_is_enabled*/) {
- parse_generic_rcdata(treebuilder, token, false);
- } else {
- insert_element(treebuilder, &token->data.tag);
- treebuilder->context.mode = IN_HEAD_NOSCRIPT;
- }
- } else if (type == SCRIPT) {
- process_script_in_head(treebuilder, token);
- } else if (type == HEAD) {
- /** \todo parse error */
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HEAD) {
- handled = true;
- } else if (type == BR) {
- reprocess = true;
- } /** \todo parse error */
- }
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- element_type otype;
- void *node;
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = AFTER_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "in head noscript" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- /* This should be equivalent to "in head" processing */
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, true);
- break;
- case HUBBUB_TOKEN_COMMENT:
- /* This should be equivalent to "in head" processing */
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == NOSCRIPT) {
- handled = true;
- } else if (type == LINK) {
- /* This should be equivalent to "in head" processing */
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
- } else if (type == META) {
- /* This should be equivalent to "in head" processing */
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
-
- /** \todo detect charset */
- } else if (type == NOFRAMES) {
- /* This should be equivalent to "in head" processing */
- parse_generic_rcdata(treebuilder, token, true);
- } else if (type == STYLE) {
- /* This should be equivalent to "in head" processing */
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == HEAD || type == NOSCRIPT) {
- /** \todo parse error */
- } else {
- /** \todo parse error */
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == NOSCRIPT) {
- handled = true;
- } else if (type == BR) {
- /** \todo parse error */
- reprocess = true;
- } else {
- /** \todo parse error */
- }
- }
- break;
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- element_type otype;
- void *node;
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = IN_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "after head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_after_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- append_text(treebuilder, &token->data.character);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == BODY) {
- handled = true;
- } else if (type == FRAMESET) {
- insert_element(treebuilder, &token->data.tag);
- treebuilder->context.mode = IN_FRAMESET;
- } else if (type == BASE || type == LINK || type == META ||
- type == NOFRAMES || type == SCRIPT ||
- type == STYLE || type == TITLE) {
- element_type otype;
- void *node;
-
- /** \todo parse error */
-
- if (!element_stack_push(treebuilder,
- HEAD,
- treebuilder->context.head_element)) {
- /** \todo errors */
- }
-
-
- /* This should be identical to handling "in head" */
- if (type == BASE || type == LINK || type == META) {
- /** \todo ack sc flag */
-
- process_base_link_meta_in_head(treebuilder,
- token, type);
- } else if (type == SCRIPT) {
- process_script_in_head(treebuilder, token);
- } else if (type == STYLE || type == NOFRAMES) {
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == TITLE) {
- parse_generic_rcdata(treebuilder, token, true);
- }
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- /* No need to unref node as we never increased
- * its reference count when pushing it on the stack */
- } else if (type == HEAD) {
- /** \todo parse error */
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- /** \parse error */
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- hubbub_tag tag;
-
- if (reprocess) {
- /* Manufacture body */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "body";
- tag.name.len = SLEN("body");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
- } else {
- tag = token->data.tag;
- }
-
- insert_element(treebuilder, &tag);
-
- treebuilder->context.mode = IN_BODY;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "generic rcdata" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool done = false;
-
- if (treebuilder->context.strip_leading_lr &&
- token->type != HUBBUB_TOKEN_CHARACTER) {
- /* Reset the LR stripping flag */
- treebuilder->context.strip_leading_lr = false;
- }
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data.off =
- token->data.character.data.off;
- }
- treebuilder->context.collect.string.len +=
- token->data.character.len;
-
- if (treebuilder->context.strip_leading_lr) {
- const uint8_t *str = treebuilder->input_buffer +
- treebuilder->context.collect.string.data.off;
-
- /** \todo UTF-16 */
- if (*str == '\n') {
- treebuilder->context.collect.string.data.off++;
- treebuilder->context.collect.string.len--;
- }
-
- treebuilder->context.strip_leading_lr = false;
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type != treebuilder->context.collect.type) {
- /** \todo parse error */
- }
-
- done = true;
- }
- break;
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- done = reprocess = true;
- break;
- case HUBBUB_TOKEN_COMMENT:
- case HUBBUB_TOKEN_DOCTYPE:
- case HUBBUB_TOKEN_START_TAG:
- /* Should never happen */
- assert(0);
- break;
- }
-
- if (done) {
- int success;
- void *text, *appended;
-
- success = treebuilder->tree_handler->create_text(
- treebuilder->tree_handler->ctx,
- &treebuilder->context.collect.string,
- &text);
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node,
- text, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, text);
-
- /* Clean up context */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node);
- treebuilder->context.collect.node = NULL;
-
- /* Return to previous insertion mode */
- treebuilder->context.mode =
- treebuilder->context.collect.mode;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "script collect characters" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool done = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data.off =
- token->data.character.data.off;
- }
- treebuilder->context.collect.string.len +=
- token->data.character.len;
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type != treebuilder->context.collect.type) {
- /** \todo parse error */
- /** \todo Mark script as "already executed" */
- }
-
- done = true;
- }
- break;
- case HUBBUB_TOKEN_EOF:
- case HUBBUB_TOKEN_COMMENT:
- case HUBBUB_TOKEN_DOCTYPE:
- case HUBBUB_TOKEN_START_TAG:
- /** \todo parse error */
- /** \todo Mark script as "already executed" */
- done = reprocess = true;
- break;
- }
-
- if (done) {
- int success;
- void *text, *appended;
-
- success = treebuilder->tree_handler->create_text(
- treebuilder->tree_handler->ctx,
- &treebuilder->context.collect.string,
- &text);
- if (success != 0) {
- /** \todo errors */
- }
-
- /** \todo fragment case -- skip this lot entirely */
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node,
- text, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, text);
-
- /** \todo insertion point manipulation */
-
- /* Append script node to current node */
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node,
- treebuilder->context.collect.node, &appended);
- if (success != 0) {
- /** \todo errors */
- }
-
- /** \todo restore insertion point */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node);
- treebuilder->context.collect.node = NULL;
-
- /** \todo process any pending script */
-
- /* Return to previous insertion mode */
- treebuilder->context.mode =
- treebuilder->context.collect.mode;
- }
-
- return reprocess;
-}
-
/**
* Process a character token in cases where we expect only whitespace