summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndrew Sidwell <andy@entai.co.uk>2008-06-23 20:22:25 +0000
committerAndrew Sidwell <andy@entai.co.uk>2008-06-23 20:22:25 +0000
commit6261a9cf2faada630dc1924fcf58305594a8028a (patch)
tree0596d019dc994a9f2e376a37d892d3d449f7985f /src
parent0cd11636c3db826f06dcf33ad53208675c5752dc (diff)
downloadlibhubbub-6261a9cf2faada630dc1924fcf58305594a8028a.tar.gz
libhubbub-6261a9cf2faada630dc1924fcf58305594a8028a.tar.bz2
Put each insertion mode into its own C file, so that treebuilder.c doesn't get extremely long.
svn path=/trunk/hubbub/; revision=4429
Diffstat (limited to 'src')
-rw-r--r--src/treebuilder/Makefile5
-rw-r--r--src/treebuilder/after_head.c127
-rw-r--r--src/treebuilder/before_head.c106
-rw-r--r--src/treebuilder/before_html.c126
-rw-r--r--src/treebuilder/generic_rcdata.c123
-rw-r--r--src/treebuilder/in_head.c119
-rw-r--r--src/treebuilder/in_head_noscript.c121
-rw-r--r--src/treebuilder/initial.c101
-rw-r--r--src/treebuilder/internal.h27
-rw-r--r--src/treebuilder/modes.h66
-rw-r--r--src/treebuilder/script_collect.c123
-rw-r--r--src/treebuilder/treebuilder.c838
12 files changed, 1018 insertions, 864 deletions
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index 3c6355c..f73f774 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -32,7 +32,10 @@ dirstack_$(sp) := $(d)
d := $(DIR)
# Sources
-SRCS_$(d) := in_body.c treebuilder.c
+SRCS_$(d) := treebuilder.c \
+ initial.c before_html.c before_head.c in_head.c \
+ in_head_noscript.c after_head.c in_body.c \
+ generic_rcdata.c script_collect.c
# Append to sources for component
SOURCES += $(addprefix $(d), $(SRCS_$(d)))
diff --git a/src/treebuilder/after_head.c b/src/treebuilder/after_head.c
new file mode 100644
index 0000000..b460115
--- /dev/null
+++ b/src/treebuilder/after_head.c
@@ -0,0 +1,127 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle tokens in "after head" insertion mode
+ *
+ * A body start tag, or any token that must be reprocessed, inserts a body
+ * element and switches the insertion mode to "in body".
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_after_head(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+	bool handled = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		/* As "in head": anything but whitespace needs a body first */
+		reprocess = process_characters_expect_whitespace(treebuilder,
+				token, true);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		process_comment_append(treebuilder, token,
+				treebuilder->context.element_stack[
+				treebuilder->context.current_node].node);
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		if (type == HTML) {
+			/* Process as if "in body" */
+			process_tag_in_body(treebuilder, token);
+		} else if (type == BODY) {
+			handled = true;
+		} else if (type == FRAMESET) {
+			insert_element(treebuilder, &token->data.tag);
+			treebuilder->context.mode = IN_FRAMESET;
+		} else if (type == BASE || type == LINK || type == META ||
+				type == NOFRAMES || type == SCRIPT ||
+				type == STYLE || type == TITLE) {
+			/** \todo parse error */
+			element_type otype;
+			void *node;
+
+			if (!element_stack_push(treebuilder,
+					HEAD,
+					treebuilder->context.head_element)) {
+				/** \todo errors */
+			}
+
+			/* This should be identical to handling "in head" */
+			if (type == BASE || type == LINK || type == META) {
+				/** \todo ack sc flag */
+				process_base_link_meta_in_head(treebuilder,
+						token, type);
+			} else if (type == SCRIPT) {
+				process_script_in_head(treebuilder, token);
+			} else if (type == STYLE || type == NOFRAMES) {
+				parse_generic_rcdata(treebuilder, token, false);
+			} else if (type == TITLE) {
+				parse_generic_rcdata(treebuilder, token, true);
+			}
+
+			if (!element_stack_pop(treebuilder, &otype, &node)) {
+				/** \todo errors */
+			}
+
+			/* No need to unref node as we never increased
+			 * its reference count when pushing it on the stack */
+		} else if (type == HEAD) {
+			/** \todo parse error */
+		} else {
+			reprocess = true;
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_EOF:
+		reprocess = true;
+		break;
+	}
+
+	if (handled || reprocess) {
+		hubbub_tag tag;
+
+		if (reprocess) {
+			/* Manufacture body */
+			tag.name.type = HUBBUB_STRING_PTR;
+			tag.name.data.ptr = (const uint8_t *) "body";
+			tag.name.len = SLEN("body");
+			tag.n_attributes = 0;
+			tag.attributes = NULL;
+		} else {
+			tag = token->data.tag;
+		}
+
+		insert_element(treebuilder, &tag);
+		treebuilder->context.mode = IN_BODY;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/before_head.c b/src/treebuilder/before_head.c
new file mode 100644
index 0000000..1534ff8
--- /dev/null
+++ b/src/treebuilder/before_head.c
@@ -0,0 +1,106 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle token in "before head" insertion mode
+ * On a head start tag or a reprocessed token, inserts head and enters "in head".
+ * \param treebuilder The treebuilder instance
+ * \param token The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_before_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token)
+{
+ bool reprocess = false;
+ bool handled = false;
+
+ switch (token->type) {
+ case HUBBUB_TOKEN_CHARACTER:
+ reprocess = process_characters_expect_whitespace(treebuilder,
+ token, false);
+ break;
+ case HUBBUB_TOKEN_COMMENT:
+ process_comment_append(treebuilder, token,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node);
+ break;
+ case HUBBUB_TOKEN_DOCTYPE:
+ /** \todo parse error */
+ break;
+ case HUBBUB_TOKEN_START_TAG:
+ {
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HTML) {
+ /* Process as if "in body" */
+ process_tag_in_body(treebuilder, token);
+ } else if (type == HEAD) {
+ handled = true;
+ } else {
+ reprocess = true;
+ }
+ }
+ break;
+ case HUBBUB_TOKEN_END_TAG:
+ {
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HEAD || type == BR) {
+ reprocess = true;
+ } else {
+ /** \todo parse error */
+ }
+ }
+ break;
+ case HUBBUB_TOKEN_EOF:
+ reprocess = true;
+ break;
+ }
+
+ if (handled || reprocess) {
+ hubbub_tag tag;
+
+ if (reprocess) {
+ /* Manufacture head tag */
+ tag.name.type = HUBBUB_STRING_PTR;
+ tag.name.data.ptr = (const uint8_t *) "head";
+ tag.name.len = SLEN("head");
+
+ tag.n_attributes = 0;
+ tag.attributes = NULL;
+ } else {
+ tag = token->data.tag;
+ }
+
+ insert_element(treebuilder, &tag);
+ /* Take a reference for context.head_element, which is set just below */
+ treebuilder->tree_handler->ref_node(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node);
+
+ treebuilder->context.head_element =
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node;
+
+ treebuilder->context.mode = IN_HEAD;
+ }
+
+ return reprocess;
+}
+
diff --git a/src/treebuilder/before_html.c b/src/treebuilder/before_html.c
new file mode 100644
index 0000000..f8b3231
--- /dev/null
+++ b/src/treebuilder/before_html.c
@@ -0,0 +1,126 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle token in "before html" insertion mode
+ * On an html start tag or a reprocessed token, creates html and enters "before head".
+ * \param treebuilder The treebuilder instance
+ * \param token The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_before_html(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token)
+{
+ bool reprocess = false;
+ bool handled = false;
+
+ switch (token->type) {
+ case HUBBUB_TOKEN_DOCTYPE:
+ /** \todo parse error */
+ break;
+ case HUBBUB_TOKEN_COMMENT:
+ process_comment_append(treebuilder, token,
+ treebuilder->context.document);
+ break;
+ case HUBBUB_TOKEN_CHARACTER:
+ reprocess = process_characters_expect_whitespace(treebuilder,
+ token, false);
+ break;
+ case HUBBUB_TOKEN_START_TAG:
+ {
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HTML) {
+ handled = true;
+ } else {
+ reprocess = true;
+ }
+ }
+ break;
+ case HUBBUB_TOKEN_END_TAG:
+ case HUBBUB_TOKEN_EOF:
+ reprocess = true;
+ break;
+ }
+
+
+ if (handled || reprocess) {
+ int success;
+ void *html, *appended;
+
+ /* We can't use insert_element() here, as it assumes
+ * that we're inserting into current_node. There is
+ * no current_node to insert into at this point so
+ * we get to do it manually. */
+
+ if (reprocess) {
+ /* Need to manufacture html element */
+ hubbub_tag tag;
+
+ /** \todo UTF-16 */
+ tag.name.type = HUBBUB_STRING_PTR;
+ tag.name.data.ptr = (const uint8_t *) "html";
+ tag.name.len = SLEN("html");
+
+ tag.n_attributes = 0;
+ tag.attributes = NULL;
+
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx,
+ &tag, &html);
+ } else {
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx,
+ &token->data.tag, &html);
+ }
+
+ if (success != 0) {
+ /** \todo errors -- html is still passed to append_child below */
+ }
+
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.document,
+ html, &appended);
+ if (success != 0) {
+ /** \todo errors -- html is unref'd here but still stacked, and appended unref'd, below */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ html);
+ }
+
+ /* We can't use element_stack_push() here, as it
+ * assumes that current_node is pointing at the index
+ * before the one to insert at. For the first entry in
+ * the stack, this does not hold so we must insert
+ * manually. */
+ treebuilder->context.element_stack[0].type = HTML;
+ treebuilder->context.element_stack[0].node = html;
+ treebuilder->context.current_node = 0;
+
+ /** \todo cache selection algorithm */
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ appended);
+
+ treebuilder->context.mode = BEFORE_HEAD;
+ }
+
+ return reprocess;
+}
+
diff --git a/src/treebuilder/generic_rcdata.c b/src/treebuilder/generic_rcdata.c
new file mode 100644
index 0000000..07173cf
--- /dev/null
+++ b/src/treebuilder/generic_rcdata.c
@@ -0,0 +1,123 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle tokens in "generic rcdata" insertion mode: collect character data
+ * and, on an end tag or EOF, append it as text to the collecting node
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+	bool done = false;
+
+	if (treebuilder->context.strip_leading_lr &&
+			token->type != HUBBUB_TOKEN_CHARACTER) {
+		/* Reset the LR stripping flag */
+		treebuilder->context.strip_leading_lr = false;
+	}
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		if (treebuilder->context.collect.string.len == 0) {
+			treebuilder->context.collect.string.data.off =
+					token->data.character.data.off;
+		}
+		treebuilder->context.collect.string.len +=
+				token->data.character.len;
+
+		if (treebuilder->context.strip_leading_lr) {
+			const uint8_t *str = treebuilder->input_buffer +
+				treebuilder->context.collect.string.data.off;
+
+			/** \todo UTF-16 */
+			if (*str == '\n') {
+				treebuilder->context.collect.string.data.off++;
+				treebuilder->context.collect.string.len--;
+			}
+			treebuilder->context.strip_leading_lr = false;
+		}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		if (type != treebuilder->context.collect.type) {
+			/** \todo parse error */
+		}
+
+		done = true;
+	}
+		break;
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+		done = reprocess = true;
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+	case HUBBUB_TOKEN_DOCTYPE:
+	case HUBBUB_TOKEN_START_TAG:
+		/* Should never happen */
+		assert(0);
+		break;
+	}
+
+	if (done) {
+		int success;
+		void *text, *appended;
+
+		success = treebuilder->tree_handler->create_text(
+				treebuilder->tree_handler->ctx,
+				&treebuilder->context.collect.string,
+				&text);
+		if (success != 0) {
+			/** \todo errors */
+		}
+
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.collect.node,
+				text, &appended);
+		if (success == 0) {
+			/* appended only holds a node after a successful append */
+			treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, appended);
+		} else {
+			/** \todo errors */
+		}
+
+		/* Release our own reference to text, exactly once */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, text);
+
+		/* Clean up context */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.collect.node);
+		treebuilder->context.collect.node = NULL;
+
+		/* Return to previous insertion mode */
+		treebuilder->context.mode =
+				treebuilder->context.collect.mode;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/in_head.c b/src/treebuilder/in_head.c
new file mode 100644
index 0000000..96ff87d
--- /dev/null
+++ b/src/treebuilder/in_head.c
@@ -0,0 +1,119 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle token in "in head" insertion mode
+ * On a head end tag or a reprocessed token, pops the current node and enters "after head".
+ * \param treebuilder The treebuilder instance
+ * \param token The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_in_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token)
+{
+ bool reprocess = false;
+ bool handled = false;
+
+ switch (token->type) {
+ case HUBBUB_TOKEN_CHARACTER:
+ reprocess = process_characters_expect_whitespace(treebuilder,
+ token, true);
+ break;
+ case HUBBUB_TOKEN_COMMENT:
+ process_comment_append(treebuilder, token,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node);
+ break;
+ case HUBBUB_TOKEN_DOCTYPE:
+ /** \todo parse error */
+ break;
+ case HUBBUB_TOKEN_START_TAG:
+ {
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HTML) {
+ /* Process as if "in body" */
+ process_tag_in_body(treebuilder, token);
+ } else if (type == BASE || type == COMMAND ||
+ type == EVENT_SOURCE || type == LINK) {
+ process_base_link_meta_in_head(treebuilder,
+ token, type);
+
+ /** \todo ack sc flag */
+ } else if (type == META) {
+ process_base_link_meta_in_head(treebuilder,
+ token, type);
+
+ /** \todo ack sc flag */
+
+ /** \todo detect charset */
+ } else if (type == TITLE) {
+ parse_generic_rcdata(treebuilder, token, true);
+ } else if (type == NOFRAMES || type == STYLE) {
+ parse_generic_rcdata(treebuilder, token, false);
+ } else if (type == NOSCRIPT) {
+ /** \todo determine if scripting is enabled */
+ if (false /*scripting_is_enabled*/) {
+ parse_generic_rcdata(treebuilder, token, false);
+ } else {
+ insert_element(treebuilder, &token->data.tag);
+ treebuilder->context.mode = IN_HEAD_NOSCRIPT;
+ }
+ } else if (type == SCRIPT) {
+ process_script_in_head(treebuilder, token);
+ } else if (type == HEAD) {
+ /** \todo parse error */
+ } else {
+ reprocess = true;
+ }
+ }
+ break;
+ case HUBBUB_TOKEN_END_TAG:
+ {
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HEAD) {
+ handled = true;
+ } else if (type == BR) {
+ reprocess = true;
+ } /** \todo parse error */
+ }
+ break;
+ case HUBBUB_TOKEN_EOF:
+ reprocess = true;
+ break;
+ }
+
+ if (handled || reprocess) {
+ element_type otype;
+ void *node;
+
+ if (!element_stack_pop(treebuilder, &otype, &node)) {
+ /** \todo errors */
+ }
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ node);
+
+ treebuilder->context.mode = AFTER_HEAD;
+ }
+
+ return reprocess;
+}
+
diff --git a/src/treebuilder/in_head_noscript.c b/src/treebuilder/in_head_noscript.c
new file mode 100644
index 0000000..ca01681
--- /dev/null
+++ b/src/treebuilder/in_head_noscript.c
@@ -0,0 +1,121 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle tokens in "in head noscript" insertion mode
+ *
+ * A noscript end tag, or any token that must be reprocessed, pops the
+ * current node and returns to "in head" insertion mode.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+	bool handled = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		/* This should be equivalent to "in head" processing */
+		reprocess = process_characters_expect_whitespace(treebuilder,
+				token, true);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		/* This should be equivalent to "in head" processing */
+		process_comment_append(treebuilder, token,
+				treebuilder->context.element_stack[
+				treebuilder->context.current_node].node);
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		if (type == HTML) {
+			/* Process as "in body" */
+			process_tag_in_body(treebuilder, token);
+		} else if (type == LINK) {
+			/* This should be equivalent to "in head" processing */
+			process_base_link_meta_in_head(treebuilder,
+					token, type);
+
+			/** \todo ack sc flag */
+		} else if (type == META) {
+			/* This should be equivalent to "in head" processing */
+			process_base_link_meta_in_head(treebuilder,
+					token, type);
+
+			/** \todo ack sc flag */
+
+			/** \todo detect charset */
+		} else if (type == NOFRAMES || type == STYLE) {
+			/* Use the same flag as "in head" does for these */
+			parse_generic_rcdata(treebuilder, token, false);
+		} else if (type == HEAD || type == NOSCRIPT) {
+			/* Ignored: a nested noscript start tag must not
+			 * pop the open noscript element */
+			/** \todo parse error */
+		} else {
+			/** \todo parse error */
+			reprocess = true;
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		if (type == NOSCRIPT) {
+			handled = true;
+		} else if (type == BR) {
+			/** \todo parse error */
+			reprocess = true;
+		} else {
+			/** \todo parse error */
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+		reprocess = true;
+		break;
+	}
+
+	if (handled || reprocess) {
+		element_type otype;
+		void *node;
+
+		if (!element_stack_pop(treebuilder, &otype, &node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				node);
+
+		treebuilder->context.mode = IN_HEAD;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/initial.c b/src/treebuilder/initial.c
new file mode 100644
index 0000000..30a380b
--- /dev/null
+++ b/src/treebuilder/initial.c
@@ -0,0 +1,101 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle token in initial insertion mode
+ *
+ * A doctype is created and appended to the document. Start tags, end tags,
+ * EOF and character data needing reprocessing select full quirks mode.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to handle
+ * \return True to reprocess token, false otherwise
+ */
+bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
+{
+	bool reprocess = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		if (process_characters_expect_whitespace(treebuilder, token,
+				false)) {
+			/** \todo parse error */
+			treebuilder->tree_handler->set_quirks_mode(
+					treebuilder->tree_handler->ctx,
+					HUBBUB_QUIRKS_MODE_FULL);
+			reprocess = true;
+		}
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		process_comment_append(treebuilder, token,
+				treebuilder->context.document);
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+	{
+		int success;
+		void *doctype, *appended;
+
+		/** \todo parse error */
+		/** \todo need public and system ids from tokeniser */
+		success = treebuilder->tree_handler->create_doctype(
+				treebuilder->tree_handler->ctx,
+				&token->data.doctype.name,
+				&token->data.doctype.public_id,
+				&token->data.doctype.system_id, &doctype);
+		if (success != 0) {
+			/** \todo errors */
+		}
+
+		/* Append to Document node */
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.document,
+				doctype, &appended);
+		if (success == 0) {
+			/* appended only holds a node after a successful append */
+			treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, appended);
+		} else {
+			/** \todo errors */
+		}
+
+		/* \todo look up the doctype in a catalog */
+
+		/* Release our own reference to doctype, exactly once */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, doctype);
+
+		treebuilder->context.mode = BEFORE_HTML;
+	}
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	case HUBBUB_TOKEN_END_TAG:
+	case HUBBUB_TOKEN_EOF:
+		/** \todo parse error */
+		treebuilder->tree_handler->set_quirks_mode(
+				treebuilder->tree_handler->ctx,
+				HUBBUB_QUIRKS_MODE_FULL);
+		reprocess = true;
+		break;
+	}
+
+	if (reprocess) {
+		treebuilder->context.mode = BEFORE_HTML;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 392f606..a4eed84 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -12,33 +12,6 @@
typedef enum
{
- INITIAL,
- BEFORE_HTML,
- BEFORE_HEAD,
- IN_HEAD,
- IN_HEAD_NOSCRIPT,
- AFTER_HEAD,
- IN_BODY,
- IN_TABLE,
- IN_CAPTION,
- IN_COLUMN_GROUP,
- IN_TABLE_BODY,
- IN_ROW,
- IN_CELL,
- IN_SELECT,
- IN_SELECT_IN_TABLE,
- IN_FOREIGN_CONTENT,
- AFTER_BODY,
- IN_FRAMESET,
- AFTER_FRAMESET,
- AFTER_AFTER_BODY,
- AFTER_AFTER_FRAMESET,
- GENERIC_RCDATA,
- SCRIPT_COLLECT_CHARACTERS,
-} insertion_mode;
-
-typedef enum
-{
/* Special */
ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
diff --git a/src/treebuilder/modes.h b/src/treebuilder/modes.h
new file mode 100644
index 0000000..ab9a229
--- /dev/null
+++ b/src/treebuilder/modes.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_modes_h_
+#define hubbub_treebuilder_modes_h_
+
+#include "treebuilder/treebuilder.h"
+
+/** The various treebuilder insertion modes */
+typedef enum
+{
+ INITIAL,
+ BEFORE_HTML,
+ BEFORE_HEAD,
+ IN_HEAD,
+ IN_HEAD_NOSCRIPT,
+ AFTER_HEAD,
+ IN_BODY,
+ IN_TABLE,
+ IN_CAPTION,
+ IN_COLUMN_GROUP,
+ IN_TABLE_BODY,
+ IN_ROW,
+ IN_CELL,
+ IN_SELECT,
+ IN_SELECT_IN_TABLE,
+ IN_FOREIGN_CONTENT,
+ AFTER_BODY,
+ IN_FRAMESET,
+ AFTER_FRAMESET,
+ AFTER_AFTER_BODY,
+
+ AFTER_AFTER_FRAMESET,
+ GENERIC_RCDATA,
+ SCRIPT_COLLECT_CHARACTERS,
+} insertion_mode;
+
+/* Token handlers for individual insertion modes; the return value tells
+ * the caller whether the token must be reprocessed */
+bool handle_initial(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_before_html(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_before_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_in_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_after_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+/* Used by other modes to process a tag as if "in body" */
+bool process_tag_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
+#endif
diff --git a/src/treebuilder/script_collect.c b/src/treebuilder/script_collect.c
new file mode 100644
index 0000000..7f61f95
--- /dev/null
+++ b/src/treebuilder/script_collect.c
@@ -0,0 +1,123 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+/**
+ * Handle tokens in "script collect characters" insertion mode
+ *
+ * When collection ends, the text is appended to the script node, which is
+ * appended to the current node; the previous insertion mode is restored.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+	bool done = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		if (treebuilder->context.collect.string.len == 0) {
+			treebuilder->context.collect.string.data.off =
+					token->data.character.data.off;
+		}
+		treebuilder->context.collect.string.len +=
+				token->data.character.len;
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		if (type != treebuilder->context.collect.type) {
+			/** \todo parse error */
+			/** \todo Mark script as "already executed" */
+		}
+		done = true;
+	}
+		break;
+	case HUBBUB_TOKEN_EOF:
+	case HUBBUB_TOKEN_COMMENT:
+	case HUBBUB_TOKEN_DOCTYPE:
+	case HUBBUB_TOKEN_START_TAG:
+		/** \todo parse error */
+		/** \todo Mark script as "already executed" */
+		done = reprocess = true;
+		break;
+	}
+
+	if (done) {
+		int success;
+		void *text, *appended;
+
+		success = treebuilder->tree_handler->create_text(
+				treebuilder->tree_handler->ctx,
+				&treebuilder->context.collect.string,
+				&text);
+		if (success != 0) {
+			/** \todo errors */
+		}
+
+		/** \todo fragment case -- skip this lot entirely */
+
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.collect.node,
+				text, &appended);
+		if (success == 0) {
+			/* appended only holds a node after a successful append */
+			treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, appended);
+		} else {
+			/** \todo errors */
+		}
+
+		/* Release our own reference to text, exactly once */
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, text);
+
+		/** \todo insertion point manipulation */
+		/* Append script node to current node */
+		success = treebuilder->tree_handler->append_child(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.element_stack[
+				treebuilder->context.current_node].node,
+				treebuilder->context.collect.node, &appended);
+		if (success == 0) {
+			treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, appended);
+		} else {
+			/** \todo errors */
+		}
+
+		/** \todo restore insertion point */
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.collect.node);
+		treebuilder->context.collect.node = NULL;
+
+		/** \todo process any pending script */
+		/* Return to previous insertion mode */
+		treebuilder->context.mode =
+				treebuilder->context.collect.mode;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 68f82d8..2b256b4 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -8,11 +8,12 @@
#include <assert.h>
#include <string.h>
-#include "treebuilder/in_body.h"
+#include "treebuilder/modes.h"
#include "treebuilder/internal.h"
#include "treebuilder/treebuilder.h"
#include "utils/utils.h"
+
static const struct {
const char *name;
element_type type;
@@ -68,23 +69,6 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
static void hubbub_treebuilder_token_handler(const hubbub_token *token,
void *pw);
-static bool handle_initial(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_before_html(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_before_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_after_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-
/**
* Create a hubbub treebuilder
@@ -371,824 +355,6 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
}
}
-/**
- * Handle token in initial insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
-{
- bool reprocess = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (process_characters_expect_whitespace(treebuilder, token,
- false)) {
- /** \todo parse error */
-
- treebuilder->tree_handler->set_quirks_mode(
- treebuilder->tree_handler->ctx,
- HUBBUB_QUIRKS_MODE_FULL);
- treebuilder->context.mode = BEFORE_HTML;
- reprocess = true;
- }
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.document);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- {
- int success;
- void *doctype, *appended;
-
- /** \todo parse error */
-
- /** \todo need public and system ids from tokeniser */
- success = treebuilder->tree_handler->create_doctype(
- treebuilder->tree_handler->ctx,
- &token->data.doctype.name,
- &token->data.doctype.public_id,
- &token->data.doctype.system_id, &doctype);
- if (success != 0) {
- /** \todo errors */
- }
-
- /* Append to Document node */
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.document,
- doctype, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- doctype);
- }
-
- /* \todo look up the doctype in a catalog */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, doctype);
-
- treebuilder->context.mode = BEFORE_HTML;
- }
- break;
- case HUBBUB_TOKEN_START_TAG:
- case HUBBUB_TOKEN_END_TAG:
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- treebuilder->tree_handler->set_quirks_mode(
- treebuilder->tree_handler->ctx,
- HUBBUB_QUIRKS_MODE_FULL);
- reprocess = true;
- break;
- }
-
- if (reprocess) {
- treebuilder->context.mode = BEFORE_HTML;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "before html" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_before_html(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.document);
- break;
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, false);
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- handled = true;
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
-
- if (handled || reprocess) {
- int success;
- void *html, *appended;
-
- /* We can't use insert_element() here, as it assumes
- * that we're inserting into current_node. There is
- * no current_node to insert into at this point so
- * we get to do it manually. */
-
- if (reprocess) {
- /* Need to manufacture html element */
- hubbub_tag tag;
-
- /** \todo UTF-16 */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "html";
- tag.name.len = SLEN("html");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
-
- success = treebuilder->tree_handler->create_element(
- treebuilder->tree_handler->ctx,
- &tag, &html);
- } else {
- success = treebuilder->tree_handler->create_element(
- treebuilder->tree_handler->ctx,
- &token->data.tag, &html);
- }
-
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.document,
- html, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- html);
- }
-
- /* We can't use element_stack_push() here, as it
- * assumes that current_node is pointing at the index
- * before the one to insert at. For the first entry in
- * the stack, this does not hold so we must insert
- * manually. */
- treebuilder->context.element_stack[0].type = HTML;
- treebuilder->context.element_stack[0].node = html;
- treebuilder->context.current_node = 0;
-
- /** \todo cache selection algorithm */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
-
- treebuilder->context.mode = BEFORE_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "before head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_before_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, false);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == HEAD) {
- handled = true;
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HEAD || type == BR) {
- reprocess = true;
- } else {
- /** \todo parse error */
- }
- }
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- hubbub_tag tag;
-
- if (reprocess) {
- /* Manufacture head tag */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "head";
- tag.name.len = SLEN("head");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
- } else {
- tag = token->data.tag;
- }
-
- insert_element(treebuilder, &tag);
-
- treebuilder->tree_handler->ref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
-
- treebuilder->context.head_element =
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node;
-
- treebuilder->context.mode = IN_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle token in "in head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to handle
- * \return True to reprocess token, false otherwise
- */
-bool handle_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, true);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == BASE || type == COMMAND ||
- type == EVENT_SOURCE || type == LINK) {
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
- } else if (type == META) {
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
-
- /** \todo detect charset */
- } else if (type == TITLE) {
- parse_generic_rcdata(treebuilder, token, true);
- } else if (type == NOFRAMES || type == STYLE) {
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == NOSCRIPT) {
- /** \todo determine if scripting is enabled */
- if (false /*scripting_is_enabled*/) {
- parse_generic_rcdata(treebuilder, token, false);
- } else {
- insert_element(treebuilder, &token->data.tag);
- treebuilder->context.mode = IN_HEAD_NOSCRIPT;
- }
- } else if (type == SCRIPT) {
- process_script_in_head(treebuilder, token);
- } else if (type == HEAD) {
- /** \todo parse error */
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HEAD) {
- handled = true;
- } else if (type == BR) {
- reprocess = true;
- } /** \todo parse error */
- }
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- element_type otype;
- void *node;
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = AFTER_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "in head noscript" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- /* This should be equivalent to "in head" processing */
- reprocess = process_characters_expect_whitespace(treebuilder,
- token, true);
- break;
- case HUBBUB_TOKEN_COMMENT:
- /* This should be equivalent to "in head" processing */
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == NOSCRIPT) {
- handled = true;
- } else if (type == LINK) {
- /* This should be equivalent to "in head" processing */
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
- } else if (type == META) {
- /* This should be equivalent to "in head" processing */
- process_base_link_meta_in_head(treebuilder,
- token, type);
-
- /** \todo ack sc flag */
-
- /** \todo detect charset */
- } else if (type == NOFRAMES) {
- /* This should be equivalent to "in head" processing */
- parse_generic_rcdata(treebuilder, token, true);
- } else if (type == STYLE) {
- /* This should be equivalent to "in head" processing */
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == HEAD || type == NOSCRIPT) {
- /** \todo parse error */
- } else {
- /** \todo parse error */
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == NOSCRIPT) {
- handled = true;
- } else if (type == BR) {
- /** \todo parse error */
- reprocess = true;
- } else {
- /** \todo parse error */
- }
- }
- break;
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- element_type otype;
- void *node;
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = IN_HEAD;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "after head" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_after_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool handled = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- append_text(treebuilder, &token->data.character);
- break;
- case HUBBUB_TOKEN_COMMENT:
- process_comment_append(treebuilder, token,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
- break;
- case HUBBUB_TOKEN_DOCTYPE:
- /** \todo parse error */
- break;
- case HUBBUB_TOKEN_START_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type == HTML) {
- /* Process as if "in body" */
- process_tag_in_body(treebuilder, token);
- } else if (type == BODY) {
- handled = true;
- } else if (type == FRAMESET) {
- insert_element(treebuilder, &token->data.tag);
- treebuilder->context.mode = IN_FRAMESET;
- } else if (type == BASE || type == LINK || type == META ||
- type == NOFRAMES || type == SCRIPT ||
- type == STYLE || type == TITLE) {
- element_type otype;
- void *node;
-
- /** \todo parse error */
-
- if (!element_stack_push(treebuilder,
- HEAD,
- treebuilder->context.head_element)) {
- /** \todo errors */
- }
-
-
- /* This should be identical to handling "in head" */
- if (type == BASE || type == LINK || type == META) {
- /** \todo ack sc flag */
-
- process_base_link_meta_in_head(treebuilder,
- token, type);
- } else if (type == SCRIPT) {
- process_script_in_head(treebuilder, token);
- } else if (type == STYLE || type == NOFRAMES) {
- parse_generic_rcdata(treebuilder, token, false);
- } else if (type == TITLE) {
- parse_generic_rcdata(treebuilder, token, true);
- }
-
- if (!element_stack_pop(treebuilder, &otype, &node)) {
- /** \todo errors */
- }
-
- /* No need to unref node as we never increased
- * its reference count when pushing it on the stack */
- } else if (type == HEAD) {
- /** \todo parse error */
- } else {
- reprocess = true;
- }
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- /** \parse error */
- break;
- case HUBBUB_TOKEN_EOF:
- reprocess = true;
- break;
- }
-
- if (handled || reprocess) {
- hubbub_tag tag;
-
- if (reprocess) {
- /* Manufacture body */
- tag.name.type = HUBBUB_STRING_PTR;
- tag.name.data.ptr = (const uint8_t *) "body";
- tag.name.len = SLEN("body");
-
- tag.n_attributes = 0;
- tag.attributes = NULL;
- } else {
- tag = token->data.tag;
- }
-
- insert_element(treebuilder, &tag);
-
- treebuilder->context.mode = IN_BODY;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "generic rcdata" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool done = false;
-
- if (treebuilder->context.strip_leading_lr &&
- token->type != HUBBUB_TOKEN_CHARACTER) {
- /* Reset the LR stripping flag */
- treebuilder->context.strip_leading_lr = false;
- }
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data.off =
- token->data.character.data.off;
- }
- treebuilder->context.collect.string.len +=
- token->data.character.len;
-
- if (treebuilder->context.strip_leading_lr) {
- const uint8_t *str = treebuilder->input_buffer +
- treebuilder->context.collect.string.data.off;
-
- /** \todo UTF-16 */
- if (*str == '\n') {
- treebuilder->context.collect.string.data.off++;
- treebuilder->context.collect.string.len--;
- }
-
- treebuilder->context.strip_leading_lr = false;
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type != treebuilder->context.collect.type) {
- /** \todo parse error */
- }
-
- done = true;
- }
- break;
- case HUBBUB_TOKEN_EOF:
- /** \todo parse error */
- done = reprocess = true;
- break;
- case HUBBUB_TOKEN_COMMENT:
- case HUBBUB_TOKEN_DOCTYPE:
- case HUBBUB_TOKEN_START_TAG:
- /* Should never happen */
- assert(0);
- break;
- }
-
- if (done) {
- int success;
- void *text, *appended;
-
- success = treebuilder->tree_handler->create_text(
- treebuilder->tree_handler->ctx,
- &treebuilder->context.collect.string,
- &text);
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node,
- text, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, text);
-
- /* Clean up context */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node);
- treebuilder->context.collect.node = NULL;
-
- /* Return to previous insertion mode */
- treebuilder->context.mode =
- treebuilder->context.collect.mode;
- }
-
- return reprocess;
-}
-
-/**
- * Handle tokens in "script collect characters" insertion mode
- *
- * \param treebuilder The treebuilder instance
- * \param token The token to process
- * \return True to reprocess the token, false otherwise
- */
-bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
- const hubbub_token *token)
-{
- bool reprocess = false;
- bool done = false;
-
- switch (token->type) {
- case HUBBUB_TOKEN_CHARACTER:
- if (treebuilder->context.collect.string.len == 0) {
- treebuilder->context.collect.string.data.off =
- token->data.character.data.off;
- }
- treebuilder->context.collect.string.len +=
- token->data.character.len;
- break;
- case HUBBUB_TOKEN_END_TAG:
- {
- element_type type = element_type_from_name(treebuilder,
- &token->data.tag.name);
-
- if (type != treebuilder->context.collect.type) {
- /** \todo parse error */
- /** \todo Mark script as "already executed" */
- }
-
- done = true;
- }
- break;
- case HUBBUB_TOKEN_EOF:
- case HUBBUB_TOKEN_COMMENT:
- case HUBBUB_TOKEN_DOCTYPE:
- case HUBBUB_TOKEN_START_TAG:
- /** \todo parse error */
- /** \todo Mark script as "already executed" */
- done = reprocess = true;
- break;
- }
-
- if (done) {
- int success;
- void *text, *appended;
-
- success = treebuilder->tree_handler->create_text(
- treebuilder->tree_handler->ctx,
- &treebuilder->context.collect.string,
- &text);
- if (success != 0) {
- /** \todo errors */
- }
-
- /** \todo fragment case -- skip this lot entirely */
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node,
- text, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx, text);
-
- /** \todo insertion point manipulation */
-
- /* Append script node to current node */
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node,
- treebuilder->context.collect.node, &appended);
- if (success != 0) {
- /** \todo errors */
- }
-
- /** \todo restore insertion point */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.collect.node);
- treebuilder->context.collect.node = NULL;
-
- /** \todo process any pending script */
-
- /* Return to previous insertion mode */
- treebuilder->context.mode =
- treebuilder->context.collect.mode;
- }
-
- return reprocess;
-}
-
/**
* Process a character token in cases where we expect only whitespace