/*
* This file is part of Hubbub.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2008 John-Mark Bell
*/
#include
#include
#include "treebuilder/modes.h"
#include "treebuilder/internal.h"
#include "treebuilder/treebuilder.h"
#include "utils/utils.h"
#undef DEBUG_IN_BODY
/**
 * A pair of formatting list entry pointers.
 *
 * NOTE(review): presumably records a position in the formatting list as the
 * gap between two neighbouring entries -- confirm against the code that
 * fills it in (aa_find_bookmark_location_reparenting_misnested).
 */
typedef struct bookmark bookmark;

struct bookmark {
	formatting_list_entry *prev;	/**< Entry on the "previous" side */
	formatting_list_entry *next;	/**< Entry on the "next" side */
};
/*
 * Forward declarations for the file-local handlers used by handle_in_body().
 */

/* Token-type level handlers; the bool results are handed back to
 * handle_in_body() as its "reprocess" result */
static void process_character(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static bool process_start_tag(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static bool process_end_tag(hubbub_treebuilder *treebuilder,
const hubbub_token *token);

/* Handlers that take the token being processed (dispatched from
 * process_start_tag(), i.e. start tag handling) */
static void process_html_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_body_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_container_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_form_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token, element_type type);
static void process_plaintext_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_a_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_presentational_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token, element_type type);
static void process_nobr_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_button_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_applet_marquee_object_in_body(
hubbub_treebuilder *treebuilder, const hubbub_token *token,
element_type type);
static void process_hr_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_image_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_input_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_isindex_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_textarea_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_select_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
static void process_phrasing_in_body(hubbub_treebuilder *treebuilder,
const hubbub_token *token);

/* Handlers that take no token, only (at most) an element type.
 * NOTE(review): the "0" prefix appears to mark end tag handling --
 * process_0body_in_body() is called from process_end_tag() and
 * process_0p_in_body() from process_container_in_body() -- confirm
 * for the remaining functions. */
static bool process_0body_in_body(hubbub_treebuilder *treebuilder);
static void process_0container_in_body(hubbub_treebuilder *treebuilder,
element_type type);
static void process_0p_in_body(hubbub_treebuilder *treebuilder);
static void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
element_type type);
static void process_0h_in_body(hubbub_treebuilder *treebuilder,
element_type type);
static void process_0presentational_in_body(hubbub_treebuilder *treebuilder,
element_type type);
static void process_0applet_button_marquee_object_in_body(
hubbub_treebuilder *treebuilder, element_type type);
static void process_0br_in_body(hubbub_treebuilder *treebuilder);
static void process_0generic_in_body(hubbub_treebuilder *treebuilder,
element_type type);

/* Helpers operating on formatting list entries and element stack indices.
 * NOTE(review): the "aa_" prefix presumably stands for the HTML5 "adoption
 * agency" algorithm -- confirm against the definitions. */
static bool aa_find_and_validate_formatting_element(
hubbub_treebuilder *treebuilder, element_type type,
formatting_list_entry **element);
static formatting_list_entry *aa_find_formatting_element(
hubbub_treebuilder *treebuilder, element_type type);
static bool aa_find_furthest_block(hubbub_treebuilder *treebuilder,
formatting_list_entry *formatting_element,
uint32_t *furthest_block);
static void aa_remove_from_parent(hubbub_treebuilder *treebuilder, void *node);
static void *aa_reparent_node(hubbub_treebuilder *treebuilder, void *node,
void *new_parent);
static void aa_find_bookmark_location_reparenting_misnested(
hubbub_treebuilder *treebuilder,
uint32_t formatting_element, uint32_t *furthest_block,
bookmark *bookmark, uint32_t *last_node);
static void aa_remove_element_stack_item(hubbub_treebuilder *treebuilder,
uint32_t index, uint32_t limit);
static void aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder,
formatting_list_entry *element);
/**
 * Handle tokens in "in body" insertion mode
 *
 * Dispatches on the token type: character, start tag and end tag tokens go
 * to their dedicated handlers, comments are appended to the current node,
 * and DOCTYPE tokens are ignored. On EOF the open element stack is only
 * inspected (see the todo below); nothing is modified.
 *
 * \param treebuilder  The treebuilder instance
 * \param token        The token to process
 * \return True to reprocess the token, false otherwise
 */
bool handle_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token)
{
	bool again = false;

#if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
	fprintf(stdout, "Processing token %d\n", token->type);
	element_stack_dump(treebuilder, stdout);
	formatting_list_dump(treebuilder, stdout);
#endif

	/* The LR stripping flag only survives until the next token when that
	 * token is a character token; anything else resets it */
	if (token->type != HUBBUB_TOKEN_CHARACTER &&
			treebuilder->context.strip_leading_lr) {
		treebuilder->context.strip_leading_lr = false;
	}

	switch (token->type) {
	case HUBBUB_TOKEN_CHARACTER:
		process_character(treebuilder, token);
		break;

	case HUBBUB_TOKEN_COMMENT:
	{
		/* Comments are attached to the current node */
		const uint32_t current = treebuilder->context.current_node;

		process_comment_append(treebuilder, token,
				treebuilder->context.element_stack[
						current].node);
	}
		break;

	case HUBBUB_TOKEN_DOCTYPE:
		/** \todo parse error */
		break;

	case HUBBUB_TOKEN_START_TAG:
		again = process_start_tag(treebuilder, token);
		break;

	case HUBBUB_TOKEN_END_TAG:
		again = process_end_tag(treebuilder, token);
		break;

	case HUBBUB_TOKEN_EOF:
		/* Walk the stack from the current node down to (but not
		 * including) entry 0, stopping at the first element that is
		 * not in the list below */
		for (uint32_t idx = treebuilder->context.current_node;
				idx > 0; idx--) {
			const element_type open =
				treebuilder->context.element_stack[idx].type;
			const bool expected = (open == DD || open == DT ||
					open == LI || open == P ||
					open == TBODY || open == TD ||
					open == TFOOT || open == TH ||
					open == THEAD || open == TR ||
					open == BODY);

			if (expected)
				continue;

			/** \todo parse error */
			break;
		}
		break;
	}

#if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
	fprintf(stdout, "Processed\n");
	element_stack_dump(treebuilder, stdout);
	formatting_list_dump(treebuilder, stdout);
#endif

	return again;
}
/**
 * Process a character token
 *
 * Reconstructs the active formatting list, optionally drops a single
 * leading '\n' (when the LR stripping flag is set), and appends whatever
 * text remains. The LR stripping flag is always cleared once a character
 * token has been seen.
 *
 * \param treebuilder  The treebuilder instance
 * \param token        The token to process
 */
void process_character(hubbub_treebuilder *treebuilder,
		const hubbub_token *token)
{
	/* Work on a copy so the token's own string is left untouched */
	hubbub_string dummy = token->data.character;

	reconstruct_active_formatting_list(treebuilder);

	if (treebuilder->context.strip_leading_lr) {
		const uint8_t *str = dummy.ptr;

		/** \todo UTF-16 */
		/* Only inspect the first byte if there is one: an empty
		 * string must not be dereferenced, and decrementing a zero
		 * length would wrap around to a huge value */
		if (dummy.len > 0 && *str == '\n') {
			dummy.ptr++;
			dummy.len--;
		}

		treebuilder->context.strip_leading_lr = false;
	}

	/* Nothing to append if the data was empty or just the stripped LF */
	if (dummy.len)
		append_text(treebuilder, &dummy);
}
/**
 * Process a start tag
 *
 * Maps the tag name to an element type and dispatches to the handler for
 * that type. Element types without a dedicated case are treated as phrasing
 * content.
 *
 * \param treebuilder  The treebuilder instance
 * \param token        The token to process
 * \return True to reprocess the token. No start tag currently requests
 *         reprocessing, so this is always false.
 */
bool process_start_tag(hubbub_treebuilder *treebuilder,
		const hubbub_token *token)
{
	element_type type = element_type_from_name(treebuilder,
			&token->data.tag.name);

	switch (type) {
	case HTML:
		process_html_in_body(treebuilder, token);
		break;

	case BASE: case COMMAND: case EVENTSOURCE: case LINK:
	case META: case NOFRAMES: case SCRIPT: case STYLE:
	case TITLE:
		/* Process as "in head" */
		process_in_head(treebuilder, token);
		break;

	case BODY:
		process_body_in_body(treebuilder, token);
		break;

	case ADDRESS: case ARTICLE: case ASIDE: case BLOCKQUOTE:
	case CENTER: case DATAGRID: case DETAILS: case DIALOG:
	case DIR: case DIV: case DL: case FIELDSET:
	case FIGURE: case FOOTER:
	case H1: case H2: case H3: case H4: case H5: case H6:
	case HEADER: case MENU: case NAV: case OL:
	case P: case SECTION: case UL:
		process_container_in_body(treebuilder, token);
		break;

	case PRE: case LISTING:
		process_container_in_body(treebuilder, token);
		/* Request that a '\n' at the start of the next character
		 * token be dropped */
		treebuilder->context.strip_leading_lr = true;
		break;

	case FORM:
		process_form_in_body(treebuilder, token);
		break;

	case DD: case DT: case LI:
		process_dd_dt_li_in_body(treebuilder, token, type);
		break;

	case PLAINTEXT:
		process_plaintext_in_body(treebuilder, token);
		break;

	case A:
		process_a_in_body(treebuilder, token);
		break;

	case B: case BIG: case EM: case FONT: case I: case S:
	case SMALL: case STRIKE: case STRONG: case TT: case U:
		process_presentational_in_body(treebuilder, token, type);
		break;

	case NOBR:
		process_nobr_in_body(treebuilder, token);
		break;

	case BUTTON:
		process_button_in_body(treebuilder, token);
		break;

	case APPLET: case MARQUEE: case OBJECT:
		process_applet_marquee_object_in_body(treebuilder,
				token, type);
		break;

	case XMP:
		reconstruct_active_formatting_list(treebuilder);
		parse_generic_rcdata(treebuilder, token, false);
		break;

	case TABLE:
		process_container_in_body(treebuilder, token);
		/* The table just inserted starts out untainted */
		treebuilder->context.element_stack[
				current_table(treebuilder)].tainted = false;
		treebuilder->context.mode = IN_TABLE;
		break;

	case AREA: case BASEFONT: case BGSOUND: case BR:
	case EMBED: case IMG: case PARAM: case SPACER:
	case WBR:
		reconstruct_active_formatting_list(treebuilder);
		insert_element_no_push(treebuilder, &token->data.tag);
		break;

	case HR:
		process_hr_in_body(treebuilder, token);
		break;

	case IMAGE:
		process_image_in_body(treebuilder, token);
		break;

	case INPUT:
		process_input_in_body(treebuilder, token);
		break;

	case ISINDEX:
		process_isindex_in_body(treebuilder, token);
		break;

	case TEXTAREA:
		process_textarea_in_body(treebuilder, token);
		break;

	case IFRAME: case NOEMBED:
		/* NOFRAMES is deliberately absent here: it is always
		 * handled by the "in head" case above */
		parse_generic_rcdata(treebuilder, token, false);
		break;

	case NOSCRIPT:
		/* With scripting disabled, noscript is handled like any
		 * other phrasing element */
		if (treebuilder->context.enable_scripting)
			parse_generic_rcdata(treebuilder, token, false);
		else
			process_phrasing_in_body(treebuilder, token);
		break;

	case SELECT:
		process_select_in_body(treebuilder, token);

		/* Pick the select mode that matches the current insertion
		 * mode; any other mode is left untouched */
		if (treebuilder->context.mode == IN_BODY) {
			treebuilder->context.mode = IN_SELECT;
		} else if (treebuilder->context.mode == IN_TABLE ||
				treebuilder->context.mode == IN_CAPTION ||
				treebuilder->context.mode ==
						IN_COLUMN_GROUP ||
				treebuilder->context.mode == IN_TABLE_BODY ||
				treebuilder->context.mode == IN_ROW ||
				treebuilder->context.mode == IN_CELL) {
			treebuilder->context.mode = IN_SELECT_IN_TABLE;
		}
		break;

	case RP: case RT:
		/** \todo ruby */
		break;

	case MATH: case SVG:
	{
		/* Adjust a copy of the tag rather than the token's own */
		hubbub_tag tag = token->data.tag;

		reconstruct_active_formatting_list(treebuilder);
		adjust_foreign_attributes(treebuilder, &tag);

		if (type == SVG) {
			adjust_svg_attributes(treebuilder, &tag);
			tag.ns = HUBBUB_NS_SVG;
		} else {
			tag.ns = HUBBUB_NS_MATHML;
		}

		if (token->data.tag.self_closing) {
			insert_element_no_push(treebuilder, &tag);
			/** \todo ack sc flag */
		} else {
			insert_element(treebuilder, &tag);
			/* Remember the mode to return to */
			treebuilder->context.second_mode =
					treebuilder->context.mode;
			treebuilder->context.mode = IN_FOREIGN_CONTENT;
		}
	}
		break;

	case CAPTION: case COL: case COLGROUP: case FRAME:
	case FRAMESET: case HEAD: case TBODY: case TD:
	case TFOOT: case TH: case THEAD: case TR:
		/** \todo parse error */
		break;

	default:
		process_phrasing_in_body(treebuilder, token);
		break;
	}

	return false;
}
/**
* Process an end tag
*
* \param treebuilder The treebuilder instance
* \param token The token to process
* \return True to reprocess the token
*/
bool process_end_tag(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
element_type type = element_type_from_name(treebuilder,
&token->data.tag.name);
if (type == BODY) {
if (process_0body_in_body(treebuilder) &&
treebuilder->context.mode == IN_BODY) {
treebuilder->context.mode = AFTER_BODY;
}
} else if (type == HTML) {
/* Act as if
start tag as if in "in body"
*
* \param treebuilder The treebuilder instance
* \param token The token to process
*/
/**
 * Process a body start tag as if in "in body"
 *
 * If entry 1 of the element stack exists and is a BODY element, the token's
 * attributes are passed to the tree handler's add_attributes callback for
 * that node. Otherwise the token is ignored.
 *
 * \param treebuilder  The treebuilder instance
 * \param token        The token to process
 */
void process_body_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token)
{
	/** \todo parse error */

	/* The stack must hold at least two entries before entry 1 may be
	 * inspected */
	const bool body_on_stack =
			treebuilder->context.current_node >= 1 &&
			treebuilder->context.element_stack[1].type == BODY;

	if (body_on_stack) {
		treebuilder->tree_handler->add_attributes(
				treebuilder->tree_handler->ctx,
				treebuilder->context.element_stack[1].node,
				token->data.tag.attributes,
				token->data.tag.n_attributes);
	}
}
/**
 * Process a generic container start tag as if in "in body"
 *
 * When a P element is in scope, process_0p_in_body() is run before the
 * new element is inserted for the token.
 *
 * \param treebuilder  The treebuilder instance
 * \param token        The token to process
 */
void process_container_in_body(hubbub_treebuilder *treebuilder,
		const hubbub_token *token)
{
	const bool p_in_scope = element_in_scope(treebuilder, P, false);

	if (p_in_scope)
		process_0p_in_body(treebuilder);

	insert_element(treebuilder, &token->data.tag);
}
/**
* Process a