From d4f37723baada4e546dc6451d08b1e636dbeae3d Mon Sep 17 00:00:00 2001 From: Rupinder Singh Khokhar Date: Fri, 11 Jul 2014 05:26:59 +0530 Subject: Added support to get attribute names & values from the context. This slows the library down slightly. Optimizations will be done later. --- src/treebuilder/after_head.c | 3 +- src/treebuilder/before_html.c | 19 +++++++++- src/treebuilder/in_body.c | 53 +++++++++++++++++++++++++--- src/treebuilder/in_row.c | 2 ++ src/treebuilder/in_table.c | 2 ++ src/treebuilder/internal.h | 18 +++++++--- src/treebuilder/treebuilder.c | 81 +++++++++++++++++++++++++++++++++++++++---- 7 files changed, 161 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/treebuilder/after_head.c b/src/treebuilder/after_head.c index 40955a8..c2444bf 100644 --- a/src/treebuilder/after_head.c +++ b/src/treebuilder/after_head.c @@ -67,7 +67,8 @@ hubbub_error handle_after_head(hubbub_treebuilder *treebuilder, err = element_stack_push(treebuilder, HUBBUB_NS_HTML, HEAD, - treebuilder->context.head_element); + treebuilder->context.head_element, + NULL, 0); if (err != HUBBUB_OK) return err; diff --git a/src/treebuilder/before_html.c b/src/treebuilder/before_html.c index 1ed3717..069696f 100644 --- a/src/treebuilder/before_html.c +++ b/src/treebuilder/before_html.c @@ -61,6 +61,7 @@ hubbub_error handle_before_html(hubbub_treebuilder *treebuilder, if (handled || err == HUBBUB_REPROCESS) { hubbub_error e; void *html, *appended; + size_t i; /* We can't use insert_element() here, as it assumes * that we're inserting into current_node. 
There is @@ -110,7 +111,23 @@ hubbub_error handle_before_html(hubbub_treebuilder *treebuilder, treebuilder->context.element_stack[0].type = HTML; treebuilder->context.element_stack[0].node = appended; treebuilder->context.current_node = 0; - + if(err != HUBBUB_REPROCESS) { + if(token->data.tag.n_attributes > 0){ + treebuilder->context.element_stack[0].attributes = + (hubbub_attribute *)malloc(sizeof(hubbub_attribute) * + token->data.tag.n_attributes); + } + + for(i = 0; i < token->data.tag.n_attributes; i++) { + copy_attribute(&token->data.tag.attributes[i], + &treebuilder->context.element_stack[0].attributes[i]); + } + treebuilder->context.element_stack[0].n_attributes = + token->data.tag.n_attributes; + } else { + treebuilder->context.element_stack[0].n_attributes = + 0; + } /** \todo cache selection algorithm */ treebuilder->context.mode = BEFORE_HEAD; diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c index eb82db0..f4ac2ad 100644 --- a/src/treebuilder/in_body.c +++ b/src/treebuilder/in_body.c @@ -523,12 +523,49 @@ hubbub_error process_html_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token) { /** \todo parse error */ + size_t i; + hubbub_attribute *attrs = + treebuilder->context.element_stack[0].attributes; + size_t n_attrs = + treebuilder->context.element_stack[0].n_attributes; + + size_t j; + const hubbub_tag *tag = &token->data.tag; + + bool found = 0; + + size_t dummy_len = 0; + element_context *stack = treebuilder->context.element_stack; + + /** This O(n^2) algorithm can be easily further reduced in time complexity to improve speed*/ + for (j = 0; j < tag->n_attributes; j++) { + found = 0; + for (i = 0; i < n_attrs; i++) { + if (hubbub_string_match(attrs[i].name.ptr, attrs[i].name.len, + tag->attributes[j].name.ptr, + tag->attributes[j].name.len) == true) { + found = 1; + break; + } + } + if(!found) { + stack->n_attributes += 1; + stack->attributes = + realloc(stack->attributes, + stack->n_attributes + * sizeof 
(stack->attributes[0])); + copy_attribute(&tag->attributes[j], + &stack->attributes[ + (stack->n_attributes)-1]); + dummy_len += 1; + } + } return treebuilder->tree_handler->add_attributes( treebuilder->tree_handler->ctx, treebuilder->context.element_stack[0].node, - token->data.tag.attributes, - token->data.tag.n_attributes); + &stack->attributes[stack->n_attributes] - dummy_len, + dummy_len); } /** @@ -876,6 +913,7 @@ hubbub_error process_a_in_body(hubbub_treebuilder *treebuilder, err = formatting_list_append(treebuilder, token->data.tag.ns, A, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, token->data.tag.n_attributes, treebuilder->context.current_node); if (err != HUBBUB_OK) { hubbub_ns ns; @@ -929,6 +967,7 @@ hubbub_error process_presentational_in_body(hubbub_treebuilder *treebuilder, err = formatting_list_append(treebuilder, token->data.tag.ns, type, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, token->data.tag.n_attributes, treebuilder->context.current_node); if (err != HUBBUB_OK) { hubbub_ns ns; @@ -995,6 +1034,7 @@ hubbub_error process_nobr_in_body(hubbub_treebuilder *treebuilder, err = formatting_list_append(treebuilder, token->data.tag.ns, NOBR, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, token->data.tag.n_attributes, treebuilder->context.current_node); if (err != HUBBUB_OK) { hubbub_ns ns; @@ -1082,6 +1122,7 @@ hubbub_error process_applet_marquee_object_in_body( err = formatting_list_append(treebuilder, token->data.tag.ns, type, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, token->data.tag.n_attributes, treebuilder->context.current_node); if (err != HUBBUB_OK) { hubbub_ns ns; @@ -1877,6 +1918,8 @@ hubbub_error process_0presentational_in_body(hubbub_treebuilder *treebuilder, * we insert an entry for clone */ 
stack[furthest_block + 1].type = entry->details.type; stack[furthest_block + 1].node = clone_appended; + hubbub_attribute *attrs = entry->details.attributes; + size_t n_attrs = entry->details.n_attributes; /* 11 */ err = formatting_list_remove(treebuilder, entry, @@ -1888,7 +1931,8 @@ hubbub_error process_0presentational_in_body(hubbub_treebuilder *treebuilder, err = formatting_list_insert(treebuilder, bookmark.prev, bookmark.next, - ons, otype, clone_appended, furthest_block + 1); + ons, otype, clone_appended, attrs, n_attrs, + furthest_block + 1); if (err != HUBBUB_OK) { treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, @@ -2268,7 +2312,8 @@ hubbub_error aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder, /* Replace formatting list entry for node with clone */ err = formatting_list_replace(treebuilder, element, element->details.ns, element->details.type, - clone, element->stack_index, + clone, element->details.attributes, + element->details.n_attributes, element->stack_index, &ons, &otype, &onode, &oindex); assert(err == HUBBUB_OK); diff --git a/src/treebuilder/in_row.c b/src/treebuilder/in_row.c index 3bf9161..0446e6f 100644 --- a/src/treebuilder/in_row.c +++ b/src/treebuilder/in_row.c @@ -106,6 +106,8 @@ hubbub_error handle_in_row(hubbub_treebuilder *treebuilder, token->data.tag.ns, type, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, + token->data.tag.n_attributes, treebuilder->context.current_node); if (err != HUBBUB_OK) { hubbub_ns ns; diff --git a/src/treebuilder/in_table.c b/src/treebuilder/in_table.c index 875e8ae..cf61827 100644 --- a/src/treebuilder/in_table.c +++ b/src/treebuilder/in_table.c @@ -121,6 +121,8 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder, token->data.tag.ns, type, treebuilder->context.element_stack[ treebuilder->context.current_node].node, + token->data.tag.attributes, + token->data.tag.n_attributes, 
treebuilder->context.current_node); if (err != HUBBUB_OK) { treebuilder->tree_handler->unref_node( diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 941f94a..f1314bb 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -57,6 +57,10 @@ typedef struct element_context * instead of the current node." */ void *node; /**< Node pointer */ + hubbub_attribute *attributes; /**< The attributes associated with + * element*/ + size_t n_attributes; /**< Number of attributes associated + * with the element*/ } element_context; /** @@ -170,7 +174,8 @@ bool is_formatting_element(element_type type); bool is_phrasing_element(element_type type); hubbub_error element_stack_push(hubbub_treebuilder *treebuilder, - hubbub_ns ns, element_type type, void *node); + hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs); hubbub_error element_stack_pop(hubbub_treebuilder *treebuilder, hubbub_ns *ns, element_type *type, void **node); hubbub_error element_stack_pop_until(hubbub_treebuilder *treebuilder, @@ -183,12 +188,12 @@ element_type current_node(hubbub_treebuilder *treebuilder); element_type prev_node(hubbub_treebuilder *treebuilder); hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder, - hubbub_ns ns, element_type type, void *node, - uint32_t stack_index); + hubbub_ns ns, element_type type, void *node, hubbub_attribute *attrs, + size_t n_attrs, uint32_t stack_index); hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder, formatting_list_entry *prev, formatting_list_entry *next, - hubbub_ns ns, element_type type, void *node, - uint32_t stack_index); + hubbub_ns ns, element_type type, void *node, hubbub_attribute *attrs, + size_t n_attrs, uint32_t stack_index); hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder, formatting_list_entry *entry, hubbub_ns *ns, element_type *type, void **node, @@ -196,9 +201,12 @@ hubbub_error formatting_list_remove(hubbub_treebuilder 
*treebuilder, hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder, formatting_list_entry *entry, hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs, uint32_t stack_index, hubbub_ns *ons, element_type *otype, void **onode, uint32_t *ostack_index); +void copy_attribute(hubbub_attribute *source, + hubbub_attribute *sink); /* in_foreign_content.c */ void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag); diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 11ff2a2..32d7932 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -5,6 +5,8 @@ * Copyright 2008 John-Mark Bell */ +#define _GNU_SOURCE + #include #include @@ -137,6 +139,8 @@ hubbub_error hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, * if the first item in the stack is in use. Assert this here. */ assert(HTML != 0); tb->context.element_stack[0].type = (element_type) 0; + tb->context.element_stack[0].attributes = NULL; + tb->context.element_stack[0].n_attributes = 0; tb->context.strip_leading_lr = false; tb->context.frameset_ok = true; @@ -687,7 +691,9 @@ hubbub_error reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) goto cleanup; error = element_stack_push(treebuilder, entry->details.ns, - entry->details.type, appended); + entry->details.type, appended, + entry->details.attributes, + entry->details.n_attributes); if (error != HUBBUB_OK) { remove_node_from_dom(treebuilder, appended); @@ -716,8 +722,9 @@ hubbub_error reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder) error = formatting_list_replace(treebuilder, entry, entry->details.ns, entry->details.type, - node, sp, - &prev_ns, &prev_type, &prev_node, + node, entry->details.attributes, + entry->details.n_attributes, + sp, &prev_ns, &prev_type, &prev_node, &prev_stack_index); /* Cannot fail. Ensure this. 
*/ assert(error == HUBBUB_OK); @@ -880,7 +887,8 @@ hubbub_error insert_element(hubbub_treebuilder *treebuilder, if (push) { error = element_stack_push(treebuilder, - tag->ns, type, appended); + tag->ns, type, appended, + tag->attributes, tag->n_attributes); if (error != HUBBUB_OK) { remove_node_from_dom(treebuilder, appended); @@ -1154,9 +1162,11 @@ bool is_form_associated(element_type type) * \return HUBBUB_OK on success, appropriate error otherwise. */ hubbub_error element_stack_push(hubbub_treebuilder *treebuilder, - hubbub_ns ns, element_type type, void *node) + hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs) { uint32_t slot = treebuilder->context.current_node + 1; + size_t i; if (slot >= treebuilder->context.stack_alloc) { element_context *temp = realloc( @@ -1176,6 +1186,16 @@ hubbub_error element_stack_push(hubbub_treebuilder *treebuilder, treebuilder->context.element_stack[slot].type = type; treebuilder->context.element_stack[slot].node = node; + if(n_attrs > 0) { + treebuilder->context.element_stack[slot].attributes = + (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute)); + } + for (i = 0; i < n_attrs; i++) { + copy_attribute(&attrs[i], + &treebuilder->context.element_stack[slot].attributes[i]); + } + treebuilder->context.element_stack[slot].n_attributes = n_attrs; + treebuilder->context.current_node = slot; return HUBBUB_OK; @@ -1371,11 +1391,14 @@ element_type prev_node(hubbub_treebuilder *treebuilder) */ hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder, hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs, uint32_t stack_index) { formatting_list_entry *entry; uint32_t n_elements = 0; formatting_list_entry *remove_entry; + size_t i; + for (entry = treebuilder->context.formatting_list_end; entry != NULL; entry = entry->prev) { /* Assumption: HTML and TABLE elements are not in the list */ @@ -1410,6 +1433,15 @@ hubbub_error 
formatting_list_append(hubbub_treebuilder *treebuilder, entry->details.type = type; entry->details.node = node; entry->stack_index = stack_index; + if(n_attrs > 0) { + entry->details.attributes = + (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute)); + } + for(i = 0; i < n_attrs; i++) { + copy_attribute(&attrs[i], + &entry->details.attributes[i]); + } + entry->details.n_attributes = n_attrs; entry->prev = treebuilder->context.formatting_list_end; entry->next = NULL; @@ -1439,9 +1471,11 @@ hubbub_error formatting_list_append(hubbub_treebuilder *treebuilder, hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder, formatting_list_entry *prev, formatting_list_entry *next, hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs, uint32_t stack_index) { formatting_list_entry *entry; + size_t i; if (prev != NULL) { assert(prev->next == next); @@ -1459,6 +1493,15 @@ hubbub_error formatting_list_insert(hubbub_treebuilder *treebuilder, entry->details.type = type; entry->details.node = node; entry->stack_index = stack_index; + entry->details.n_attributes = n_attrs; + if(n_attrs > 0) { + entry->details.attributes = (hubbub_attribute *) + malloc(n_attrs * sizeof(hubbub_attribute)); + } + for(i = 0;i < n_attrs; i++) { + copy_attribute(&attrs[i], + &entry->details.attributes[i]); + } entry->prev = prev; entry->next = next; @@ -1530,12 +1573,13 @@ hubbub_error formatting_list_remove(hubbub_treebuilder *treebuilder, hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder, formatting_list_entry *entry, hubbub_ns ns, element_type type, void *node, + hubbub_attribute *attrs, size_t n_attrs, uint32_t stack_index, hubbub_ns *ons, element_type *otype, void **onode, uint32_t *ostack_index) { UNUSED(treebuilder); - + size_t i = 0; *ons = entry->details.ns; *otype = entry->details.type; *onode = entry->details.node; @@ -1544,11 +1588,36 @@ hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder, 
entry->details.ns = ns; entry->details.type = type; entry->details.node = node; + entry->details.n_attributes = n_attrs; + if(n_attrs > 0) { + entry->details.attributes = + (hubbub_attribute *)malloc(n_attrs * sizeof(hubbub_attribute)); + } + for(i = 0;i < n_attrs;i++) { + copy_attribute(&attrs[i], + &entry->details.attributes[i]); + } entry->stack_index = stack_index; return HUBBUB_OK; } +void copy_attribute(hubbub_attribute *source, + hubbub_attribute *sink) { + + sink->ns = source->ns; + + sink->name.ptr = (const uint8_t *) strndup( + (const char *) source->name.ptr, + source->name.len); + sink->name.len = source->name.len; + + sink->value.ptr = (const uint8_t *) strndup( + (const char *) source->value.ptr, + source->value.len); + sink->value.len = source->value.len; + return; +} #ifndef NDEBUG -- cgit v1.2.3