From 3ab390780657abb179c5271dedb3a2731be9c3df Mon Sep 17 00:00:00 2001 From: Andrew Sidwell Date: Mon, 7 Jul 2008 18:54:14 +0000 Subject: Add a really basic tree construction test harness. Known problems: - Doesn't compare expected tree with hubbub's tree -- atm it just checks there are no segfaults - Ungracious handling of #document-fragment tests (since hubbub doesn't support them at present) - Leaks memory because it doesn't use refcounting - Doesn't support dynamic adding of attributes. Also, it shows a crash bug in the treebuilder when run. svn path=/trunk/hubbub/; revision=4529 --- test/INDEX | 1 + test/Makefile | 5 +- test/tree2.c | 561 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 565 insertions(+), 2 deletions(-) create mode 100644 test/tree2.c diff --git a/test/INDEX b/test/INDEX index 41d9746..e3522bc 100644 --- a/test/INDEX +++ b/test/INDEX @@ -16,6 +16,7 @@ tokeniser HTML tokeniser html tokeniser2 HTML tokeniser (again) tokeniser2 tokeniser3 HTML tokeniser (byte-by-byte) tokeniser2 tree Treebuilding API html +tree2 Treebuilding API tree-construction # Regression tests regression/cscodec-segv Segfault in charset codecs diff --git a/test/Makefile b/test/Makefile index 00ca279..97648e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,13 +34,14 @@ d := $(DIR) # Extend toolchain settings # We require the presence of libjson -- http://oss.metaparadigm.com/json-c/ CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d) \ - `$(PKGCONFIG) $(PKGCONFIGFLAGS) --cflags json` + `$(PKGCONFIG) $(PKGCONFIGFLAGS) --cflags json` \ + -Wno-unused-parameter LDFLAGS := $(LDFLAGS) `$(PKGCONFIG) $(PKGCONFIGFLAGS) --libs json` # Tests TESTS_$(d) := aliases cscodec csdetect dict entities filter hubbub \ inputstream parser parser-utf16 tokeniser tokeniser2 tokeniser3 \ - tree + tree tree2 TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \ regression/stream-nomem diff --git a/test/tree2.c b/test/tree2.c new file mode 100644 index 0000000..807b9a7 --- /dev/null +++ b/test/tree2.c @@ -0,0 +1,561 @@ +/* + * Tree construction tester. + * Leaks a fair bit of memory, because it doesn't make use of reference + * counting. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include + +#include +#include +#include + +#include "utils/utils.h" + +#include "testutils.h" + +typedef struct attr_t attr_t; +typedef struct node_t node_t; + +struct attr_t { + char *name; + char *value; +}; + +struct node_t { + enum { DOCTYPE, COMMENT, ELEMENT, CHARACTER } type; + + union { + struct { + char *name; + char *public_id; + char *system_id; + } doctype; + + struct { + char *name; + attr_t *attrs; + size_t n_attrs; + } element; + + char *content; /**< For comments, characters **/ + } data; + + node_t *next; + node_t *prev; + + node_t *child; + node_t *parent; +}; + +node_t *Document; + + + +static void node_print(node_t *node, unsigned depth); + + +static const uint8_t *pbuffer; + +static void buffer_handler(const uint8_t *buffer, size_t len, void *pw); +static int create_comment(void *ctx, const hubbub_string *data, void **result); +static int create_doctype(void *ctx, const hubbub_string *qname, + const hubbub_string *public_id, const hubbub_string *system_id, + void **result); +static int create_element(void *ctx, const hubbub_tag *tag, void **result); +static int create_text(void *ctx, const hubbub_string *data, void **result); +static int ref_node(void *ctx, void *node); +static int unref_node(void *ctx, void *node); +static int append_child(void *ctx, void *parent, void *child, void **result); +static int insert_before(void *ctx, void *parent, void *child, void *ref_child, + void **result); +static int remove_child(void *ctx, void *parent, void *child, void **result); +static int clone_node(void *ctx, void *node, bool deep, void **result); +static int reparent_children(void *ctx, void *node, void *new_parent); +static int get_parent(void *ctx, void *node, bool element_only, void **result); +static int has_children(void *ctx, void *node, bool *result); +static int form_associate(void *ctx, void *form, void *node); +static int add_attributes(void *ctx, void *node, + const hubbub_attribute *attributes, uint32_t n_attributes); +static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); + +static hubbub_tree_handler tree_handler = { + create_comment, + create_doctype, + create_element, + create_text, + ref_node, + unref_node, + append_child, + insert_before, + remove_child, + clone_node, + reparent_children, + get_parent, + has_children, + form_associate, + add_attributes, + set_quirks_mode, + NULL +}; + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string) +{ + const uint8_t *data; + + switch (string->type) { + case HUBBUB_STRING_OFF: + data = pbuffer + string->data.off; + break; + case HUBBUB_STRING_PTR: + data = string->data.ptr; + break; + } + + return data; +} + + + +/* + * Create, initialise, and return, a parser instance. + */ +static hubbub_parser *setup_parser(void) +{ + hubbub_parser *parser; + hubbub_parser_optparams params; + + parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL); + assert(parser != NULL); + + params.buffer_handler.handler = buffer_handler; + params.buffer_handler.pw = NULL; + assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER, + ¶ms) == HUBBUB_OK); + + params.tree_handler = &tree_handler; + assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, + ¶ms) == HUBBUB_OK); + + params.document_node = (void *)1; + assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, + ¶ms) == HUBBUB_OK); + + return parser; +} + + +void buffer_handler(const uint8_t *buffer, size_t len, void *pw) +{ + UNUSED(len); + UNUSED(pw); + + pbuffer = buffer; +} + + +/* States for reading in data from the tree construction file */ +enum reading_state { + EXPECT_DATA, + READING_DATA, + READING_ERRORS, + READING_TREE, +}; + +int main(int argc, char **argv) +{ + FILE *fp; + char line[1024]; + + bool passed = true; + + hubbub_parser *parser; + enum reading_state state = EXPECT_DATA; + + + if (argc != 3) { + printf("Usage: %s \n", argv[0]); + return 1; + } + + /* Initialise library */ + assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); + + fp = fopen(argv[2], "rb"); + if (fp == NULL) { + printf("Failed opening %s\n", argv[2]); + return 1; + } + + /* We rely on lines not being anywhere near 1024 characters... */ + while (fgets(line, sizeof line, fp) == line) { + switch (state) + { + case EXPECT_DATA: + if (strcmp(line, "#data\n") == 0) { + parser = setup_parser(); + state = READING_DATA; + } + break; + + case READING_DATA: + if (strcmp(line, "#errors\n") == 0) { + assert(hubbub_parser_completed(parser) == HUBBUB_OK); + state = READING_ERRORS; + } else { + size_t len = strlen(line); + + assert(hubbub_parser_parse_chunk(parser, (uint8_t *)line, + len - 1) == HUBBUB_OK); + printf(": %s", line); + } + break; + + case READING_ERRORS: + assert(strcmp(line, "#document-fragment\n") != 0); + + if (strcmp(line, "#document\n") == 0) + state = READING_TREE; + else { + } + break; + + case READING_TREE: + if (line[0] == '|' && line[1] == ' ') { + printf("%s", line); + } else { + node_print(Document, 0); + hubbub_parser_destroy(parser); + + state = EXPECT_DATA; + } + break; + } + } + + printf("%s\n", passed ? "PASS" : "FAIL"); + + assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); + + return 0; +} + + +/*** Tree construction functions ***/ + +int create_comment(void *ctx, const hubbub_string *data, void **result) +{ + node_t *node = calloc(1, sizeof *node); + + node->type = COMMENT; + node->data.content = strndup((char *)ptr_from_hubbub_string(data), + data->len); + + *result = node; + + return 0; +} + +int create_doctype(void *ctx, const hubbub_string *qname, + const hubbub_string *public_id, const hubbub_string *system_id, + void **result) +{ + node_t *node = calloc(1, sizeof *node); + + node->type = DOCTYPE; + node->data.doctype.name = strndup((char *)ptr_from_hubbub_string(qname), + qname->len); + node->data.doctype.public_id = + strndup((char *)ptr_from_hubbub_string(public_id), + public_id->len); + node->data.doctype.system_id = strndup( + (char *)ptr_from_hubbub_string(system_id), + system_id->len); + + *result = node; + + return 0; +} + +int create_element(void *ctx, const hubbub_tag *tag, void **result) +{ + node_t *node = calloc(1, sizeof *node); + + node->type = ELEMENT; + node->data.element.name = strndup( + (char *)ptr_from_hubbub_string(&tag->name), + tag->name.len); + node->data.element.n_attrs = tag->n_attributes; + + node->data.element.attrs = calloc(node->data.element.n_attrs, + sizeof *node->data.element.attrs); + + for (size_t i = 0; i < tag->n_attributes; i++) { + node->data.element.attrs[i].name = strndup( + (char *)ptr_from_hubbub_string( + &tag->attributes[i].name), + tag->attributes[i].name.len); + + node->data.element.attrs[i].value = strndup( + (char *)ptr_from_hubbub_string( + &tag->attributes[i].value), + tag->attributes[i].value.len); + } + + *result = node; + + return 0; +} + +int create_text(void *ctx, const hubbub_string *data, void **result) +{ + node_t *node = calloc(1, sizeof *node); + + node->type = CHARACTER; + node->data.content = strndup((char *)ptr_from_hubbub_string(data), + data->len); + + *result = node; + + return 0; +} + +int ref_node(void *ctx, void *node) +{ + return 0; +} + +int unref_node(void *ctx, void *node) +{ + return 0; +} + +int append_child(void *ctx, void *parent, void *child, void **result) +{ + node_t *tparent = parent; + node_t *tchild = child; + + if (parent == (void *)1) { + Document = tchild; + } else { + if (tparent->child == NULL) { + tparent->child = tchild; + tchild->parent = tparent; + } else { + node_t *insert = tparent->child; + + while (insert->next != NULL) { + insert = insert->next; + } + + insert->next = tchild; + tchild->prev = insert; + + tchild->parent = tparent; + } + } + + *result = child; + + return 0; +} + +/* insert 'child' before 'ref_child', under 'parent' */ +int insert_before(void *ctx, void *parent, void *child, void *ref_child, + void **result) +{ + node_t *tparent = parent; + node_t *tchild = child; + node_t *tref = ref_child; + + tchild->parent = parent; + + tchild->prev = tref->prev; + tchild->next = tref; + tref->prev = tchild; + + if (tref->prev) + tref->prev->next = tchild; + else + tparent->child = tchild; + + return 0; +} + +int remove_child(void *ctx, void *parent, void *child, void **result) +{ + node_t *tparent = parent; + node_t *tchild = child; + + assert(tparent->child); + + + if (tchild->parent->child == tchild) + tchild->parent->child = tchild->next; + + if (tchild->prev) + tchild->prev->next = tchild->next; + + if (tchild->next) + tchild->next->prev = tchild->prev; + + /* now reset all the child's pointers */ + tchild->next = tchild->prev = tchild->parent = NULL; + + *result = child; + + return 0; +} + +int clone_node(void *ctx, void *node, bool deep, void **result) +{ + node_t *old_node = node; + node_t *new_node = calloc(1, sizeof *new_node); + + *new_node = *old_node; + *result = new_node; + + new_node->child = new_node->parent = + new_node->next = new_node->prev = + NULL; + + if (deep == false) + return 0; + + if (old_node->next) { + void *n; + + clone_node(ctx, old_node->next, true, &n); + + new_node->next = n; + new_node->next->prev = new_node; + } + + if (old_node->child) { + void *n; + + clone_node(ctx, old_node->child, true, &n); + + new_node->child = n; + new_node->child->parent = new_node; + } + + return 0; +} + +/* Reparent "node" to "new_parent" */ +int reparent_children(void *ctx, void *node, void *new_parent) +{ + node_t *tparent = new_parent; + node_t *tchild = node; + void *nnode; + + remove_child(ctx, tchild->parent, tchild, &nnode); + append_child(ctx, tparent, tchild, &nnode); + + return 0; +} + +int get_parent(void *ctx, void *node, bool element_only, void **result) +{ + *result = ((node_t *)node)->parent; + + return 0; +} + +int has_children(void *ctx, void *node, bool *result) +{ + *result = ((node_t *)node)->child ? true : false; + + return 0; +} + +int form_associate(void *ctx, void *form, void *node) +{ + return 0; +} + +int add_attributes(void *ctx, void *node, + const hubbub_attribute *attributes, uint32_t n_attributes) +{ + /* not yet implemented */ + + return 0; +} + +int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) +{ + return 0; +} + + + +/*** Serialising bits ***/ + +static int compare_attrs(const void *a, const void *b) { + const attr_t *first = a; + const attr_t *second = b; + + return strcmp(first->name, second->name); +} + + +static void node_print(node_t *node, unsigned depth) +{ + if (!node) return; + + printf("@ "); + for (unsigned i = 0; i < depth; i++) { + printf(" "); + } + + switch (node->type) + { + case DOCTYPE: + printf("data.element.name); + + qsort(node->data.element.attrs, node->data.element.n_attrs, + sizeof *node->data.element.attrs, + compare_attrs); + + for (size_t i = 0; i < node->data.element.n_attrs; i++) { + printf(" %s=\"%s\"", + node->data.element.attrs[i].name, + node->data.element.attrs[i].value); + } + + printf(">\n"); + + break; + case CHARACTER: + printf("\"%s\"\n", node->data.content); + break; + case COMMENT: + printf("\n", node->data.content); + break; + } + + if (node->child) { + node = node->child; + + while (node) { + node_print(node, depth + 1); + node = node->next; + } + } +} -- cgit v1.2.3