From 923baf72744a3b91a82266e535fb45fa66d9cc19 Mon Sep 17 00:00:00 2001
From: Andrew Sidwell
Date: Thu, 31 Jul 2008 15:11:12 +0000
Subject: Add my cobbled-together perftests.

svn path=/trunk/hubbub/; revision=4842
---
 perf/README          |  27 +++
 perf/html5libtest.py |  14 ++
 perf/hubbub.c        | 641 +++++++++++++++++++++++++++++++++++++++++++++++++++
 perf/libxml2.c       |  71 ++++++
 perf/makefile        |  21 ++
 5 files changed, 774 insertions(+)
 create mode 100644 perf/README
 create mode 100755 perf/html5libtest.py
 create mode 100644 perf/hubbub.c
 create mode 100644 perf/libxml2.c
 create mode 100644 perf/makefile

diff --git a/perf/README b/perf/README
new file mode 100644
index 0000000..1e9e847
--- /dev/null
+++ b/perf/README
@@ -0,0 +1,27 @@
+This directory contains some very basic cobbled-together performance tests.
+A makefile is provided for generating the executables from the .c files.
+
+
+html5libtest.py
+---------------
+
+  This tests the Python html5lib project, obtained from:
+  http://code.google.com/p/html5lib/
+
+  This is generally quite a slow parser. :)
+
+
+libxml2.c
+---------
+
+  This tests the GNOME libxml2 HTML parser, using mmap(). It doesn't do
+  anything with the resulting tree, just generates one.
+
+
+hubbub.c
+--------
+
+  This tests hubbub, using mmap(), and a modified version of the test
+  treebuilder. It could certainly be made more efficient (it's based on
+  an old version of the tree construction testrunner) so should not be
+  compared too harshly against the libxml2 results.
diff --git a/perf/html5libtest.py b/perf/html5libtest.py
new file mode 100755
index 0000000..3630fe8
--- /dev/null
+++ b/perf/html5libtest.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+"""Parse one HTML file with html5lib; used for rough timing comparisons."""
+
+import sys
+import html5lib
+
+if len(sys.argv) != 2:
+    print("Usage: %s <file>" % sys.argv[0])
+    sys.exit(1)
+
+# Binary mode lets html5lib do its own encoding detection.
+with open(sys.argv[1], "rb") as f:
+    parser = html5lib.HTMLParser()
+    document = parser.parse(f)
diff --git a/perf/hubbub.c b/perf/hubbub.c
new file mode 100644
index 0000000..c273377
--- /dev/null
+++ b/perf/hubbub.c
@@ -0,0 +1,641 @@
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <hubbub/hubbub.h>
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+
+#define UNUSED(x) ((void) (x))
+
+/* Sentinel passed to hubbub as the document node; never dereferenced. */
+#define DOCUMENT_NODE ((void *) 1)
+
+typedef struct attr_t attr_t;
+typedef struct node_t node_t;
+
+/** One attribute; name and value are heap-allocated C strings. */
+struct attr_t {
+	hubbub_ns ns;
+	char *name;
+	char *value;
+};
+
+/** A tree node; siblings are doubly linked under a common parent. */
+struct node_t {
+	enum { DOCTYPE, COMMENT, ELEMENT, CHARACTER } type;
+
+	union {
+		struct {
+			char *name;
+			char *public_id;	/**< NULL when missing */
+			char *system_id;	/**< NULL when missing */
+		} doctype;
+
+		struct {
+			hubbub_ns ns;
+			char *name;
+			attr_t *attrs;
+			size_t n_attrs;
+		} element;
+
+		char *content;		/**< For comments, characters */
+	} data;
+
+	node_t *next;
+	node_t *prev;
+
+	node_t *child;
+	node_t *parent;		/**< DOCUMENT_NODE for top-level nodes */
+};
+
+#define NUM_NAMESPACES 7
+const char *const ns_names[NUM_NAMESPACES] =
+	{ NULL, NULL /*html*/, "math", "svg", "xlink", "xml", "xmlns" };
+
+/** Head of DOCUMENT_NODE's child list (the first top-level node). */
+static node_t *Document;
+
+static int create_comment(void *ctx, const hubbub_string *data, void **result);
+static int create_doctype(void *ctx, const hubbub_doctype *doctype,
+		void **result);
+static int create_element(void *ctx, const hubbub_tag *tag, void **result);
+static int create_text(void *ctx, const hubbub_string *data, void **result);
+static int ref_node(void *ctx, void *node);
+static int unref_node(void *ctx, void *node);
+static int append_child(void *ctx, void *parent, void *child, void **result);
+static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+		void **result);
+static int remove_child(void *ctx, void *parent, void *child, void **result);
+static int clone_node(void *ctx, void *node, bool deep, void **result);
+static int reparent_children(void *ctx, void *node, void *new_parent);
+static int get_parent(void *ctx, void *node, bool element_only, void **result);
+static int has_children(void *ctx, void *node, bool *result);
+static int form_associate(void *ctx, void *form, void *node);
+static int add_attributes(void *ctx, void *node,
+		const hubbub_attribute *attributes, uint32_t n_attributes);
+static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+
+static hubbub_tree_handler tree_handler = {
+	create_comment,
+	create_doctype,
+	create_element,
+	create_text,
+	ref_node,
+	unref_node,
+	append_child,
+	insert_before,
+	remove_child,
+	clone_node,
+	reparent_children,
+	get_parent,
+	has_children,
+	form_associate,
+	add_attributes,
+	set_quirks_mode,
+	NULL
+};
+
+/*** Helpers ***/
+
+/** Print what failed together with errno's message, then exit. */
+static void die(const char *what)
+{
+	perror(what);
+	exit(EXIT_FAILURE);
+}
+
+/** Exit with a diagnostic unless a hubbub call returned HUBBUB_OK. */
+static void check(hubbub_error error, const char *what)
+{
+	if (error != HUBBUB_OK) {
+		fprintf(stderr, "%s failed (hubbub error %d)\n", what,
+				(int) error);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/** calloc() that exits on failure; a zero-element request yields NULL. */
+static void *xcalloc(size_t n, size_t size)
+{
+	void *p;
+
+	if (n == 0)
+		return NULL;
+
+	p = calloc(n, size);
+	if (p == NULL)
+		die("calloc");
+
+	return p;
+}
+
+/** realloc() that exits on failure; callers never pass a zero size. */
+static void *xrealloc(void *ptr, size_t size)
+{
+	void *p = realloc(ptr, size);
+
+	if (p == NULL)
+		die("realloc");
+
+	return p;
+}
+
+/** Copy len bytes at ptr into a fresh NUL-terminated string. */
+static char *xstrndup(const void *ptr, size_t len)
+{
+	char *s = strndup(ptr, len);
+
+	if (s == NULL)
+		die("strndup");
+
+	return s;
+}
+
+/** Duplicate a C string, passing NULL through unchanged. */
+static char *xstrdup(const char *s)
+{
+	return s != NULL ? xstrndup(s, strlen(s)) : NULL;
+}
+
+/** Allocate a zeroed node of the given type. */
+static node_t *node_new(int type)
+{
+	node_t *node = xcalloc(1, sizeof *node);
+
+	node->type = type;
+
+	return node;
+}
+
+/** Fill dst with private copies of src's namespace, name and value. */
+static void attr_from_hubbub(attr_t *dst, const hubbub_attribute *src)
+{
+	assert(src->ns < NUM_NAMESPACES);
+
+	dst->ns = src->ns;
+	dst->name = xstrndup(src->name.ptr, src->name.len);
+	dst->value = xstrndup(src->value.ptr, src->value.len);
+}
+
+/** Address of parent's first-child pointer; copes with DOCUMENT_NODE. */
+static node_t **child_list(void *parent)
+{
+	if (parent == DOCUMENT_NODE)
+		return &Document;
+
+	return &((node_t *) parent)->child;
+}
+
+/** Append src's text to dst's; src is neither modified nor freed. */
+static void merge_text(node_t *dst, const node_t *src)
+{
+	size_t dst_len = strlen(dst->data.content);
+	size_t src_len = strlen(src->data.content);
+
+	dst->data.content = xrealloc(dst->data.content, dst_len + src_len + 1);
+	memcpy(dst->data.content + dst_len, src->data.content, src_len + 1);
+}
+
+/** Copy one node's own data (no tree links), duplicating every string. */
+static node_t *node_copy(const node_t *old)
+{
+	node_t *copy = node_new(old->type);
+
+	switch (old->type) {
+	case DOCTYPE:
+		copy->data.doctype.name = xstrdup(old->data.doctype.name);
+		copy->data.doctype.public_id =
+				xstrdup(old->data.doctype.public_id);
+		copy->data.doctype.system_id =
+				xstrdup(old->data.doctype.system_id);
+		break;
+	case ELEMENT:
+		copy->data.element.ns = old->data.element.ns;
+		copy->data.element.name = xstrdup(old->data.element.name);
+		copy->data.element.n_attrs = old->data.element.n_attrs;
+		copy->data.element.attrs = xcalloc(old->data.element.n_attrs,
+				sizeof *copy->data.element.attrs);
+
+		for (size_t i = 0; i < old->data.element.n_attrs; i++) {
+			const attr_t *from = &old->data.element.attrs[i];
+			attr_t *to = &copy->data.element.attrs[i];
+
+			to->ns = from->ns;
+			to->name = xstrdup(from->name);
+			to->value = xstrdup(from->value);
+		}
+		break;
+	case COMMENT:
+	case CHARACTER:
+		copy->data.content = xstrdup(old->data.content);
+		break;
+	}
+
+	return copy;
+}
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+
+	return realloc(ptr, len);
+}
+
+
+int main(int argc, char **argv)
+{
+	hubbub_parser *parser;
+	hubbub_parser_optparams params;
+
+	struct stat info;
+	int fd;
+	uint8_t *file = NULL;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	/* The calls below have side effects, so they must not live inside
+	 * assert(): building with NDEBUG would silently remove them. */
+
+	/* Initialise library */
+	check(hubbub_initialise(argv[1], myrealloc, NULL),
+			"hubbub_initialise");
+
+	parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
+	if (parser == NULL) {
+		fprintf(stderr, "hubbub_parser_create failed\n");
+		return 1;
+	}
+
+	params.tree_handler = &tree_handler;
+	check(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
+			&params), "hubbub_parser_setopt(TREE_HANDLER)");
+
+	params.document_node = DOCUMENT_NODE;
+	check(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE,
+			&params), "hubbub_parser_setopt(DOCUMENT_NODE)");
+
+	fd = open(argv[2], O_RDONLY);
+	if (fd == -1)
+		die(argv[2]);
+
+	if (fstat(fd, &info) == -1)
+		die(argv[2]);
+
+	/* mmap() rejects zero-length mappings, so skip empty files. */
+	if (info.st_size > 0) {
+		file = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0);
+		if (file == MAP_FAILED)
+			die("mmap");
+
+		check(hubbub_parser_parse_chunk(parser, file, info.st_size),
+				"hubbub_parser_parse_chunk");
+	}
+
+	hubbub_parser_destroy(parser);
+
+	if (file != NULL)
+		munmap(file, info.st_size);
+	close(fd);
+
+	check(hubbub_finalise(myrealloc, NULL), "hubbub_finalise");
+
+	return 0;
+}
+
+
+/*** Tree construction functions ***/
+
+static int create_comment(void *ctx, const hubbub_string *data, void **result)
+{
+	node_t *node = node_new(COMMENT);
+
+	UNUSED(ctx);
+
+	node->data.content = xstrndup(data->ptr, data->len);
+
+	*result = node;
+
+	return 0;
+}
+
+static int create_doctype(void *ctx, const hubbub_doctype *doctype,
+		void **result)
+{
+	node_t *node = node_new(DOCTYPE);
+
+	UNUSED(ctx);
+
+	node->data.doctype.name = xstrndup(doctype->name.ptr,
+			doctype->name.len);
+
+	if (!doctype->public_missing) {
+		node->data.doctype.public_id = xstrndup(
+				doctype->public_id.ptr,
+				doctype->public_id.len);
+	}
+
+	if (!doctype->system_missing) {
+		node->data.doctype.system_id = xstrndup(
+				doctype->system_id.ptr,
+				doctype->system_id.len);
+	}
+
+	*result = node;
+
+	return 0;
+}
+
+static int create_element(void *ctx, const hubbub_tag *tag, void **result)
+{
+	node_t *node = node_new(ELEMENT);
+
+	UNUSED(ctx);
+
+	assert(tag->ns < NUM_NAMESPACES);
+
+	node->data.element.ns = tag->ns;
+	node->data.element.name = xstrndup(tag->name.ptr, tag->name.len);
+	node->data.element.n_attrs = tag->n_attributes;
+	node->data.element.attrs = xcalloc(tag->n_attributes,
+			sizeof *node->data.element.attrs);
+
+	for (size_t i = 0; i < tag->n_attributes; i++) {
+		attr_from_hubbub(&node->data.element.attrs[i],
+				&tag->attributes[i]);
+	}
+
+	*result = node;
+
+	return 0;
+}
+
+static int create_text(void *ctx, const hubbub_string *data, void **result)
+{
+	node_t *node = node_new(CHARACTER);
+
+	UNUSED(ctx);
+
+	node->data.content = xstrndup(data->ptr, data->len);
+
+	*result = node;
+
+	return 0;
+}
+
+/* Nodes are never freed by this harness, so refcounting is a no-op. */
+static int ref_node(void *ctx, void *node)
+{
+	UNUSED(ctx);
+	UNUSED(node);
+
+	return 0;
+}
+
+static int unref_node(void *ctx, void *node)
+{
+	UNUSED(ctx);
+	UNUSED(node);
+
+	return 0;
+}
+
+/* Append 'child' to 'parent'; adjacent text nodes are merged, in which
+ * case *result is the existing text node rather than 'child'. */
+static int append_child(void *ctx, void *parent, void *child, void **result)
+{
+	node_t **head = child_list(parent);
+	node_t *tchild = child;
+	node_t *last;
+
+	UNUSED(ctx);
+
+	tchild->parent = parent;
+	tchild->next = tchild->prev = NULL;
+
+	*result = child;
+
+	if (*head == NULL) {
+		*head = tchild;
+		return 0;
+	}
+
+	for (last = *head; last->next != NULL; last = last->next)
+		;
+
+	if (tchild->type == CHARACTER && last->type == CHARACTER) {
+		merge_text(last, tchild);
+		*result = last;
+	} else {
+		last->next = tchild;
+		tchild->prev = last;
+	}
+
+	return 0;
+}
+
+/* insert 'child' before 'ref_child', under 'parent' */
+static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+		void **result)
+{
+	node_t *tchild = child;
+	node_t *tref = ref_child;
+
+	UNUSED(ctx);
+
+	if (tchild->type == CHARACTER && tref->prev != NULL &&
+			tref->prev->type == CHARACTER) {
+		merge_text(tref->prev, tchild);
+		*result = tref->prev;
+		return 0;
+	}
+
+	tchild->parent = parent;
+
+	tchild->prev = tref->prev;
+	tchild->next = tref;
+	tref->prev = tchild;
+
+	if (tchild->prev != NULL)
+		tchild->prev->next = tchild;
+	else
+		*child_list(parent) = tchild;
+
+	*result = child;
+
+	return 0;
+}
+
+/* unlink 'child' from 'parent' and reset the child's tree pointers */
+static int remove_child(void *ctx, void *parent, void *child, void **result)
+{
+	node_t **head = child_list(parent);
+	node_t *tchild = child;
+
+	UNUSED(ctx);
+
+	assert(*head != NULL);
+	assert(tchild->parent == parent);
+
+	if (*head == tchild)
+		*head = tchild->next;
+
+	if (tchild->prev != NULL)
+		tchild->prev->next = tchild->next;
+
+	if (tchild->next != NULL)
+		tchild->next->prev = tchild->prev;
+
+	tchild->next = tchild->prev = tchild->parent = NULL;
+
+	*result = child;
+
+	return 0;
+}
+
+/* Clone 'node' without linking it into the tree; a deep clone also copies
+ * the node's descendants, but never its siblings. */
+static int clone_node(void *ctx, void *node, bool deep, void **result)
+{
+	node_t *old_node = node;
+	node_t *new_node = node_copy(old_node);
+	node_t *last = NULL;
+
+	*result = new_node;
+
+	if (!deep)
+		return 0;
+
+	for (node_t *kid = old_node->child; kid != NULL; kid = kid->next) {
+		void *n;
+		node_t *copy;
+
+		clone_node(ctx, kid, true, &n);
+		copy = n;
+
+		copy->parent = new_node;
+		copy->prev = last;
+		if (last == NULL)
+			new_node->child = copy;
+		else
+			last->next = copy;
+
+		last = copy;
+	}
+
+	return 0;
+}
+
+/* Take all of the child nodes of "node" and append them to "new_parent" */
+static int reparent_children(void *ctx, void *node, void *new_parent)
+{
+	node_t **head = child_list(new_parent);
+	node_t *old_parent = node;
+	node_t *kids = old_parent->child;
+
+	UNUSED(ctx);
+
+	if (kids == NULL)
+		return 0;
+
+	old_parent->child = NULL;
+
+	if (*head == NULL) {
+		*head = kids;
+	} else {
+		node_t *last = *head;
+
+		while (last->next != NULL)
+			last = last->next;
+
+		last->next = kids;
+		kids->prev = last;
+	}
+
+	for (; kids != NULL; kids = kids->next)
+		kids->parent = new_parent;
+
+	return 0;
+}
+
+/* Report the parent of 'node'; with 'element_only' set, anything that is
+ * not an element (including the document) is reported as NULL. */
+static int get_parent(void *ctx, void *node, bool element_only, void **result)
+{
+	node_t *parent = ((node_t *) node)->parent;
+
+	UNUSED(ctx);
+
+	if (element_only && (parent == NULL || parent == DOCUMENT_NODE ||
+			parent->type != ELEMENT))
+		parent = NULL;
+
+	*result = parent;
+
+	return 0;
+}
+
+static int has_children(void *ctx, void *node, bool *result)
+{
+	UNUSED(ctx);
+
+	*result = *child_list(node) != NULL;
+
+	return 0;
+}
+
+static int form_associate(void *ctx, void *form, void *node)
+{
+	UNUSED(ctx);
+	UNUSED(form);
+	UNUSED(node);
+
+	return 0;
+}
+
+/* Append copies of 'attributes' to the element's attribute array. */
+static int add_attributes(void *ctx, void *vnode,
+		const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+	node_t *node = vnode;
+	size_t old_elems = node->data.element.n_attrs;
+
+	UNUSED(ctx);
+
+	/* Nothing to add; also keeps a zero size away from realloc(). */
+	if (n_attributes == 0)
+		return 0;
+
+	node->data.element.attrs = xrealloc(node->data.element.attrs,
+			(old_elems + n_attributes) *
+			sizeof *node->data.element.attrs);
+	node->data.element.n_attrs = old_elems + n_attributes;
+
+	for (size_t i = 0; i < n_attributes; i++) {
+		attr_from_hubbub(&node->data.element.attrs[old_elems + i],
+				&attributes[i]);
+	}
+
+	return 0;
+}
+
+static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
+{
+	UNUSED(ctx);
+	UNUSED(mode);
+
+	return 0;
+}
diff --git a/perf/libxml2.c b/perf/libxml2.c
new file mode 100644
index 0000000..cd5ad45
--- /dev/null
+++ b/perf/libxml2.c
@@ -0,0 +1,71 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <libxml/HTMLparser.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+/** Print what failed together with errno's message, then exit. */
+static void die(const char *what)
+{
+	perror(what);
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+	htmlDocPtr doc;
+
+	struct stat info;
+	int fd;
+	char *file;
+
+	if (argc != 2) {
+		printf("Usage: %s <file>\n", argv[0]);
+		return 1;
+	}
+
+	/* libxml hack */
+	LIBXML_TEST_VERSION
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd == -1)
+		die(argv[1]);
+
+	if (fstat(fd, &info) == -1)
+		die(argv[1]);
+
+	/* mmap() rejects empty mappings and htmlReadMemory() takes an int. */
+	if (info.st_size <= 0 || info.st_size > INT_MAX) {
+		fprintf(stderr, "%s: unsupported file size\n", argv[1]);
+		return 1;
+	}
+
+	file = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (file == MAP_FAILED)
+		die("mmap");
+
+	doc = htmlReadMemory(file, (int) info.st_size, NULL, NULL,
+			HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |
+			HTML_PARSE_NOWARNING);
+
+	munmap(file, info.st_size);
+	close(fd);
+
+	if (!doc) {
+		printf("FAIL\n");
+		return 1;
+	}
+
+	xmlFreeDoc(doc);
+
+	xmlCleanupParser();
+
+	return 0;
+}
diff --git a/perf/makefile b/perf/makefile
new file mode 100644
index 0000000..7304ebb
--- /dev/null
+++ b/perf/makefile
@@ -0,0 +1,21 @@
+all: libxml2 hubbub
+
+.PHONY: all clean
+
+CC = gcc
+# These are benchmarks, so build with optimisation enabled.
+CFLAGS = -W -Wall -std=c99 -O2
+
+LIBXML2_OBJS = libxml2.o
+libxml2: CFLAGS += `pkg-config libxml-2.0 --cflags`
+libxml2: $(LIBXML2_OBJS)
+	$(CC) -o libxml2 $(LIBXML2_OBJS) `pkg-config libxml-2.0 --libs`
+
+
+HUBBUB_OBJS = hubbub.o
+hubbub: CFLAGS += `pkg-config --cflags libparserutils libhubbub`
+hubbub: $(HUBBUB_OBJS)
+	$(CC) -o hubbub $(HUBBUB_OBJS) `pkg-config --libs libhubbub libparserutils`
+
+clean:
+	rm -f libxml2 hubbub $(LIBXML2_OBJS) $(HUBBUB_OBJS)
-- 
cgit v1.2.3