From 625179d93958610fd854f4eba5b70a0aa3e6fecf Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Fri, 7 Mar 2008 12:39:12 +0000 Subject: Beginnings of a tree builder. Distinct lack of any real functionality beyond creation/destruction & option setting. svn path=/trunk/hubbub/; revision=3894 --- include/hubbub/functypes.h | 61 ++++++++++++- include/hubbub/parser.h | 6 +- include/hubbub/tree.h | 31 +++++++ src/Makefile | 3 + src/parser.c | 67 +++++++++++++-- src/treebuilder/Makefile | 53 ++++++++++++ src/treebuilder/treebuilder.c | 196 ++++++++++++++++++++++++++++++++++++++++++ src/treebuilder/treebuilder.h | 62 +++++++++++++ 8 files changed, 468 insertions(+), 11 deletions(-) create mode 100644 include/hubbub/tree.h create mode 100644 src/treebuilder/Makefile create mode 100644 src/treebuilder/treebuilder.c create mode 100644 src/treebuilder/treebuilder.h diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h index aa3e649..8d7f199 100644 --- a/include/hubbub/functypes.h +++ b/include/hubbub/functypes.h @@ -2,12 +2,13 @@ * This file is part of Hubbub. 
* Licensed under the MIT License, * http://www.opensource.org/licenses/mit-license.php - * Copyright 2007 John-Mark Bell + * Copyright 2007-8 John-Mark Bell */ #ifndef hubbub_functypes_h_ #define hubbub_functypes_h_ +#include #include #include @@ -32,6 +33,64 @@ typedef void (*hubbub_buffer_handler)(const uint8_t *data, typedef void (*hubbub_error_handler)(uint32_t line, uint32_t col, const char *message, void *pw); +/** + * Type of tree comment node creation function + */ +typedef int (*hubbub_tree_create_comment)(void *ctx, const hubbub_string *data, + void **result); + +/** + * Type of tree doctype node creation function + */ +typedef int (*hubbub_tree_create_doctype)(void *ctx, const hubbub_string *qname, + const hubbub_string *public_id, const hubbub_string *system_id, + void **result); + +/** + * Type of tree element node creation function + */ +typedef int (*hubbub_tree_create_element)(void *ctx, + const hubbub_string *tag_name, void **result); + +/** + * Type of tree text node creation function + */ +typedef int (*hubbub_tree_create_text)(void *ctx, const hubbub_string *data, + void **result); + +/** + * Type of tree node destruction function + */ +typedef int (*hubbub_tree_free_node)(void *ctx, void *node); + +/** + * Type of tree node appending function + */ +typedef int (*hubbub_tree_append_child)(void *ctx, void *parent, void *child, + void **result); + +/** + * Type of tree node insertion function + */ +typedef int (*hubbub_tree_insert_before)(void *ctx, void *parent, void *child, + void *ref_child, void **result); + +/** + * Type of tree node removal function + */ +typedef int (*hubbub_tree_remove_child)(void *ctx, void *parent, void *child, + void **result); + +/** + * Type of tree node cloning function + */ +typedef int (*hubbub_tree_clone_node)(void *ctx, void *node, bool deep, + void **result); + +/** + * Type of tree quirks mode notification function + */ +typedef int (*hubbub_tree_set_quirks_mode)(void *ctx, bool quirky); #endif diff --git 
a/include/hubbub/parser.h b/include/hubbub/parser.h index cdf8664..134f4b7 100644 --- a/include/hubbub/parser.h +++ b/include/hubbub/parser.h @@ -2,7 +2,7 @@ * This file is part of Hubbub. * Licensed under the MIT License, * http://www.opensource.org/licenses/mit-license.php - * Copyright 2007 John-Mark Bell + * Copyright 2007-8 John-Mark Bell */ #ifndef hubbub_parser_h_ @@ -12,6 +12,7 @@ #include #include +#include #include typedef struct hubbub_parser hubbub_parser; @@ -24,6 +25,7 @@ typedef enum hubbub_parser_opttype { HUBBUB_PARSER_BUFFER_HANDLER, HUBBUB_PARSER_ERROR_HANDLER, HUBBUB_PARSER_CONTENT_MODEL, + HUBBUB_PARSER_TREE_HANDLER, /**< Install tree handler callbacks (uses params->tree_handler) */ } hubbub_parser_opttype; /** @@ -48,6 +50,8 @@ typedef union hubbub_parser_optparams { struct { hubbub_content_model model; } content_model; + + hubbub_tree_handler tree_handler; } hubbub_parser_optparams; /* Create a hubbub parser */ diff --git a/include/hubbub/tree.h b/include/hubbub/tree.h new file mode 100644 index 0000000..a883f1a --- /dev/null +++ b/include/hubbub/tree.h @@ -0,0 +1,31 @@ +/* + * This file is part of Hubbub.
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#ifndef hubbub_tree_h_ +#define hubbub_tree_h_ + +#include + +/** + * Hubbub tree handler + */ +typedef struct hubbub_tree_handler { + hubbub_tree_create_comment create_comment; /**< Comment node creation callback */ + hubbub_tree_create_doctype create_doctype; /**< Doctype node creation callback */ + hubbub_tree_create_element create_element; /**< Element node creation callback */ + hubbub_tree_create_text create_text; /**< Text node creation callback */ + hubbub_tree_free_node free_node; /**< Node destruction callback */ + hubbub_tree_append_child append_child; /**< Node appending callback */ + hubbub_tree_insert_before insert_before; /**< Node insertion callback */ + hubbub_tree_remove_child remove_child; /**< Node removal callback */ + hubbub_tree_clone_node clone_node; /**< Node cloning callback */ + hubbub_tree_set_quirks_mode set_quirks_mode; /**< Quirks mode notification callback */ + void *ctx; /**< Client context; presumably passed as each callback's ctx argument (TODO confirm) */ +} hubbub_tree_handler; + +#endif + diff --git a/src/Makefile b/src/Makefile index b72a9e0..7af11a4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -37,6 +37,7 @@ release: $(addprefix Release/, $(addsuffix .o, $(OBJS))) @${MAKE} -C charset release @${MAKE} -C input release @${MAKE} -C tokeniser release + @${MAKE} -C treebuilder release @${MAKE} -C utils release @${AR} ${ARFLAGS} $(RELEASE) Release/* @@ -44,6 +45,7 @@ debug: $(addprefix Debug/, $(addsuffix .o, $(OBJS))) @${MAKE} -C charset debug @${MAKE} -C input debug @${MAKE} -C tokeniser debug + @${MAKE} -C treebuilder debug @${MAKE} -C utils debug @${AR} ${ARFLAGS} $(DEBUG) Debug/* @@ -51,6 +53,7 @@ clean: @${MAKE} -C charset clean @${MAKE} -C input clean @${MAKE} -C tokeniser clean + @${MAKE} -C treebuilder clean @${MAKE} -C utils clean -@${RM} ${RMFLAGS} $(addprefix Release/, $(addsuffix .o, ${OBJS})) -@${RM} ${RMFLAGS} $(addprefix Debug/, $(addsuffix .o, ${OBJS})) diff --git a/src/parser.c b/src/parser.c index 258067f..23702e1 100644 --- a/src/parser.c +++ b/src/parser.c @@ -2,13 +2,14 @@ * This file is part of Hubbub.
* Licensed under the MIT License,
  * http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell
+ * Copyright 2007-8 John-Mark Bell
  */
 
 #include <hubbub/parser.h>
 
 #include "input/inputstream.h"
 #include "tokeniser/tokeniser.h"
+#include "treebuilder/treebuilder.h"
 
 /**
  * Hubbub parser object
@@ -16,6 +17,7 @@
 struct hubbub_parser {
 	hubbub_inputstream *stream;	/**< Input stream instance */
 	hubbub_tokeniser *tok;		/**< Tokeniser instance */
+	hubbub_treebuilder *tb;		/**< Treebuilder instance */
 
 	hubbub_alloc alloc;		/**< Memory (de)allocation function */
 	void *pw;			/**< Client data */
@@ -55,6 +57,14 @@ hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
 		return NULL;
 	}
 
+	parser->tb = hubbub_treebuilder_create(parser->tok, alloc, pw);
+	if (parser->tb == NULL) {
+		hubbub_tokeniser_destroy(parser->tok);
+		hubbub_inputstream_destroy(parser->stream);
+		alloc(parser, 0, pw);
+		return NULL;
+	}
+
 	parser->alloc = alloc;
 	parser->pw = pw;
 
@@ -71,6 +81,8 @@ void hubbub_parser_destroy(hubbub_parser *parser)
 	if (parser == NULL)
 		return;
 
+	hubbub_treebuilder_destroy(parser->tb);
+
 	hubbub_tokeniser_destroy(parser->tok);
 
 	hubbub_inputstream_destroy(parser->stream);
@@ -90,30 +102,73 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
 		hubbub_parser_opttype type,
 		hubbub_parser_optparams *params)
 {
-	hubbub_tokeniser_opttype toktype;
+	hubbub_error result = HUBBUB_OK;
 
 	if (parser == NULL || params == NULL)
 		return HUBBUB_BADPARM;
 
+	/* NOTE(review): the casts below assume each parser option's params are
+	 * layout-compatible with the tokeniser/treebuilder union - confirm. */
 	switch (type) {
 	case HUBBUB_PARSER_TOKEN_HANDLER:
-		toktype = HUBBUB_TOKENISER_TOKEN_HANDLER;
+		if (parser->tb != NULL) {
+			/* Client is defining their own token handler,
+			 * so we must destroy the default treebuilder */
+			hubbub_treebuilder_destroy(parser->tb);
+			parser->tb = NULL;
+		}
+		result = hubbub_tokeniser_setopt(parser->tok,
+				HUBBUB_TOKENISER_TOKEN_HANDLER,
+				(hubbub_tokeniser_optparams *) params);
 		break;
 	case HUBBUB_PARSER_BUFFER_HANDLER:
-		toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+		/* The buffer handler cascades, so if there's a treebuilder,
+		 * simply inform that. Otherwise, tell the tokeniser. */
+		if (parser->tb != NULL) {
+			result = hubbub_treebuilder_setopt(parser->tb,
+					HUBBUB_TREEBUILDER_BUFFER_HANDLER,
+					(hubbub_treebuilder_optparams *) params);
+		} else {
+			result = hubbub_tokeniser_setopt(parser->tok,
+					HUBBUB_TOKENISER_BUFFER_HANDLER,
+					(hubbub_tokeniser_optparams *) params);
+		}
 		break;
 	case HUBBUB_PARSER_ERROR_HANDLER:
-		toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+		/* The error handler does not cascade, so tell both the
+		 * treebuilder (if extant) and the tokeniser. */
+		if (parser->tb != NULL) {
+			result = hubbub_treebuilder_setopt(parser->tb,
+					HUBBUB_TREEBUILDER_ERROR_HANDLER,
+					(hubbub_treebuilder_optparams *) params);
+		}
+		if (result == HUBBUB_OK) {
+			result = hubbub_tokeniser_setopt(parser->tok,
+					HUBBUB_TOKENISER_ERROR_HANDLER,
+					(hubbub_tokeniser_optparams *) params);
+		}
 		break;
 	case HUBBUB_PARSER_CONTENT_MODEL:
-		toktype = HUBBUB_TOKENISER_CONTENT_MODEL;
+		result = hubbub_tokeniser_setopt(parser->tok,
+				HUBBUB_TOKENISER_CONTENT_MODEL,
+				(hubbub_tokeniser_optparams *) params);
+		break;
+	case HUBBUB_PARSER_TREE_HANDLER:
+		if (parser->tb != NULL) {
+			result = hubbub_treebuilder_setopt(parser->tb,
+					HUBBUB_TREEBUILDER_TREE_HANDLER,
+					(hubbub_treebuilder_optparams *) params);
+		} else {
+			/* The treebuilder was discarded when the client
+			 * installed a token handler; report, don't ignore */
+			result = HUBBUB_INVALID;
+		}
 		break;
 	default:
-		return HUBBUB_INVALID;
+		result = HUBBUB_INVALID;
 	}
 
-	return hubbub_tokeniser_setopt(parser->tok, toktype,
-			(hubbub_tokeniser_optparams *) params);
+	return result;
 }
 
 /**
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
new file mode 100644
index 0000000..d63a7a3
--- /dev/null
+++ b/src/treebuilder/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects
+OBJS = treebuilder
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+clean:
+	-@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+	-@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+../Release/%.o: %.c
+	@${ECHO} ${ECHOFLAGS} "==> $<"
+	@${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+../Debug/%.o: %.c
+	@${ECHO} ${ECHOFLAGS} "==> $<"
+	@${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
new file mode 100644
index 0000000..529cd08
--- /dev/null
+++ b/src/treebuilder/treebuilder.c
@@ -0,0 +1,208 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell
+ */
+
+#include <string.h>
+
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+struct hubbub_treebuilder
+{
+	hubbub_tokeniser *tokeniser;	/**< Underlying tokeniser */
+
+	const uint8_t *input_buffer;	/**< Start of tokeniser's buffer */
+	size_t input_buffer_len;	/**< Length of input buffer */
+
+	hubbub_tree_handler tree_handler;	/**< Client tree callbacks */
+
+	hubbub_buffer_handler buffer_handler;	/**< Client buffer handler */
+	void *buffer_pw;			/**< Client buffer handler data */
+
+	hubbub_error_handler error_handler;	/**< Client error handler */
+	void *error_pw;				/**< Client error handler data */
+
+	hubbub_alloc alloc;		/**< Memory (de)allocation function */
+	void *alloc_pw;			/**< Client private data */
+};
+
+static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+		size_t len, void *pw);
+static void hubbub_treebuilder_token_handler(const hubbub_token *token,
+		void *pw);
+
+/**
+ * Create a hubbub treebuilder
+ *
+ * \param tokeniser Underlying tokeniser instance
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \return Pointer to treebuilder instance, or NULL on error.
+ */
+hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_treebuilder *tb;
+	hubbub_tokeniser_optparams tokparams;
+
+	if (tokeniser == NULL || alloc == NULL)
+		return NULL;
+
+	tb = alloc(NULL, sizeof(hubbub_treebuilder), pw);
+	if (tb == NULL)
+		return NULL;
+
+	tb->tokeniser = tokeniser;
+
+	tb->input_buffer = NULL;
+	tb->input_buffer_len = 0;
+
+	memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler));
+
+	tb->buffer_handler = NULL;
+	tb->buffer_pw = NULL;
+
+	tb->error_handler = NULL;
+	tb->error_pw = NULL;
+
+	tb->alloc = alloc;
+	tb->alloc_pw = pw;
+
+	tokparams.token_handler.handler = hubbub_treebuilder_token_handler;
+	tokparams.token_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	tokparams.buffer_handler.handler = hubbub_treebuilder_buffer_handler;
+	tokparams.buffer_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		/* Detach the token handler registered above so the
+		 * tokeniser is not left holding a pointer to freed memory */
+		tokparams.token_handler.handler = NULL;
+		tokparams.token_handler.pw = NULL;
+		hubbub_tokeniser_setopt(tokeniser,
+				HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	return tb;
+}
+
+/**
+ * Destroy a hubbub treebuilder
+ *
+ * \param treebuilder The treebuilder instance to destroy
+ */
+void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
+{
+	hubbub_tokeniser_optparams tokparams;
+
+	if (treebuilder == NULL)
+		return;
+
+	tokparams.buffer_handler.handler = treebuilder->buffer_handler;
+	tokparams.buffer_handler.pw = treebuilder->buffer_pw;
+
+	hubbub_tokeniser_setopt(treebuilder->tokeniser,
+			HUBBUB_TOKENISER_BUFFER_HANDLER, &tokparams);
+
+	tokparams.token_handler.handler = NULL;
+	tokparams.token_handler.pw = NULL;
+
+	hubbub_tokeniser_setopt(treebuilder->tokeniser,
+			HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
+
+	treebuilder->alloc(treebuilder, 0, treebuilder->alloc_pw);
+}
+
+/**
+ * Configure a hubbub treebuilder
+ *
+ * \param treebuilder The treebuilder instance to configure
+ * \param type The option type to configure
+ * \param params Pointer to option-specific parameters
+ * \return HUBBUB_OK on success, appropriate error otherwise.
+ */
+hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder,
+		hubbub_treebuilder_opttype type,
+		hubbub_treebuilder_optparams *params)
+{
+	if (treebuilder == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_TREEBUILDER_BUFFER_HANDLER:
+		treebuilder->buffer_handler = params->buffer_handler.handler;
+		treebuilder->buffer_pw = params->buffer_handler.pw;
+		/* Tell the new handler where the buffer currently is; a NULL
+		 * handler simply unregisters the client callback */
+		if (treebuilder->buffer_handler != NULL) {
+			treebuilder->buffer_handler(treebuilder->input_buffer,
+					treebuilder->input_buffer_len,
+					treebuilder->buffer_pw);
+		}
+		break;
+	case HUBBUB_TREEBUILDER_ERROR_HANDLER:
+		treebuilder->error_handler = params->error_handler.handler;
+		treebuilder->error_pw = params->error_handler.pw;
+		break;
+	case HUBBUB_TREEBUILDER_TREE_HANDLER:
+		treebuilder->tree_handler = params->tree_handler;
+		break;
+	default:
+		return HUBBUB_INVALID;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Handle tokeniser buffer moving
+ *
+ * \param data New location of buffer
+ * \param len Length of buffer in bytes
+ * \param pw Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+		size_t len, void *pw)
+{
+	hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw;
+
+	treebuilder->input_buffer = data;
+	treebuilder->input_buffer_len = len;
+
+	/* Inform client buffer handler, too (if there is one) */
+	if (treebuilder->buffer_handler != NULL) {
+		treebuilder->buffer_handler(treebuilder->input_buffer,
+				treebuilder->input_buffer_len,
+				treebuilder->buffer_pw);
+	}
+}
+
+/**
+ * Handle tokeniser emitting a token
+ *
+ * \param token The emitted token
+ * \param pw Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_token_handler(const hubbub_token *token,
+		void *pw)
+{
+	hubbub_treebuilder *treebuilder = (hubbub_treebuilder *) pw;
+
+	UNUSED(treebuilder);
+	UNUSED(token);
+
+	/** \todo implement this */
+}
+
diff --git
a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h new file mode 100644 index 0000000..0792e99 --- /dev/null +++ b/src/treebuilder/treebuilder.h @@ -0,0 +1,62 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#ifndef hubbub_treebuilder_treebuilder_h_ +#define hubbub_treebuilder_treebuilder_h_ + +#include +#include + +#include +#include +#include +#include + +#include "tokeniser/tokeniser.h" + +typedef struct hubbub_treebuilder hubbub_treebuilder; + +/** + * Hubbub treebuilder option types + */ +typedef enum hubbub_treebuilder_opttype { + HUBBUB_TREEBUILDER_BUFFER_HANDLER, /**< Set the client buffer handler */ + HUBBUB_TREEBUILDER_ERROR_HANDLER, /**< Set the client error handler */ + HUBBUB_TREEBUILDER_TREE_HANDLER, /**< Set the tree handler callbacks */ +} hubbub_treebuilder_opttype; + +/** + * Hubbub treebuilder option parameters + */ +typedef union hubbub_treebuilder_optparams { + struct { + hubbub_buffer_handler handler; + void *pw; + } buffer_handler; /**< Params for HUBBUB_TREEBUILDER_BUFFER_HANDLER */ + + struct { + hubbub_error_handler handler; + void *pw; + } error_handler; /**< Params for HUBBUB_TREEBUILDER_ERROR_HANDLER */ + + hubbub_tree_handler tree_handler; /**< Params for HUBBUB_TREEBUILDER_TREE_HANDLER */ +} hubbub_treebuilder_optparams; + +/* Create a hubbub treebuilder */ +hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, + hubbub_alloc alloc, void *pw); + +/* Destroy a hubbub treebuilder */ +void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder); + +/* Configure a hubbub treebuilder */ +hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder, + hubbub_treebuilder_opttype type, + hubbub_treebuilder_optparams *params); + +#endif + -- cgit v1.2.3