summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile 3
-rw-r--r-- src/parser.c 67
-rw-r--r-- src/treebuilder/Makefile 53
-rw-r--r-- src/treebuilder/treebuilder.c 196
-rw-r--r-- src/treebuilder/treebuilder.h 62
5 files changed, 372 insertions, 9 deletions
diff --git a/src/Makefile b/src/Makefile
index b72a9e0..7af11a4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -37,6 +37,7 @@ release: $(addprefix Release/, $(addsuffix .o, $(OBJS)))
@${MAKE} -C charset release
@${MAKE} -C input release
@${MAKE} -C tokeniser release
+ @${MAKE} -C treebuilder release
@${MAKE} -C utils release
@${AR} ${ARFLAGS} $(RELEASE) Release/*
@@ -44,6 +45,7 @@ debug: $(addprefix Debug/, $(addsuffix .o, $(OBJS)))
@${MAKE} -C charset debug
@${MAKE} -C input debug
@${MAKE} -C tokeniser debug
+ @${MAKE} -C treebuilder debug
@${MAKE} -C utils debug
@${AR} ${ARFLAGS} $(DEBUG) Debug/*
@@ -51,6 +53,7 @@ clean:
@${MAKE} -C charset clean
@${MAKE} -C input clean
@${MAKE} -C tokeniser clean
+ @${MAKE} -C treebuilder clean
@${MAKE} -C utils clean
-@${RM} ${RMFLAGS} $(addprefix Release/, $(addsuffix .o, ${OBJS}))
-@${RM} ${RMFLAGS} $(addprefix Debug/, $(addsuffix .o, ${OBJS}))
diff --git a/src/parser.c b/src/parser.c
index 258067f..23702e1 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2,13 +2,14 @@
* This file is part of Hubbub.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2007-8 John-Mark Bell <jmb@netsurf-browser.org>
*/
#include <hubbub/parser.h>
#include "input/inputstream.h"
#include "tokeniser/tokeniser.h"
+#include "treebuilder/treebuilder.h"
/**
* Hubbub parser object
@@ -16,6 +17,7 @@
struct hubbub_parser {
hubbub_inputstream *stream; /**< Input stream instance */
hubbub_tokeniser *tok; /**< Tokeniser instance */
+ hubbub_treebuilder *tb; /**< Treebuilder instance */
hubbub_alloc alloc; /**< Memory (de)allocation function */
void *pw; /**< Client data */
@@ -55,6 +57,14 @@ hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
return NULL;
}
+ parser->tb = hubbub_treebuilder_create(parser->tok, alloc, pw);
+ if (parser->tb == NULL) {
+ hubbub_tokeniser_destroy(parser->tok);
+ hubbub_inputstream_destroy(parser->stream);
+ alloc(parser, 0, pw);
+ return NULL;
+ }
+
parser->alloc = alloc;
parser->pw = pw;
@@ -71,6 +81,8 @@ void hubbub_parser_destroy(hubbub_parser *parser)
if (parser == NULL)
return;
+ hubbub_treebuilder_destroy(parser->tb);
+
hubbub_tokeniser_destroy(parser->tok);
hubbub_inputstream_destroy(parser->stream);
@@ -90,30 +102,67 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
hubbub_parser_opttype type,
hubbub_parser_optparams *params)
{
- hubbub_tokeniser_opttype toktype;
+ hubbub_error result = HUBBUB_OK;;
if (parser == NULL || params == NULL)
return HUBBUB_BADPARM;
switch (type) {
case HUBBUB_PARSER_TOKEN_HANDLER:
- toktype = HUBBUB_TOKENISER_TOKEN_HANDLER;
+ if (parser->tb != NULL) {
+ /* Client is defining their own token handler,
+ * so we must destroy the default treebuilder */
+ hubbub_treebuilder_destroy(parser->tb);
+ parser->tb = NULL;
+ }
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_TOKEN_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
break;
case HUBBUB_PARSER_BUFFER_HANDLER:
- toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+ /* The buffer handler cascades, so if there's a treebuilder,
+ * simply inform that. Otherwise, tell the tokeniser. */
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_BUFFER_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ } else {
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_BUFFER_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
+ }
break;
case HUBBUB_PARSER_ERROR_HANDLER:
- toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+ /* The error handler does not cascade, so tell both the
+ * treebuilder (if extant) and the tokeniser. */
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_ERROR_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ }
+ if (result == HUBBUB_OK) {
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_ERROR_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
+ }
break;
case HUBBUB_PARSER_CONTENT_MODEL:
- toktype = HUBBUB_TOKENISER_CONTENT_MODEL;
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_CONTENT_MODEL,
+ (hubbub_tokeniser_optparams *) params);
+ break;
+ case HUBBUB_PARSER_TREE_HANDLER:
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_TREE_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ }
break;
default:
- return HUBBUB_INVALID;
+ result = HUBBUB_INVALID;
}
- return hubbub_tokeniser_setopt(parser->tok, toktype,
- (hubbub_tokeniser_optparams *) params);
+ return result;
}
/**
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
new file mode 100644
index 0000000..d63a7a3
--- /dev/null
+++ b/src/treebuilder/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects
+OBJS = treebuilder
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+# release/debug build every object named in OBJS into the shared
+# ../Release or ../Debug directory via the pattern rules below.
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+# Remove this component's objects from both build directories; the
+# leading '-' lets make carry on if the remove command fails.
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+# The remaining targets must exist for the top-level makefile, but this
+# component has nothing to do for them.
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+# Release objects are compiled with -DNDEBUG.
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+# Debug objects are compiled with -g.
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
new file mode 100644
index 0000000..529cd08
--- /dev/null
+++ b/src/treebuilder/treebuilder.c
@@ -0,0 +1,196 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <string.h>
+
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+/**
+ * Treebuilder object
+ *
+ * Records the tokeniser it is attached to, the last buffer location the
+ * tokeniser reported, and the handlers the client registered through
+ * hubbub_treebuilder_setopt().
+ */
+struct hubbub_treebuilder
+{
+ hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */
+
+ const uint8_t *input_buffer; /**< Start of tokeniser's buffer */
+ size_t input_buffer_len; /**< Length of input buffer */
+
+ hubbub_tree_handler tree_handler; /**< Client tree handler (zeroed at creation) */
+
+ hubbub_buffer_handler buffer_handler; /**< Client buffer handler, or NULL */
+ void *buffer_pw; /**< Private data passed to buffer_handler */
+
+ hubbub_error_handler error_handler; /**< Client error handler, or NULL */
+ void *error_pw; /**< Private data stored alongside error_handler */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *alloc_pw; /**< Client private data */
+};
+
+/* Callbacks that hubbub_treebuilder_create() registers with the
+ * underlying tokeniser; both receive the treebuilder instance as pw. */
+static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+ size_t len, void *pw);
+static void hubbub_treebuilder_token_handler(const hubbub_token *token,
+ void *pw);
+
+/**
+ * Create a hubbub treebuilder
+ *
+ * The new treebuilder registers itself with the tokeniser as both its
+ * token handler and its buffer handler. If the buffer handler cannot be
+ * registered, the token handler registration is undone before the
+ * treebuilder is released, so the tokeniser is not left holding a
+ * pointer to freed memory.
+ *
+ * \param tokeniser  Underlying tokeniser instance
+ * \param alloc      Memory (de)allocation function
+ * \param pw         Pointer to client-specific private data
+ * \return Pointer to treebuilder instance, or NULL on error.
+ */
+hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_treebuilder *tb;
+	hubbub_tokeniser_optparams tokparams;
+
+	if (tokeniser == NULL || alloc == NULL)
+		return NULL;
+
+	tb = alloc(NULL, sizeof(*tb), pw);
+	if (tb == NULL)
+		return NULL;
+
+	tb->tokeniser = tokeniser;
+
+	tb->input_buffer = NULL;
+	tb->input_buffer_len = 0;
+
+	/* No client handlers are installed until setopt is called */
+	memset(&tb->tree_handler, 0, sizeof(tb->tree_handler));
+
+	tb->buffer_handler = NULL;
+	tb->buffer_pw = NULL;
+
+	tb->error_handler = NULL;
+	tb->error_pw = NULL;
+
+	tb->alloc = alloc;
+	tb->alloc_pw = pw;
+
+	tokparams.token_handler.handler = hubbub_treebuilder_token_handler;
+	tokparams.token_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	tokparams.buffer_handler.handler = hubbub_treebuilder_buffer_handler;
+	tokparams.buffer_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		/* The tokeniser already holds our token handler with
+		 * pw == tb. Detach it (best effort) before freeing tb, so
+		 * it can never be invoked with a dangling pointer. */
+		tokparams.token_handler.handler = NULL;
+		tokparams.token_handler.pw = NULL;
+
+		hubbub_tokeniser_setopt(tokeniser,
+				HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
+
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	return tb;
+}
+
+/**
+ * Destroy a hubbub treebuilder
+ *
+ * Hands the client's buffer handler back to the tokeniser, clears the
+ * tokeniser's token handler, and then frees the treebuilder itself.
+ *
+ * \param treebuilder  The treebuilder instance to destroy (NULL is a no-op)
+ */
+void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
+{
+	hubbub_tokeniser_optparams restore_buffer;
+	hubbub_tokeniser_optparams clear_token;
+	hubbub_tokeniser *tok;
+
+	if (treebuilder == NULL)
+		return;
+
+	tok = treebuilder->tokeniser;
+
+	/* Give the tokeniser the client's own buffer handler again */
+	restore_buffer.buffer_handler.handler = treebuilder->buffer_handler;
+	restore_buffer.buffer_handler.pw = treebuilder->buffer_pw;
+	hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_BUFFER_HANDLER,
+			&restore_buffer);
+
+	/* Stop the tokeniser delivering tokens to this instance */
+	clear_token.token_handler.handler = NULL;
+	clear_token.token_handler.pw = NULL;
+	hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER,
+			&clear_token);
+
+	treebuilder->alloc(treebuilder, 0, treebuilder->alloc_pw);
+}
+
+/**
+ * Configure a hubbub treebuilder
+ *
+ * \param treebuilder  The treebuilder instance to configure
+ * \param type         The option type to configure
+ * \param params       Pointer to option-specific parameters
+ * \return HUBBUB_OK on success,
+ *         HUBBUB_BADPARM if treebuilder or params is NULL,
+ *         HUBBUB_INVALID if type is not a known option.
+ */
+hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder,
+		hubbub_treebuilder_opttype type,
+		hubbub_treebuilder_optparams *params)
+{
+	if (treebuilder == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_TREEBUILDER_BUFFER_HANDLER:
+		treebuilder->buffer_handler = params->buffer_handler.handler;
+		treebuilder->buffer_pw = params->buffer_handler.pw;
+
+		/* Tell a newly registered handler where the buffer
+		 * currently is. A NULL handler simply unregisters the
+		 * previous one, so there is nothing to call. */
+		if (treebuilder->buffer_handler != NULL) {
+			treebuilder->buffer_handler(treebuilder->input_buffer,
+					treebuilder->input_buffer_len,
+					treebuilder->buffer_pw);
+		}
+		break;
+	case HUBBUB_TREEBUILDER_ERROR_HANDLER:
+		treebuilder->error_handler = params->error_handler.handler;
+		treebuilder->error_pw = params->error_handler.pw;
+		break;
+	case HUBBUB_TREEBUILDER_TREE_HANDLER:
+		treebuilder->tree_handler = params->tree_handler;
+		break;
+	default:
+		/* Unknown option: report it rather than claiming success */
+		return HUBBUB_INVALID;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Tokeniser callback: the input buffer has moved
+ *
+ * Records the new buffer location and length, then forwards the same
+ * values to the client's buffer handler when one is registered.
+ *
+ * \param data  New location of buffer
+ * \param len   Length of buffer in bytes
+ * \param pw    Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+		size_t len, void *pw)
+{
+	hubbub_treebuilder *tb = pw;
+
+	tb->input_buffer = data;
+	tb->input_buffer_len = len;
+
+	/* Nothing further to do without a client handler */
+	if (tb->buffer_handler == NULL)
+		return;
+
+	tb->buffer_handler(data, len, tb->buffer_pw);
+}
+
+/**
+ * Tokeniser callback: a token has been emitted
+ *
+ * Currently a stub; both arguments are ignored.
+ *
+ * \param token  The emitted token
+ * \param pw     Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_token_handler(const hubbub_token *token,
+		void *pw)
+{
+	UNUSED(token);
+	UNUSED(pw);
+
+	/** \todo implement this */
+}
+
diff --git a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h
new file mode 100644
index 0000000..0792e99
--- /dev/null
+++ b/src/treebuilder/treebuilder.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_treebuilder_h_
+#define hubbub_treebuilder_treebuilder_h_
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/tree.h>
+#include <hubbub/types.h>
+
+#include "tokeniser/tokeniser.h"
+
+/** Opaque treebuilder type; this header exposes no structure definition */
+typedef struct hubbub_treebuilder hubbub_treebuilder;
+
+/**
+ * Hubbub treebuilder option types
+ *
+ * Selects which member of hubbub_treebuilder_optparams is read by
+ * hubbub_treebuilder_setopt().
+ */
+typedef enum hubbub_treebuilder_opttype {
+ HUBBUB_TREEBUILDER_BUFFER_HANDLER, /**< Uses the buffer_handler member */
+ HUBBUB_TREEBUILDER_ERROR_HANDLER, /**< Uses the error_handler member */
+ HUBBUB_TREEBUILDER_TREE_HANDLER, /**< Uses the tree_handler member */
+} hubbub_treebuilder_opttype;
+
+/**
+ * Hubbub treebuilder option parameters
+ *
+ * A union: only the member matching the option type passed to
+ * hubbub_treebuilder_setopt() is meaningful.
+ */
+typedef union hubbub_treebuilder_optparams {
+ /** Parameters for HUBBUB_TREEBUILDER_BUFFER_HANDLER */
+ struct {
+ hubbub_buffer_handler handler;
+ void *pw;
+ } buffer_handler;
+
+ /** Parameters for HUBBUB_TREEBUILDER_ERROR_HANDLER */
+ struct {
+ hubbub_error_handler handler;
+ void *pw;
+ } error_handler;
+
+ /** Parameters for HUBBUB_TREEBUILDER_TREE_HANDLER */
+ hubbub_tree_handler tree_handler;
+} hubbub_treebuilder_optparams;
+
+/* Create a hubbub treebuilder attached to the given tokeniser.
+ * Returns NULL on error. */
+hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
+ hubbub_alloc alloc, void *pw);
+
+/* Destroy a hubbub treebuilder (a NULL treebuilder is ignored) */
+void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder);
+
+/* Configure a hubbub treebuilder; type selects which member of params
+ * is used. Returns HUBBUB_BADPARM if treebuilder or params is NULL. */
+hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder,
+ hubbub_treebuilder_opttype type,
+ hubbub_treebuilder_optparams *params);
+
+#endif
+