From b20ec087e0c528a1b9a76b7887023d6da50bac47 Mon Sep 17 00:00:00 2001 From: John-Mark Bell Date: Thu, 5 Jul 2012 21:21:14 +0100 Subject: Insert data at correct point in input stream. --- src/parser.c | 10 ++-------- src/tokeniser/tokeniser.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ src/tokeniser/tokeniser.h | 4 ++++ 3 files changed, 51 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/parser.c b/src/parser.c index 95216a3..bf6cca4 100644 --- a/src/parser.c +++ b/src/parser.c @@ -207,23 +207,17 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, * useful to allow hubbub callbacks to add computed data to the input. * * \param parser Parser instance to use - * \param data Data to parse (encoded in the input charset) + * \param data Data to parse (encoded in UTF-8) * \param len Length, in bytes, of data * \return HUBBUB_OK on success, appropriate error otherwise */ hubbub_error hubbub_parser_insert_chunk(hubbub_parser *parser, const uint8_t *data, size_t len) { - parserutils_error perror; - if (parser == NULL || data == NULL) return HUBBUB_BADPARM; - perror = parserutils_inputstream_insert(parser->stream, data, len); - if (perror != PARSERUTILS_OK) - return hubbub_error_from_parserutils_error(perror); - - return HUBBUB_OK; + return hubbub_tokeniser_insert_chunk(parser->tok, data, len); } /** diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 949f433..549a89a 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -169,6 +169,7 @@ struct hubbub_tokeniser { parserutils_inputstream *input; /**< Input stream */ parserutils_buffer *buffer; /**< Input buffer */ + parserutils_buffer *insert_buf; /**< Stream insertion buffer */ hubbub_tokeniser_context context; /**< Tokeniser context */ @@ -303,6 +304,13 @@ hubbub_error hubbub_tokeniser_create(parserutils_inputstream *input, return hubbub_error_from_parserutils_error(perror); } + perror = parserutils_buffer_create(alloc, pw, &tok->insert_buf); + if (perror != PARSERUTILS_OK) { + parserutils_buffer_destroy(tok->buffer); + alloc(tok, 0, pw); + return hubbub_error_from_parserutils_error(perror); + } + tok->state = STATE_DATA; tok->content_model = HUBBUB_CONTENT_MODEL_PCDATA; @@ -343,6 +351,8 @@ hubbub_error hubbub_tokeniser_destroy(hubbub_tokeniser *tokeniser) 0, tokeniser->alloc_pw); } + parserutils_buffer_destroy(tokeniser->insert_buf); + parserutils_buffer_destroy(tokeniser->buffer); tokeniser->alloc(tokeniser, 0, tokeniser->alloc_pw); @@ -385,6 +395,32 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, return HUBBUB_OK; } +/** + * Insert a chunk of data into the input stream. + * + * Inserts the given data into the input stream ready for parsing but + * does not cause any additional processing of the input. + * + * \param tokeniser Tokeniser instance + * \param data Data to insert (UTF-8 encoded) + * \param len Length, in bytes, of data + * \return HUBBUB_OK on success, appropriate error otherwise + */ +hubbub_error hubbub_tokeniser_insert_chunk(hubbub_tokeniser *tokeniser, + const uint8_t *data, size_t len) +{ + parserutils_error perror; + + if (tokeniser == NULL || data == NULL) + return HUBBUB_BADPARM; + + perror = parserutils_buffer_append(tokeniser->insert_buf, data, len); + if (perror != PARSERUTILS_OK) + return hubbub_error_from_parserutils_error(perror); + + return HUBBUB_OK; +} + /** * Process remaining data in the input stream * @@ -3310,6 +3346,7 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, assert(tokeniser != NULL); assert(token != NULL); + assert(tokeniser->insert_buf->length == 0); #ifndef NDEBUG /* Sanity checks */ @@ -3371,5 +3408,13 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, tokeniser->context.pending = 0; } + if (tokeniser->insert_buf->length > 0) { + parserutils_inputstream_insert(tokeniser->input, + tokeniser->insert_buf->data, + tokeniser->insert_buf->length); + parserutils_buffer_discard(tokeniser->insert_buf, 0, + tokeniser->insert_buf->length); + } + return err; } diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h index 4a9f362..e13a03a 100644 --- a/src/tokeniser/tokeniser.h +++ b/src/tokeniser/tokeniser.h @@ -62,6 +62,10 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params); +/* Insert a chunk of data into the input stream */ +hubbub_error hubbub_tokeniser_insert_chunk(hubbub_tokeniser *tokeniser, + const uint8_t *data, size_t len); + /* Process remaining data in the input stream */ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser); -- cgit v1.2.3