diff options
author | John-Mark Bell <jmb@netsurf-browser.org> | 2012-07-05 21:21:14 +0100 |
---|---|---|
committer | John-Mark Bell <jmb@netsurf-browser.org> | 2012-07-05 21:21:14 +0100 |
commit | b20ec087e0c528a1b9a76b7887023d6da50bac47 (patch) | |
tree | 766726549f9982535f9b5de0ab0b917b9aab76bb | |
parent | d8dec9ad2c21a6bb08c49e6ae7e5b0f01867f982 (diff) | |
download | libhubbub-b20ec087e0c528a1b9a76b7887023d6da50bac47.tar.gz libhubbub-b20ec087e0c528a1b9a76b7887023d6da50bac47.tar.bz2 |
Insert data at correct point in input stream.
-rw-r--r-- | src/parser.c | 10 | ||||
-rw-r--r-- | src/tokeniser/tokeniser.c | 45 | ||||
-rw-r--r-- | src/tokeniser/tokeniser.h | 4 |
3 files changed, 51 insertions, 8 deletions
diff --git a/src/parser.c b/src/parser.c index 95216a3..bf6cca4 100644 --- a/src/parser.c +++ b/src/parser.c @@ -207,23 +207,17 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, * useful to allow hubbub callbacks to add computed data to the input. * * \param parser Parser instance to use - * \param data Data to parse (encoded in the input charset) + * \param data Data to parse (encoded in UTF-8) * \param len Length, in bytes, of data * \return HUBBUB_OK on success, appropriate error otherwise */ hubbub_error hubbub_parser_insert_chunk(hubbub_parser *parser, const uint8_t *data, size_t len) { - parserutils_error perror; - if (parser == NULL || data == NULL) return HUBBUB_BADPARM; - perror = parserutils_inputstream_insert(parser->stream, data, len); - if (perror != PARSERUTILS_OK) - return hubbub_error_from_parserutils_error(perror); - - return HUBBUB_OK; + return hubbub_tokeniser_insert_chunk(parser->tok, data, len); } /** diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 949f433..549a89a 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -169,6 +169,7 @@ struct hubbub_tokeniser { parserutils_inputstream *input; /**< Input stream */ parserutils_buffer *buffer; /**< Input buffer */ + parserutils_buffer *insert_buf; /**< Stream insertion buffer */ hubbub_tokeniser_context context; /**< Tokeniser context */ @@ -303,6 +304,13 @@ hubbub_error hubbub_tokeniser_create(parserutils_inputstream *input, return hubbub_error_from_parserutils_error(perror); } + perror = parserutils_buffer_create(alloc, pw, &tok->insert_buf); + if (perror != PARSERUTILS_OK) { + parserutils_buffer_destroy(tok->buffer); + alloc(tok, 0, pw); + return hubbub_error_from_parserutils_error(perror); + } + tok->state = STATE_DATA; tok->content_model = HUBBUB_CONTENT_MODEL_PCDATA; @@ -343,6 +351,8 @@ hubbub_error hubbub_tokeniser_destroy(hubbub_tokeniser *tokeniser) 0, tokeniser->alloc_pw); } + parserutils_buffer_destroy(tokeniser->insert_buf); + parserutils_buffer_destroy(tokeniser->buffer); tokeniser->alloc(tokeniser, 0, tokeniser->alloc_pw); @@ -386,6 +396,32 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, } /** + * Insert a chunk of data into the input stream. + * + * Inserts the given data into the input stream ready for parsing but + * does not cause any additional processing of the input. + * + * \param tokeniser Tokeniser instance + * \param data Data to insert (UTF-8 encoded) + * \param len Length, in bytes, of data + * \return HUBBUB_OK on success, appropriate error otherwise + */ +hubbub_error hubbub_tokeniser_insert_chunk(hubbub_tokeniser *tokeniser, + const uint8_t *data, size_t len) +{ + parserutils_error perror; + + if (tokeniser == NULL || data == NULL) + return HUBBUB_BADPARM; + + perror = parserutils_buffer_append(tokeniser->insert_buf, data, len); + if (perror != PARSERUTILS_OK) + return hubbub_error_from_parserutils_error(perror); + + return HUBBUB_OK; +} + +/** * Process remaining data in the input stream * * \param tokeniser The tokeniser instance to invoke @@ -3310,6 +3346,7 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, assert(tokeniser != NULL); assert(token != NULL); + assert(tokeniser->insert_buf->length == 0); #ifndef NDEBUG /* Sanity checks */ @@ -3371,5 +3408,13 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, tokeniser->context.pending = 0; } + if (tokeniser->insert_buf->length > 0) { + parserutils_inputstream_insert(tokeniser->input, + tokeniser->insert_buf->data, + tokeniser->insert_buf->length); + parserutils_buffer_discard(tokeniser->insert_buf, 0, + tokeniser->insert_buf->length); + } + return err; } diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h index 4a9f362..e13a03a 100644 --- a/src/tokeniser/tokeniser.h +++ b/src/tokeniser/tokeniser.h @@ -62,6 +62,10 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params); +/* Insert a chunk of data into the input stream */ +hubbub_error hubbub_tokeniser_insert_chunk(hubbub_tokeniser *tokeniser, + const uint8_t *data, size_t len); + /* Process remaining data in the input stream */ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser); |