diff options
-rw-r--r-- | include/hubbub/errors.h | 3 | ||||
-rw-r--r-- | include/hubbub/parser.h | 5 | ||||
-rw-r--r-- | src/parser.c | 47 | ||||
-rw-r--r-- | src/tokeniser/tokeniser.c | 26 | ||||
-rw-r--r-- | src/tokeniser/tokeniser.h | 5 | ||||
-rw-r--r-- | src/utils/errors.c | 3 |
6 files changed, 50 insertions, 39 deletions
diff --git a/include/hubbub/errors.h b/include/hubbub/errors.h index ee7870a..8865246 100644 --- a/include/hubbub/errors.h +++ b/include/hubbub/errors.h @@ -16,9 +16,10 @@ extern "C" #include <stddef.h> typedef enum hubbub_error { - HUBBUB_OK = 0, + HUBBUB_OK = 0, /**< No error */ HUBBUB_REPROCESS = 1, HUBBUB_ENCODINGCHANGE = 2, + HUBBUB_PAUSED = 3, /**< tokenisation is paused */ HUBBUB_NOMEM = 5, HUBBUB_BADPARM = 6, diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h index 7d2fb1f..cd5d6b5 100644 --- a/include/hubbub/parser.h +++ b/include/hubbub/parser.h @@ -32,7 +32,8 @@ typedef enum hubbub_parser_opttype { HUBBUB_PARSER_CONTENT_MODEL, HUBBUB_PARSER_TREE_HANDLER, HUBBUB_PARSER_DOCUMENT_NODE, - HUBBUB_PARSER_ENABLE_SCRIPTING + HUBBUB_PARSER_ENABLE_SCRIPTING, + HUBBUB_PARSER_PAUSE } hubbub_parser_opttype; /** @@ -58,6 +59,8 @@ typedef union hubbub_parser_optparams { void *document_node; /**< Document node */ bool enable_scripting; /**< Whether to enable scripting */ + + bool pause_parse; /**< Pause parsing */ } hubbub_parser_optparams; /* Create a hubbub parser */ diff --git a/src/parser.c b/src/parser.c index bf6cca4..5423922 100644 --- a/src/parser.c +++ b/src/parser.c @@ -152,6 +152,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, HUBBUB_TOKENISER_TOKEN_HANDLER, (hubbub_tokeniser_optparams *) params); break; + case HUBBUB_PARSER_ERROR_HANDLER: /* The error handler does not cascade, so tell both the * treebuilder (if extant) and the tokeniser. */ @@ -166,11 +167,19 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_tokeniser_optparams *) params); } break; + case HUBBUB_PARSER_CONTENT_MODEL: result = hubbub_tokeniser_setopt(parser->tok, HUBBUB_TOKENISER_CONTENT_MODEL, (hubbub_tokeniser_optparams *) params); break; + + case HUBBUB_PARSER_PAUSE: + result = hubbub_tokeniser_setopt(parser->tok, + HUBBUB_TOKENISER_PAUSE, + (hubbub_tokeniser_optparams *) params); + break; + case HUBBUB_PARSER_TREE_HANDLER: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -178,6 +187,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + case HUBBUB_PARSER_DOCUMENT_NODE: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -185,6 +195,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + case HUBBUB_PARSER_ENABLE_SCRIPTING: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -192,6 +203,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + default: result = HUBBUB_INVALID; } @@ -266,41 +278,6 @@ hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser, return HUBBUB_OK; } -#if 0 -/** - * Pass a chunk of extraneous data to a hubbub parser for parsing - * - * \param parser Parser instance to use - * \param data Data to parse (encoded in UTF-8) - * \param len Length, in byte, of data - * \return HUBBUB_OK on success, appropriate error otherwise - */ -hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser, - const uint8_t *data, size_t len) -{ - hubbub_error error; - - /** \todo In some cases, we don't actually want script-inserted - * data to be parsed until later. We'll need some way of flagging - * this through the public API, and the inputstream API will need - * some way of marking the insertion point so that, when the - * tokeniser is run, only the inserted chunk is parsed. */ - - if (parser == NULL || data == NULL) - return HUBBUB_BADPARM; - - error = parserutils_inputstream_insert(parser->stream, data, len); - if (error != HUBBUB_OK) - return error; - - error = hubbub_tokeniser_run(parser->tok); - if (error != HUBBUB_OK) - return error; - - return HUBBUB_OK; -} -#endif - /** * Inform the parser that the last chunk of data has been parsed * diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 549a89a..2fff50d 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -16,6 +16,7 @@ #include "utils/parserutilserror.h" #include "utils/utils.h" +#include "hubbub/errors.h" #include "tokeniser/entities.h" #include "tokeniser/tokeniser.h" @@ -155,6 +156,7 @@ typedef struct hubbub_tokeniser_context { } position; /**< Position in source data */ uint32_t allowed_char; /**< Used for quote matching */ + } hubbub_tokeniser_context; /** @@ -166,6 +168,7 @@ struct hubbub_tokeniser { * model flag */ bool escape_flag; /**< Escape flag **/ bool process_cdata_section; /**< Whether to process CDATA sections*/ + bool paused; /**< flag for if parsing is currently paused */ parserutils_inputstream *input; /**< Input stream */ parserutils_buffer *buffer; /**< Input buffer */ @@ -317,6 +320,8 @@ hubbub_error hubbub_tokeniser_create(parserutils_inputstream *input, tok->escape_flag = false; tok->process_cdata_section = false; + tok->paused = false; + tok->input = input; tok->token_handler = NULL; @@ -372,6 +377,8 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params) { + hubbub_error err = HUBBUB_OK; + if (tokeniser == NULL || params == NULL) return HUBBUB_BADPARM; @@ -390,9 +397,18 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, case HUBBUB_TOKENISER_PROCESS_CDATA: tokeniser->process_cdata_section = params->process_cdata; break; + case HUBBUB_TOKENISER_PAUSE: + if (params->pause_parse == true) { + tokeniser->paused = true; + } else { + if (tokeniser->paused == true) { + tokeniser->paused = false; + err = hubbub_tokeniser_run(tokeniser); + } + } } - return HUBBUB_OK; + return err; } /** @@ -434,6 +450,9 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser) if (tokeniser == NULL) return HUBBUB_BADPARM; + if (tokeniser->paused == true) + return HUBBUB_PAUSED; + #if 0 #define state(x) \ case x: \ @@ -3416,5 +3435,10 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, tokeniser->insert_buf->length); } + /* Ensure callback can pause the tokenise */ + if (err == HUBBUB_PAUSED) { + tokeniser->paused = true; + } + return err; } diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h index e13a03a..ed46d60 100644 --- a/src/tokeniser/tokeniser.h +++ b/src/tokeniser/tokeniser.h @@ -26,7 +26,8 @@ typedef enum hubbub_tokeniser_opttype { HUBBUB_TOKENISER_TOKEN_HANDLER, HUBBUB_TOKENISER_ERROR_HANDLER, HUBBUB_TOKENISER_CONTENT_MODEL, - HUBBUB_TOKENISER_PROCESS_CDATA + HUBBUB_TOKENISER_PROCESS_CDATA, + HUBBUB_TOKENISER_PAUSE } hubbub_tokeniser_opttype; /** @@ -48,6 +49,8 @@ typedef union hubbub_tokeniser_optparams { } content_model; /**< Current content model */ bool process_cdata; /**< Whether to process CDATA sections*/ + + bool pause_parse; /**< Pause parsing */ } hubbub_tokeniser_optparams; /* Create a hubbub tokeniser */ diff --git a/src/utils/errors.c b/src/utils/errors.c index 766b181..e7a797f 100644 --- a/src/utils/errors.c +++ b/src/utils/errors.c @@ -29,6 +29,9 @@ const char *hubbub_error_to_string(hubbub_error error) case HUBBUB_ENCODINGCHANGE: result = "Encoding of document has changed"; break; + case HUBBUB_PAUSED: + result = "Parser is paused"; + break; case HUBBUB_NOMEM: result = "Insufficient memory"; break; |