From bdff981eef4a7434291941e6bca662357498bc59 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 10 Jul 2012 11:23:57 -0600 Subject: Add ability to pause tokenisation --- src/parser.c | 12 ++++++++++++ src/tokeniser/tokeniser.c | 24 +++++++++++++++++++++++- src/tokeniser/tokeniser.h | 5 ++++- src/utils/errors.c | 3 +++ 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/parser.c b/src/parser.c index 8da9d79..5423922 100644 --- a/src/parser.c +++ b/src/parser.c @@ -152,6 +152,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, HUBBUB_TOKENISER_TOKEN_HANDLER, (hubbub_tokeniser_optparams *) params); break; + case HUBBUB_PARSER_ERROR_HANDLER: /* The error handler does not cascade, so tell both the * treebuilder (if extant) and the tokeniser. */ @@ -166,11 +167,19 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_tokeniser_optparams *) params); } break; + case HUBBUB_PARSER_CONTENT_MODEL: result = hubbub_tokeniser_setopt(parser->tok, HUBBUB_TOKENISER_CONTENT_MODEL, (hubbub_tokeniser_optparams *) params); break; + + case HUBBUB_PARSER_PAUSE: + result = hubbub_tokeniser_setopt(parser->tok, + HUBBUB_TOKENISER_PAUSE, + (hubbub_tokeniser_optparams *) params); + break; + case HUBBUB_PARSER_TREE_HANDLER: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -178,6 +187,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + case HUBBUB_PARSER_DOCUMENT_NODE: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -185,6 +195,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + case HUBBUB_PARSER_ENABLE_SCRIPTING: if (parser->tb != NULL) { result = hubbub_treebuilder_setopt(parser->tb, @@ -192,6 +203,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser, (hubbub_treebuilder_optparams *) params); } break; + default: result = HUBBUB_INVALID; } diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index 549a89a..04f4ea7 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -16,6 +16,7 @@ #include "utils/parserutilserror.h" #include "utils/utils.h" +#include "hubbub/errors.h" #include "tokeniser/entities.h" #include "tokeniser/tokeniser.h" @@ -155,6 +156,7 @@ typedef struct hubbub_tokeniser_context { } position; /**< Position in source data */ uint32_t allowed_char; /**< Used for quote matching */ + } hubbub_tokeniser_context; /** @@ -166,6 +168,7 @@ struct hubbub_tokeniser { * model flag */ bool escape_flag; /**< Escape flag **/ bool process_cdata_section; /**< Whether to process CDATA sections*/ + bool paused; /**< flag for if parsing is currently paused */ parserutils_inputstream *input; /**< Input stream */ parserutils_buffer *buffer; /**< Input buffer */ @@ -372,6 +375,8 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, hubbub_tokeniser_opttype type, hubbub_tokeniser_optparams *params) { + hubbub_error err = HUBBUB_OK; + if (tokeniser == NULL || params == NULL) return HUBBUB_BADPARM; @@ -390,9 +395,18 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser, case HUBBUB_TOKENISER_PROCESS_CDATA: tokeniser->process_cdata_section = params->process_cdata; break; + case HUBBUB_TOKENISER_PAUSE: + if (params->pause_parse == true) { + tokeniser->paused = true; + } else { + if (tokeniser->paused == true) { + tokeniser->paused = false; + err = hubbub_tokeniser_run(tokeniser); + } + } } - return HUBBUB_OK; + return err; } /** @@ -434,6 +448,9 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser) if (tokeniser == NULL) return HUBBUB_BADPARM; + if (tokeniser->paused == true) + return HUBBUB_PAUSED; + #if 0 #define state(x) \ case x: \ @@ -3416,5 +3433,10 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser, tokeniser->insert_buf->length); } + /* Ensure callback can pause the tokenise */ + if (err == HUBBUB_PAUSED) { + tokeniser->paused = true; + } + return err; } diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h index e13a03a..ed46d60 100644 --- a/src/tokeniser/tokeniser.h +++ b/src/tokeniser/tokeniser.h @@ -26,7 +26,8 @@ typedef enum hubbub_tokeniser_opttype { HUBBUB_TOKENISER_TOKEN_HANDLER, HUBBUB_TOKENISER_ERROR_HANDLER, HUBBUB_TOKENISER_CONTENT_MODEL, - HUBBUB_TOKENISER_PROCESS_CDATA + HUBBUB_TOKENISER_PROCESS_CDATA, + HUBBUB_TOKENISER_PAUSE } hubbub_tokeniser_opttype; /** @@ -48,6 +49,8 @@ typedef union hubbub_tokeniser_optparams { } content_model; /**< Current content model */ bool process_cdata; /**< Whether to process CDATA sections*/ + + bool pause_parse; /**< Pause parsing */ } hubbub_tokeniser_optparams; /* Create a hubbub tokeniser */ diff --git a/src/utils/errors.c b/src/utils/errors.c index 766b181..e7a797f 100644 --- a/src/utils/errors.c +++ b/src/utils/errors.c @@ -29,6 +29,9 @@ const char *hubbub_error_to_string(hubbub_error error) case HUBBUB_ENCODINGCHANGE: result = "Encoding of document has changed"; break; + case HUBBUB_PAUSED: + result = "Parser is paused"; + break; case HUBBUB_NOMEM: result = "Insufficient memory"; break; -- cgit v1.2.3