summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/hubbub/errors.h 3
-rw-r--r-- include/hubbub/parser.h 5
-rw-r--r-- src/parser.c 47
-rw-r--r-- src/tokeniser/tokeniser.c 26
-rw-r--r-- src/tokeniser/tokeniser.h 5
-rw-r--r-- src/utils/errors.c 3
6 files changed, 50 insertions, 39 deletions
diff --git a/include/hubbub/errors.h b/include/hubbub/errors.h
index ee7870a..8865246 100644
--- a/include/hubbub/errors.h
+++ b/include/hubbub/errors.h
@@ -16,9 +16,10 @@ extern "C"
#include <stddef.h>
typedef enum hubbub_error {
- HUBBUB_OK = 0,
+ HUBBUB_OK = 0, /**< No error */
HUBBUB_REPROCESS = 1,
HUBBUB_ENCODINGCHANGE = 2,
+ HUBBUB_PAUSED = 3, /**< tokenisation is paused */
HUBBUB_NOMEM = 5,
HUBBUB_BADPARM = 6,
diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
index 7d2fb1f..cd5d6b5 100644
--- a/include/hubbub/parser.h
+++ b/include/hubbub/parser.h
@@ -32,7 +32,8 @@ typedef enum hubbub_parser_opttype {
HUBBUB_PARSER_CONTENT_MODEL,
HUBBUB_PARSER_TREE_HANDLER,
HUBBUB_PARSER_DOCUMENT_NODE,
- HUBBUB_PARSER_ENABLE_SCRIPTING
+ HUBBUB_PARSER_ENABLE_SCRIPTING,
+ HUBBUB_PARSER_PAUSE
} hubbub_parser_opttype;
/**
@@ -58,6 +59,8 @@ typedef union hubbub_parser_optparams {
void *document_node; /**< Document node */
bool enable_scripting; /**< Whether to enable scripting */
+
+ bool pause_parse; /**< Pause parsing */
} hubbub_parser_optparams;
/* Create a hubbub parser */
diff --git a/src/parser.c b/src/parser.c
index bf6cca4..5423922 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -152,6 +152,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
HUBBUB_TOKENISER_TOKEN_HANDLER,
(hubbub_tokeniser_optparams *) params);
break;
+
case HUBBUB_PARSER_ERROR_HANDLER:
/* The error handler does not cascade, so tell both the
* treebuilder (if extant) and the tokeniser. */
@@ -166,11 +167,19 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
(hubbub_tokeniser_optparams *) params);
}
break;
+
case HUBBUB_PARSER_CONTENT_MODEL:
result = hubbub_tokeniser_setopt(parser->tok,
HUBBUB_TOKENISER_CONTENT_MODEL,
(hubbub_tokeniser_optparams *) params);
break;
+
+ case HUBBUB_PARSER_PAUSE:
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_PAUSE,
+ (hubbub_tokeniser_optparams *) params);
+ break;
+
case HUBBUB_PARSER_TREE_HANDLER:
if (parser->tb != NULL) {
result = hubbub_treebuilder_setopt(parser->tb,
@@ -178,6 +187,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
(hubbub_treebuilder_optparams *) params);
}
break;
+
case HUBBUB_PARSER_DOCUMENT_NODE:
if (parser->tb != NULL) {
result = hubbub_treebuilder_setopt(parser->tb,
@@ -185,6 +195,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
(hubbub_treebuilder_optparams *) params);
}
break;
+
case HUBBUB_PARSER_ENABLE_SCRIPTING:
if (parser->tb != NULL) {
result = hubbub_treebuilder_setopt(parser->tb,
@@ -192,6 +203,7 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
(hubbub_treebuilder_optparams *) params);
}
break;
+
default:
result = HUBBUB_INVALID;
}
@@ -266,41 +278,6 @@ hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
return HUBBUB_OK;
}
-#if 0
-/**
- * Pass a chunk of extraneous data to a hubbub parser for parsing
- *
- * \param parser Parser instance to use
- * \param data Data to parse (encoded in UTF-8)
- * \param len Length, in byte, of data
- * \return HUBBUB_OK on success, appropriate error otherwise
- */
-hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser,
- const uint8_t *data, size_t len)
-{
- hubbub_error error;
-
- /** \todo In some cases, we don't actually want script-inserted
- * data to be parsed until later. We'll need some way of flagging
- * this through the public API, and the inputstream API will need
- * some way of marking the insertion point so that, when the
- * tokeniser is run, only the inserted chunk is parsed. */
-
- if (parser == NULL || data == NULL)
- return HUBBUB_BADPARM;
-
- error = parserutils_inputstream_insert(parser->stream, data, len);
- if (error != HUBBUB_OK)
- return error;
-
- error = hubbub_tokeniser_run(parser->tok);
- if (error != HUBBUB_OK)
- return error;
-
- return HUBBUB_OK;
-}
-#endif
-
/**
* Inform the parser that the last chunk of data has been parsed
*
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 549a89a..2fff50d 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -16,6 +16,7 @@
#include "utils/parserutilserror.h"
#include "utils/utils.h"
+#include "hubbub/errors.h"
#include "tokeniser/entities.h"
#include "tokeniser/tokeniser.h"
@@ -155,6 +156,7 @@ typedef struct hubbub_tokeniser_context {
} position; /**< Position in source data */
uint32_t allowed_char; /**< Used for quote matching */
+
} hubbub_tokeniser_context;
/**
@@ -166,6 +168,7 @@ struct hubbub_tokeniser {
* model flag */
bool escape_flag; /**< Escape flag **/
bool process_cdata_section; /**< Whether to process CDATA sections*/
+ bool paused; /**< flag for if parsing is currently paused */
parserutils_inputstream *input; /**< Input stream */
parserutils_buffer *buffer; /**< Input buffer */
@@ -317,6 +320,8 @@ hubbub_error hubbub_tokeniser_create(parserutils_inputstream *input,
tok->escape_flag = false;
tok->process_cdata_section = false;
+ tok->paused = false;
+
tok->input = input;
tok->token_handler = NULL;
@@ -372,6 +377,8 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser,
hubbub_tokeniser_opttype type,
hubbub_tokeniser_optparams *params)
{
+ hubbub_error err = HUBBUB_OK;
+
if (tokeniser == NULL || params == NULL)
return HUBBUB_BADPARM;
@@ -390,9 +397,18 @@ hubbub_error hubbub_tokeniser_setopt(hubbub_tokeniser *tokeniser,
case HUBBUB_TOKENISER_PROCESS_CDATA:
tokeniser->process_cdata_section = params->process_cdata;
break;
+ case HUBBUB_TOKENISER_PAUSE:
+ if (params->pause_parse == true) {
+ tokeniser->paused = true;
+ } else {
+ if (tokeniser->paused == true) {
+ tokeniser->paused = false;
+ err = hubbub_tokeniser_run(tokeniser);
+ }
+ }
}
- return HUBBUB_OK;
+ return err;
}
/**
@@ -434,6 +450,9 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
if (tokeniser == NULL)
return HUBBUB_BADPARM;
+ if (tokeniser->paused == true)
+ return HUBBUB_PAUSED;
+
#if 0
#define state(x) \
case x: \
@@ -3416,5 +3435,10 @@ hubbub_error hubbub_tokeniser_emit_token(hubbub_tokeniser *tokeniser,
tokeniser->insert_buf->length);
}
+ /* Ensure callback can pause the tokeniser */
+ if (err == HUBBUB_PAUSED) {
+ tokeniser->paused = true;
+ }
+
return err;
}
diff --git a/src/tokeniser/tokeniser.h b/src/tokeniser/tokeniser.h
index e13a03a..ed46d60 100644
--- a/src/tokeniser/tokeniser.h
+++ b/src/tokeniser/tokeniser.h
@@ -26,7 +26,8 @@ typedef enum hubbub_tokeniser_opttype {
HUBBUB_TOKENISER_TOKEN_HANDLER,
HUBBUB_TOKENISER_ERROR_HANDLER,
HUBBUB_TOKENISER_CONTENT_MODEL,
- HUBBUB_TOKENISER_PROCESS_CDATA
+ HUBBUB_TOKENISER_PROCESS_CDATA,
+ HUBBUB_TOKENISER_PAUSE
} hubbub_tokeniser_opttype;
/**
@@ -48,6 +49,8 @@ typedef union hubbub_tokeniser_optparams {
} content_model; /**< Current content model */
bool process_cdata; /**< Whether to process CDATA sections*/
+
+ bool pause_parse; /**< Pause parsing */
} hubbub_tokeniser_optparams;
/* Create a hubbub tokeniser */
diff --git a/src/utils/errors.c b/src/utils/errors.c
index 766b181..e7a797f 100644
--- a/src/utils/errors.c
+++ b/src/utils/errors.c
@@ -29,6 +29,9 @@ const char *hubbub_error_to_string(hubbub_error error)
case HUBBUB_ENCODINGCHANGE:
result = "Encoding of document has changed";
break;
+ case HUBBUB_PAUSED:
+ result = "Parser is paused";
+ break;
case HUBBUB_NOMEM:
result = "Insufficient memory";
break;