diff options
-rw-r--r-- | include/hubbub/types.h | 3
-rw-r--r-- | src/tokeniser/tokeniser.c | 10
-rw-r--r-- | test/tokeniser2.c | 11
-rw-r--r-- | test/tokeniser3.c | 11
4 files changed, 23 insertions, 12 deletions
diff --git a/include/hubbub/types.h b/include/hubbub/types.h index e5c208b..6e2b1a9 100644 --- a/include/hubbub/types.h +++ b/include/hubbub/types.h @@ -33,7 +33,8 @@ typedef enum hubbub_content_model { HUBBUB_CONTENT_MODEL_PCDATA, HUBBUB_CONTENT_MODEL_RCDATA, HUBBUB_CONTENT_MODEL_CDATA, - HUBBUB_CONTENT_MODEL_PLAINTEXT + HUBBUB_CONTENT_MODEL_PLAINTEXT, + HUBBUB_CONTENT_MODEL_RAWTEXT } hubbub_content_model; /** diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c index b31b404..64eaf30 100644 --- a/src/tokeniser/tokeniser.c +++ b/src/tokeniser/tokeniser.c @@ -759,12 +759,16 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser) emit_current_chars(tokeniser); } - /* Emit a null character */ - emit_character_token(tokeniser, &u_null_str); + /* Emit a null or a replacement character */ + if(tokeniser->content_model != HUBBUB_CONTENT_MODEL_PCDATA) { + emit_character_token(tokeniser, &u_fffd_str); + } else { + emit_character_token(tokeniser, &u_null_str); + } /* Advance past NUL */ parserutils_inputstream_advance(tokeniser->input, 1); - } else if (c == '\r') { + } else if (c == '\r' && tokeniser->content_model != HUBBUB_CONTENT_MODEL_PLAINTEXT) { error = parserutils_inputstream_peek( tokeniser->input, tokeniser->context.pending + len, diff --git a/test/tokeniser2.c b/test/tokeniser2.c index 07f355a..a0264c9 100644 --- a/test/tokeniser2.c +++ b/test/tokeniser2.c @@ -86,7 +86,7 @@ int main(int argc, char **argv) } else if (strcmp(key, "lastStartTag") == 0) { ctx.last_start_tag = (const char *) json_object_get_string(val); - } else if (strcmp(key, "contentModelFlags") == 0) { + } else if (strcmp(key, "initialStates") == 0) { ctx.content_model = json_object_get_array(val); } else if (strcmp(key, "processCDATA") == 0) { @@ -169,15 +169,18 @@ void run_test(context *ctx) (struct json_object *) array_list_get_idx(ctx->content_model, i)); - if (strcmp(cm, "PCDATA") == 0) { + if (strcmp(cm, "PCDATA state") == 0) { params.content_model.model = 
HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { + } else if (strcmp(cm, "RCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { + } else if (strcmp(cm, "CDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.content_model.model = + HUBBUB_CONTENT_MODEL_RAWTEXT; } else { params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; diff --git a/test/tokeniser3.c b/test/tokeniser3.c index e68a230..86f079b 100644 --- a/test/tokeniser3.c +++ b/test/tokeniser3.c @@ -85,7 +85,7 @@ int main(int argc, char **argv) } else if (strcmp(key, "lastStartTag") == 0) { ctx.last_start_tag = (const char *) json_object_get_string(val); - } else if (strcmp(key, "contentModelFlags") == 0) { + } else if (strcmp(key, "initialStates") == 0) { ctx.content_model = json_object_get_array(val); } else if (strcmp(key, "processCDATA") == 0) { @@ -167,15 +167,18 @@ void run_test(context *ctx) (struct json_object *) array_list_get_idx(ctx->content_model, i)); - if (strcmp(cm, "PCDATA") == 0) { + if (strcmp(cm, "PCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_PCDATA; - } else if (strcmp(cm, "RCDATA") == 0) { + } else if (strcmp(cm, "RCDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_RCDATA; - } else if (strcmp(cm, "CDATA") == 0) { + } else if (strcmp(cm, "CDATA state") == 0) { params.content_model.model = HUBBUB_CONTENT_MODEL_CDATA; + } else if (strcmp(cm, "RAWTEXT state") == 0) { + params.content_model.model = + HUBBUB_CONTENT_MODEL_RAWTEXT; } else { params.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT; |