summary refs log tree commit diff
diff options
context:
space:
mode:
author Rupinder Singh Khokhar <rsk1coder99@gmail.com> 2014-06-14 07:35:44 +0530
committer Rupinder Singh Khokhar <rsk1coder99@gmail.com> 2014-07-09 10:04:21 +0530
commit e68a4b8ac410f402d12308ce7d63083b78d7ee89 (patch)
tree fd7a8bedb4a668dd1d801b3bde2d3c14381c5e36
parent d10b20d4f9fd2879e223de7a979535a77db2e9ce (diff)
download libhubbub-e68a4b8ac410f402d12308ce7d63083b78d7ee89.tar.gz
libhubbub-e68a4b8ac410f402d12308ce7d63083b78d7ee89.tar.bz2
Adding PLAINTEXT State & fixing the tester at places
-rw-r--r-- include/hubbub/types.h 3
-rw-r--r-- src/tokeniser/tokeniser.c 10
-rw-r--r-- test/tokeniser2.c 11
-rw-r--r-- test/tokeniser3.c 11
4 files changed, 23 insertions, 12 deletions
diff --git a/include/hubbub/types.h b/include/hubbub/types.h
index e5c208b..6e2b1a9 100644
--- a/include/hubbub/types.h
+++ b/include/hubbub/types.h
@@ -33,7 +33,8 @@ typedef enum hubbub_content_model {
HUBBUB_CONTENT_MODEL_PCDATA,
HUBBUB_CONTENT_MODEL_RCDATA,
HUBBUB_CONTENT_MODEL_CDATA,
- HUBBUB_CONTENT_MODEL_PLAINTEXT
+ HUBBUB_CONTENT_MODEL_PLAINTEXT,
+ HUBBUB_CONTENT_MODEL_RAWTEXT
} hubbub_content_model;
/**
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index b31b404..64eaf30 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -759,12 +759,16 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
emit_current_chars(tokeniser);
}
- /* Emit a null character */
- emit_character_token(tokeniser, &u_null_str);
+ /* Emit a null or a replacement character */
+ if(tokeniser->content_model != HUBBUB_CONTENT_MODEL_PCDATA) {
+ emit_character_token(tokeniser, &u_fffd_str);
+ } else {
+ emit_character_token(tokeniser, &u_null_str);
+ }
/* Advance past NUL */
parserutils_inputstream_advance(tokeniser->input, 1);
- } else if (c == '\r') {
+ } else if (c == '\r' && tokeniser->content_model != HUBBUB_CONTENT_MODEL_PLAINTEXT) {
error = parserutils_inputstream_peek(
tokeniser->input,
tokeniser->context.pending + len,
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 07f355a..a0264c9 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
+ } else if (strcmp(key, "initialStates") == 0) {
ctx.content_model =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
@@ -169,15 +169,18 @@ void run_test(context *ctx)
(struct json_object *)
array_list_get_idx(ctx->content_model, i));
- if (strcmp(cm, "PCDATA") == 0) {
+ if (strcmp(cm, "PCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
+ } else if (strcmp(cm, "RCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
+ } else if (strcmp(cm, "CDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.content_model.model =
+ HUBBUB_CONTENT_MODEL_RAWTEXT;
} else {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PLAINTEXT;
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index e68a230..86f079b 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -85,7 +85,7 @@ int main(int argc, char **argv)
} else if (strcmp(key, "lastStartTag") == 0) {
ctx.last_start_tag = (const char *)
json_object_get_string(val);
- } else if (strcmp(key, "contentModelFlags") == 0) {
+ } else if (strcmp(key, "initialStates") == 0) {
ctx.content_model =
json_object_get_array(val);
} else if (strcmp(key, "processCDATA") == 0) {
@@ -167,15 +167,18 @@ void run_test(context *ctx)
(struct json_object *)
array_list_get_idx(ctx->content_model, i));
- if (strcmp(cm, "PCDATA") == 0) {
+ if (strcmp(cm, "PCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PCDATA;
- } else if (strcmp(cm, "RCDATA") == 0) {
+ } else if (strcmp(cm, "RCDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_RCDATA;
- } else if (strcmp(cm, "CDATA") == 0) {
+ } else if (strcmp(cm, "CDATA state") == 0) {
params.content_model.model =
HUBBUB_CONTENT_MODEL_CDATA;
+ } else if (strcmp(cm, "RAWTEXT state") == 0) {
+ params.content_model.model =
+ HUBBUB_CONTENT_MODEL_RAWTEXT;
} else {
params.content_model.model =
HUBBUB_CONTENT_MODEL_PLAINTEXT;