summary | refs | log | tree | commit | diff
path: root/src/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.c')
-rw-r--r-- src/parser.c 20
1 files changed, 19 insertions, 1 deletions
diff --git a/src/parser.c b/src/parser.c
index 575eb73..1ebbaab 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -217,9 +217,27 @@ hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
perror = parserutils_inputstream_append(parser->stream, data, len);
if (perror != PARSERUTILS_OK)
- return !HUBBUB_OK;
+ return hubbub_error_from_parserutils_error(perror);
error = hubbub_tokeniser_run(parser->tok);
+ if (error == HUBBUB_BADENCODING) {
+ /* Ok, we autodetected an encoding that we don't actually
+ * support. We've not actually processed any data at this
+ * point so fall back to Windows-1252 and hope for the best
+ */
+ perror = parserutils_inputstream_change_charset(parser->stream,
+ "Windows-1252", HUBBUB_CHARSET_TENTATIVE);
+ /* Under no circumstances should we get here if we've managed
+ * to process data. If there is a way, I want to know about it
+ */
+ assert(perror != PARSERUTILS_INVALID);
+ if (perror != PARSERUTILS_OK)
+ return hubbub_error_from_parserutils_error(perror);
+
+ /* Retry the tokenisation */
+ error = hubbub_tokeniser_run(parser->tok);
+ }
+
if (error != HUBBUB_OK)
return error;