diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2010-10-23 17:24:28 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2010-10-23 17:24:28 +0000 |
commit | 6537f4f4acc41eb0608fdb1506ff8fc947cfb121 (patch) | |
tree | 84885dbfd3c55920e0b5ba161b038b8b86824ae7 | |
parent | 4dfe13fb3772f9f9df944238e2fcf782b8c336aa (diff) | |
download | libparserutils-6537f4f4acc41eb0608fdb1506ff8fc947cfb121.tar.gz libparserutils-6537f4f4acc41eb0608fdb1506ff8fc947cfb121.tar.bz2 |
Fix bug where any encoding specified when creating an input stream would be replaced by UTF-8 if no charset detection callback was provided as well.
Tidy up the logic in this area, and add more commentary so it's clear.
svn path=/trunk/libparserutils/; revision=10899
-rw-r--r-- | src/input/inputstream.c | 26 |
1 files changed, 18 insertions, 8 deletions
diff --git a/src/input/inputstream.c b/src/input/inputstream.c index 03dbf8f..73c038d 100644 --- a/src/input/inputstream.c +++ b/src/input/inputstream.c @@ -379,6 +379,9 @@ parserutils_error parserutils_inputstream_refill_buffer( if (stream->done_first_chunk == false) { parserutils_filter_optparams params; + /* If there is a charset detection routine, give it an + * opportunity to override any charset specified when the + * inputstream was created */ if (stream->csdetect != NULL) { error = stream->csdetect(stream->raw->data, stream->raw->length, @@ -391,16 +394,23 @@ parserutils_error parserutils_inputstream_refill_buffer( /* We don't have enough data to detect the * input encoding, but we're not going to get * any more as we've been notified of EOF. - * Therefore, fall back to UTF-8. */ - stream->mibenum = - parserutils_charset_mibenum_from_name( - "UTF-8", SLEN("UTF-8")); - stream->encsrc = 0; - + * Therefore, leave the encoding alone + * so that any charset specified when the + * inputstream was created will be preserved. + * If there was no charset specified, then + * we'll default to UTF-8, below */ error = PARSERUTILS_OK; } - } else { - /* Default to UTF-8 */ + } + + /* Default to UTF-8 if there is still no encoding information + * We'll do this if there was no encoding specified up-front + * and: + * 1) there was no charset detection routine + * or 2) there was insufficient data for the charset + * detection routine to detect an encoding + */ + if (stream->mibenum == 0) { stream->mibenum = parserutils_charset_mibenum_from_name("UTF-8", SLEN("UTF-8")); |