From 6537f4f4acc41eb0608fdb1506ff8fc947cfb121 Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Sat, 23 Oct 2010 17:24:28 +0000
Subject: Fix bug where any encoding specified when creating an input stream would be replaced by UTF-8 if there was no charset detection callback provided, too. Tidy up the logic in this area, and add more commentary so it's clear.

svn path=/trunk/libparserutils/; revision=10899
---
 src/input/inputstream.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/input/inputstream.c b/src/input/inputstream.c
index 03dbf8f..73c038d 100644
--- a/src/input/inputstream.c
+++ b/src/input/inputstream.c
@@ -379,6 +379,9 @@ parserutils_error parserutils_inputstream_refill_buffer(
 	if (stream->done_first_chunk == false) {
 		parserutils_filter_optparams params;
 
+		/* If there is a charset detection routine, give it an
+		 * opportunity to override any charset specified when the
+		 * inputstream was created */
 		if (stream->csdetect != NULL) {
 			error = stream->csdetect(stream->raw->data,
 				stream->raw->length,
@@ -391,16 +394,23 @@ parserutils_error parserutils_inputstream_refill_buffer(
 				/* We don't have enough data to detect the
 				 * input encoding, but we're not going to get
 				 * any more as we've been notified of EOF.
-				 * Therefore, fall back to UTF-8. */
-				stream->mibenum =
-					parserutils_charset_mibenum_from_name(
-					"UTF-8", SLEN("UTF-8"));
-				stream->encsrc = 0;
-
+				 * Therefore, leave the encoding alone
+				 * so that any charset specified when the
+				 * inputstream was created will be preserved.
+				 * If there was no charset specified, then
+				 * we'll default to UTF-8, below */
 				error = PARSERUTILS_OK;
 			}
-		} else {
-			/* Default to UTF-8 */
+		}
+
+		/* Default to UTF-8 if there is still no encoding information
+		 * We'll do this if there was no encoding specified up-front
+		 * and:
+		 *    1) there was no charset detection routine
+		 * or 2) there was insufficient data for the charset
+		 *       detection routine to detect an encoding
+		 */
+		if (stream->mibenum == 0) {
 			stream->mibenum =
 				parserutils_charset_mibenum_from_name("UTF-8",
 					SLEN("UTF-8"));
-- 
cgit v1.2.3