diff options
Diffstat (limited to 'src/charset/detect.c')
-rw-r--r-- | src/charset/detect.c | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/src/charset/detect.c b/src/charset/detect.c
index 562c12d..fd3de13 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -48,10 +48,23 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
 	if (data == NULL || mibenum == NULL || source == NULL)
 		return PARSERUTILS_BADPARM;
 
+	/**
+	 * Meaning of *source on entry:
+	 *
+	 * CONFIDENT - Do not pass Go, do not attempt auto-detection.
+	 * TENTATIVE - We've tried to autodetect already, but subsequently
+	 *             discovered that we don't actually support the detected
+	 *             charset. Thus, we've defaulted to Windows-1252. Don't
+	 *             perform auto-detection again, as it would be futile.
+	 *             (This bit diverges from the spec)
+	 * UNKNOWN   - No autodetection performed yet. Get on with it.
+	 */
+
 	/* 1. */
 
 	/* If the source is dictated, there's nothing for us to do */
-	if (*source == HUBBUB_CHARSET_CONFIDENT) {
+	if (*source == HUBBUB_CHARSET_CONFIDENT ||
+			*source == HUBBUB_CHARSET_TENTATIVE) {
 		return PARSERUTILS_OK;
 	}
 