summary refs log tree commit diff
path: root/src/charset/detect.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/charset/detect.c')
-rw-r--r-- src/charset/detect.c 15
1 files changed, 14 insertions, 1 deletions
diff --git a/src/charset/detect.c b/src/charset/detect.c
index 562c12d..fd3de13 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -48,10 +48,23 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
if (data == NULL || mibenum == NULL || source == NULL)
return PARSERUTILS_BADPARM;
+ /**
+ * Meaning of *source on entry:
+ *
+ * CONFIDENT - Do not pass Go, do not attempt auto-detection.
+ * TENTATIVE - We've tried to autodetect already, but subsequently
+ * discovered that we don't actually support the detected
+ * charset. Thus, we've defaulted to Windows-1252. Don't
+ * perform auto-detection again, as it would be futile.
+ * (This bit diverges from the spec)
+ * UNKNOWN - No autodetection performed yet. Get on with it.
+ */
+
/* 1. */
/* If the source is dictated, there's nothing for us to do */
- if (*source == HUBBUB_CHARSET_CONFIDENT) {
+ if (*source == HUBBUB_CHARSET_CONFIDENT ||
+ *source == HUBBUB_CHARSET_TENTATIVE) {
return PARSERUTILS_OK;
}