summary | refs | log | tree | commit | diff
path: root/src/charset/detect.c
diff options
context:
space:
mode:
author: Andrew Sidwell <andy@entai.co.uk> 2008-08-11 01:29:00 +0000
committer: Andrew Sidwell <andy@entai.co.uk> 2008-08-11 01:29:00 +0000
commit: a6c3624c75a547e142fc732898f9a3890fa9e2f5 (patch)
tree: 6e1b44c51a706843d2057e3e1126ffa5714becf1 /src/charset/detect.c
parent: 4aef3c775e93d5add0277941fe0ecccc91d4ccb3 (diff)
download: libhubbub-a6c3624c75a547e142fc732898f9a3890fa9e2f5.tar.gz
libhubbub-a6c3624c75a547e142fc732898f9a3890fa9e2f5.tar.bz2
Move one step closer to getting encoding changes working.
svn path=/trunk/hubbub/; revision=5000
Diffstat (limited to 'src/charset/detect.c')
-rw-r--r-- src/charset/detect.c | 21
1 files changed, 9 insertions, 12 deletions
diff --git a/src/charset/detect.c b/src/charset/detect.c
index 7d3459f..f3f2e4f 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -49,19 +49,18 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
if (data == NULL || mibenum == NULL || source == NULL)
return PARSERUTILS_BADPARM;
- /* 1 */
+ /* 1. */
/* If the source is dictated, there's nothing for us to do */
- if (*source == HUBBUB_CHARSET_DICTATED) {
- /* confidence = certain; */
+ if (*source == HUBBUB_CHARSET_CONFIDENT) {
return PARSERUTILS_OK;
}
- /* 2 */
+ /* 2. */
/** \todo We probably want to wait for ~512 bytes of data / 500ms here */
- /* 3 */
+ /* 3. */
/* We need at least 3 bytes of data */
if (len < 3)
@@ -71,13 +70,12 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
charset = hubbub_charset_read_bom(data, len);
if (charset != 0) {
*mibenum = charset;
- *source = HUBBUB_CHARSET_DOCUMENT;
- /* confidence = certain; */
+ *source = HUBBUB_CHARSET_CONFIDENT;
return PARSERUTILS_OK;
}
- /* 4 */
+ /* 4. */
/* No BOM was found, so we must look for a meta charset within
* the document itself. */
@@ -111,8 +109,7 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
"UTF-32BE", SLEN("UTF-32BE"))) {
*mibenum = charset;
- *source = HUBBUB_CHARSET_DOCUMENT;
- /* confidence = tentative; */
+ *source = HUBBUB_CHARSET_TENTATIVE;
return PARSERUTILS_OK;
}
@@ -126,7 +123,7 @@ parserutils_error hubbub_charset_extract(const uint8_t *data, size_t len,
/* We failed to autodetect a charset, so use the default fallback */
default_encoding:
- /* 7 */
+ /* 7. */
charset = parserutils_charset_mibenum_from_name("Windows-1252",
SLEN("Windows-1252"));
@@ -135,7 +132,7 @@ default_encoding:
SLEN("ISO-8859-1"));
*mibenum = charset;
- *source = HUBBUB_CHARSET_DEFAULT;
+ *source = HUBBUB_CHARSET_TENTATIVE;
return PARSERUTILS_OK;
}