diff options
author | James Bursa <james@netsurf-browser.org> | 2003-12-29 00:38:59 +0000 |
---|---|---|
committer | James Bursa <james@netsurf-browser.org> | 2003-12-29 00:38:59 +0000 |
commit | 4fcbc23c1ce263d38973a5ba69dd471c2585050f (patch) | |
tree | 2fd5602254d569af013d4b6aeac789976bafb50e /utils/utils.c | |
parent | 3a8b8485adc6a0e5e1d8182b64951d077b842093 (diff) | |
download | netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.gz netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.bz2 |
[project @ 2003-12-29 00:38:59 by bursa]
Transliterate Unicode to Latin1 using Markus Kuhn's transtab.
svn path=/import/netsurf/; revision=465
Diffstat (limited to 'utils/utils.c')
-rw-r--r-- | utils/utils.c | 25 |
1 file changed, 19 insertions, 6 deletions
diff --git a/utils/utils.c b/utils/utils.c index ecc31f995..8cd6e1f68 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -117,22 +117,35 @@ char * squash_whitespace(const char * s) char * tolat1(xmlChar * s) { unsigned int length = strlen((char*) s); - char *d = xcalloc(length + 1, sizeof(char)); + unsigned int space = length + 100; + char *d = xcalloc(space, sizeof(char)); char *d0 = d; + char *end = d0 + space - 10; int u, chars; while (*s != 0) { chars = length; u = xmlGetUTF8Char((unsigned char *) s, &chars); + if (chars <= 0) { + s += 1; + length -= 1; + LOG(("UTF-8 error")); + continue; + } s += chars; length -= chars; if (u == 0x09 || u == 0x0a || u == 0x0d) - *d = ' '; + *d++ = ' '; else if ((0x20 <= u && u <= 0x7f) || (0xa0 <= u && u <= 0xff)) - *d = u; - else - *d = '?'; - d++; + *d++ = u; + else { + unicode_transliterate((unsigned int) u, &d); + if (end < d) { + space += 100; + d0 = xrealloc(d0, space); + end = d0 + space - 10; + } + } } *d = 0; |