From a57f3931e853ba31f3f397e055b1445f627c8bcb Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Thu, 4 Sep 2008 17:02:14 +0000 Subject: Extended 8bit coded. Mostly for Windows-125n support. Also needs testing. svn path=/trunk/libparserutils/; revision=5235 --- src/charset/codec.c | 2 + src/charset/codecs/Makefile | 2 +- src/charset/codecs/codec_ext8.c | 575 +++++++++++++++++++++++++++++++++++++++ src/charset/codecs/ext8_tables.h | 187 +++++++++++++ 4 files changed, 765 insertions(+), 1 deletion(-) create mode 100644 src/charset/codecs/codec_ext8.c create mode 100644 src/charset/codecs/ext8_tables.h diff --git a/src/charset/codec.c b/src/charset/codec.c index 620a5b2..ca2218e 100644 --- a/src/charset/codec.c +++ b/src/charset/codec.c @@ -15,6 +15,7 @@ extern parserutils_charset_handler iconv_codec_handler; #endif extern parserutils_charset_handler charset_8859_codec_handler; +extern parserutils_charset_handler charset_ext8_codec_handler; extern parserutils_charset_handler charset_utf8_codec_handler; extern parserutils_charset_handler charset_utf16_codec_handler; @@ -22,6 +23,7 @@ static parserutils_charset_handler *handler_table[] = { &charset_utf8_codec_handler, &charset_utf16_codec_handler, &charset_8859_codec_handler, + &charset_ext8_codec_handler, #ifdef WITH_ICONV_CODEC &iconv_codec_handler, #endif diff --git a/src/charset/codecs/Makefile b/src/charset/codecs/Makefile index fd0365b..001ac1f 100644 --- a/src/charset/codecs/Makefile +++ b/src/charset/codecs/Makefile @@ -32,7 +32,7 @@ dirstack_$(sp) := $(d) d := $(DIR) # Sources -SRCS_$(d) := codec_8859.c codec_iconv.c codec_utf8.c codec_utf16.c +SRCS_$(d) := codec_8859.c codec_ext8.c codec_iconv.c codec_utf8.c codec_utf16.c # Append to sources for component SOURCES += $(addprefix $(d), $(SRCS_$(d))) diff --git a/src/charset/codecs/codec_ext8.c b/src/charset/codecs/codec_ext8.c new file mode 100644 index 0000000..f32f542 --- /dev/null +++ b/src/charset/codecs/codec_ext8.c @@ -0,0 +1,575 @@ +/* + * This file is 
part of LibParserUtils.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* These two are for htonl / ntohl */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include <parserutils/charset/mibenum.h>
+
+#include "charset/codecs/codec_impl.h"
+#include "utils/utils.h"
+
+#include "charset/codecs/ext8_tables.h"
+
+/**
+ * Charsets handled by this codec
+ *
+ * Each entry's MIB enum starts out as 0 and is filled in on first use by
+ * charset_ext8_resolve_mibs().
+ */
+static struct {
+	uint16_t mib;		/**< MIB enum (0 until resolved) */
+	const char *name;	/**< Charset name */
+	size_t len;		/**< SLEN() of the charset name */
+	uint32_t *table;	/**< Mapping table for 0x80-0xFF */
+} known_charsets[] = {
+	{ 0, "Windows-1250", SLEN("Windows-1250"), w1250 },
+	{ 0, "Windows-1251", SLEN("Windows-1251"), w1251 },
+	{ 0, "Windows-1252", SLEN("Windows-1252"), w1252 },
+	{ 0, "Windows-1253", SLEN("Windows-1253"), w1253 },
+	{ 0, "Windows-1254", SLEN("Windows-1254"), w1254 },
+	{ 0, "Windows-1255", SLEN("Windows-1255"), w1255 },
+	{ 0, "Windows-1256", SLEN("Windows-1256"), w1256 },
+	{ 0, "Windows-1257", SLEN("Windows-1257"), w1257 },
+	{ 0, "Windows-1258", SLEN("Windows-1258"), w1258 },
+};
+
+/**
+ * Windows charset codec
+ */
+typedef struct charset_ext8_codec {
+	parserutils_charset_codec base;	/**< Base class */
+
+	uint32_t *table;		/**< Mapping table for 0x80-0xFF */
+
+#define READ_BUFSIZE (8)
+	uint32_t read_buf[READ_BUFSIZE];	/**< Buffer for partial
+						 * output sequences (decode)
+						 * (host-endian) */
+	size_t read_len;		/**< Character length of read_buf */
+
+#define WRITE_BUFSIZE (8)
+	uint32_t write_buf[WRITE_BUFSIZE];	/**< Buffer for partial
+						 * output sequences (encode)
+						 * (host-endian) */
+	size_t write_len;		/**< Character length of write_buf */
+
+} charset_ext8_codec;
+
+static void charset_ext8_resolve_mibs(void);
+static bool charset_ext8_codec_handles_charset(const char *charset);
+static parserutils_charset_codec *charset_ext8_codec_create(const char *charset,
+		parserutils_alloc alloc, void *pw);
+static void charset_ext8_codec_destroy (parserutils_charset_codec *codec);
+static parserutils_error charset_ext8_codec_encode(
+		parserutils_charset_codec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen);
+static parserutils_error charset_ext8_codec_decode(
+		parserutils_charset_codec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen);
+static parserutils_error charset_ext8_codec_reset(
+		parserutils_charset_codec *codec);
+static inline parserutils_error charset_ext8_codec_read_char(
+		charset_ext8_codec *c,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen);
+static inline parserutils_error charset_ext8_codec_output_decoded_char(
+		charset_ext8_codec *c,
+		uint32_t ucs4, uint8_t **dest, size_t *destlen);
+static inline parserutils_error charset_ext8_from_ucs4(charset_ext8_codec *c,
+		uint32_t ucs4, uint8_t **s, size_t *len);
+static inline parserutils_error charset_ext8_to_ucs4(charset_ext8_codec *c,
+		const uint8_t *s, size_t len, uint32_t *ucs4);
+
+/**
+ * Fill in the MIB enum of every entry in known_charsets
+ *
+ * The lookup is performed at most once: the first entry's MIB enum doubles
+ * as the "already resolved" flag, so nothing happens once it is non-zero.
+ */
+void charset_ext8_resolve_mibs(void)
+{
+	if (known_charsets[0].mib != 0)
+		return;
+
+	for (uint32_t i = 0; i < N_ELEMENTS(known_charsets); i++) {
+		known_charsets[i].mib =
+			parserutils_charset_mibenum_from_name(
+					known_charsets[i].name,
+					known_charsets[i].len);
+	}
+}
+
+/**
+ * Determine whether this codec handles a specific charset
+ *
+ * \param charset  Charset to test
+ * \return true if handleable, false otherwise
+ */
+bool charset_ext8_codec_handles_charset(const char *charset)
+{
+	uint16_t match = parserutils_charset_mibenum_from_name(charset,
+			strlen(charset));
+
+	charset_ext8_resolve_mibs();
+
+	for (uint32_t i = 0; i < N_ELEMENTS(known_charsets); i++) {
+		if (known_charsets[i].mib == match)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * Create an extended 8bit codec
+ *
+ * \param charset  The charset to read from / write to
+ * \param alloc    Memory (de)allocation function
+ * \param pw       Pointer to client-specific private data (may be NULL)
+ * \return Pointer to codec, or NULL on failure
+ *
+ * Failure covers both allocation failure and a charset which is not one of
+ * those listed in known_charsets.
+ */
+parserutils_charset_codec *charset_ext8_codec_create(const char *charset,
+		parserutils_alloc alloc, void *pw)
+{
+	charset_ext8_codec *codec;
+	uint16_t match = parserutils_charset_mibenum_from_name(
+			charset, strlen(charset));
+	uint32_t *table = NULL;
+
+	/* The MIB enums compared against below are filled in lazily; make
+	 * sure that has happened even if handles_charset() was never called */
+	charset_ext8_resolve_mibs();
+
+	for (uint32_t i = 0; i < N_ELEMENTS(known_charsets); i++) {
+		if (known_charsets[i].mib == match) {
+			table = known_charsets[i].table;
+			break;
+		}
+	}
+
+	/* Unknown charset: fail cleanly rather than build a codec that
+	 * would dereference a NULL mapping table later on */
+	if (table == NULL)
+		return NULL;
+
+	codec = alloc(NULL, sizeof(charset_ext8_codec), pw);
+	if (codec == NULL)
+		return NULL;
+
+	codec->table = table;
+
+	codec->read_buf[0] = 0;
+	codec->read_len = 0;
+
+	codec->write_buf[0] = 0;
+	codec->write_len = 0;
+
+	/* Finally, populate vtable */
+	codec->base.handler.destroy = charset_ext8_codec_destroy;
+	codec->base.handler.encode = charset_ext8_codec_encode;
+	codec->base.handler.decode = charset_ext8_codec_decode;
+	codec->base.handler.reset = charset_ext8_codec_reset;
+
+	return (parserutils_charset_codec *) codec;
+}
+
+/**
+ * Destroy an extended 8bit codec
+ *
+ * \param codec  The codec to destroy
+ *
+ * There is nothing to release: the mapping table is a static array and the
+ * buffers live inside the codec object, which this function does not free.
+ * NOTE(review): presumably the generic codec layer frees the object itself;
+ * confirm against the caller.
+ */
+void charset_ext8_codec_destroy (parserutils_charset_codec *codec)
+{
+	UNUSED(codec);
+}
+
+/**
+ * Encode a chunk of UCS-4 (big endian) data into extended 8bit
+ *
+ * \param codec      The codec to use
+ * \param source     Pointer to pointer to source data
+ * \param sourcelen  Pointer to length (in bytes) of source data
+ * \param dest       Pointer to pointer to output buffer
+ * \param destlen    Pointer to length (in bytes) of output buffer
+ * \return PARSERUTILS_OK          on success,
+ *         PARSERUTILS_NOMEM       if output buffer is too small,
+ *         PARSERUTILS_INVALID if a character cannot be represented and the
+ *                            codec's error handling mode is set to STRICT,
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read. Any remaining output for the character will be buffered by the
+ * codec for writing on the next call.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ */
+parserutils_error charset_ext8_codec_encode(parserutils_charset_codec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	charset_ext8_codec *c = (charset_ext8_codec *) codec;
+	parserutils_error error;
+
+	/* Drain whatever the previous call had to leave behind */
+	for (uint32_t *next = c->write_buf; c->write_len > 0;
+			next++, c->write_len--) {
+		error = charset_ext8_from_ucs4(c, *next, dest, destlen);
+		if (error == PARSERUTILS_OK)
+			continue;
+
+		assert(error == PARSERUTILS_NOMEM);
+
+		/* Slide the unwritten tail back to the start of the
+		 * save area, ready for the next attempt */
+		for (uint32_t i = 0; i < c->write_len; i++)
+			c->write_buf[i] = next[i];
+
+		return error;
+	}
+
+	/* Then consume this call's input, one UCS-4 character at a time */
+	while (*sourcelen > 0) {
+		uint32_t ucs4 = ntohl(*((uint32_t *) (void *) *source));
+		uint32_t *pending = &ucs4;
+		size_t npending = 1;
+
+		for (; npending > 0; pending++, npending--) {
+			error = charset_ext8_from_ucs4(c, *pending,
+					dest, destlen);
+			if (error == PARSERUTILS_OK)
+				continue;
+
+			if (error != PARSERUTILS_NOMEM)
+				return error;
+
+			/* No room left in the output buffer */
+			if (npending >= WRITE_BUFSIZE)
+				abort();
+
+			/* Stash what could not be written so that the
+			 * next call can emit it first */
+			c->write_len = npending;
+			for (uint32_t i = 0; i < npending; i++)
+				c->write_buf[i] = pending[i];
+
+			/* The stashed character counts as consumed */
+			*source += 4;
+			*sourcelen -= 4;
+
+			return PARSERUTILS_NOMEM;
+		}
+
+		*source += 4;
+		*sourcelen -= 4;
+	}
+
+	return PARSERUTILS_OK;
+}
+
+/**
+ * Decode a chunk of extended 8bit data into UCS-4 (big endian)
+ *
+ * \param codec      The codec to use
+ * \param source     Pointer to pointer to source data
+ * \param sourcelen  Pointer to length (in bytes) of source data
+ * \param dest       Pointer to pointer to output buffer
+ * \param destlen    Pointer to length (in bytes) of output buffer
+ * \return PARSERUTILS_OK          on success,
+ *         PARSERUTILS_NOMEM       if output buffer is too small,
+ *         PARSERUTILS_INVALID if a character cannot be represented and the
+ *                            codec's error handling mode is set to STRICT,
+ *
+ * On exit, ::source will point immediately _after_ the last input character
+ * read, if the result is _OK or _NOMEM. Any remaining output for the
+ * character will be buffered by the codec for writing on the next call.
+ *
+ * In the case of the result being _INVALID, ::source will point _at_ the
+ * last input character read; nothing will be written or buffered for the
+ * failed character. It is up to the client to fix the cause of the failure
+ * and retry the decoding process.
+ *
+ * Note that, if failure occurs whilst attempting to write any output
+ * buffered by the last call, then ::source and ::sourcelen will remain
+ * unchanged (as nothing more has been read).
+ *
+ * If STRICT error handling is configured and an illegal sequence is split
+ * over two calls, then _INVALID will be returned from the second call,
+ * but ::source will point mid-way through the invalid sequence (i.e. it
+ * will be unmodified over the second call).
In addition, the internal
+ * incomplete-sequence buffer will be emptied, such that subsequent calls
+ * will progress, rather than re-evaluating the same invalid sequence.
+ *
+ * ::sourcelen will be reduced appropriately on exit.
+ *
+ * ::dest will point immediately _after_ the last character written.
+ *
+ * ::destlen will be reduced appropriately on exit.
+ *
+ * Call this with a source length of 0 to flush the output buffer.
+ */
+parserutils_error charset_ext8_codec_decode(parserutils_charset_codec *codec,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	charset_ext8_codec *c = (charset_ext8_codec *) codec;
+
+	if (c->read_len > 0) {
+		/* Emit characters held over from the previous call, but
+		 * only while everything still pending fits in the output */
+		uint32_t *next = c->read_buf;
+
+		for (; c->read_len > 0 && *destlen >= c->read_len * 4;
+				next++, c->read_len--) {
+			*((uint32_t *) (void *) *dest) = htonl(*next);
+
+			*dest += 4;
+			*destlen -= 4;
+		}
+
+		if (*destlen < c->read_len * 4) {
+			/* Output exhausted: move what is left to the
+			 * front of the buffer and report it */
+			for (size_t i = 0; i < c->read_len; i++)
+				c->read_buf[i] = next[i];
+
+			return PARSERUTILS_NOMEM;
+		}
+	}
+
+	/* Common path: convert input until it runs out or a read fails */
+	while (*sourcelen > 0) {
+		parserutils_error error = charset_ext8_codec_read_char(c,
+				source, sourcelen, dest, destlen);
+
+		if (error != PARSERUTILS_OK)
+			return error;
+	}
+
+	return PARSERUTILS_OK;
+}
+
+/**
+ * Clear an extended 8bit codec's encoding state
+ *
+ * Both hold-over buffers (decode and encode) are marked empty.
+ *
+ * \param codec  The codec to reset
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error charset_ext8_codec_reset(parserutils_charset_codec *codec)
+{
+	charset_ext8_codec *c = (charset_ext8_codec *) codec;
+
+	c->read_len = 0;
+	c->read_buf[0] = 0;
+
+	c->write_len = 0;
+	c->write_buf[0] = 0;
+
+	return PARSERUTILS_OK;
+}
+
+
+/**
+ * Read a character from the extended 8bit to UCS-4 (big endian)
+ *
+ * \param c          The codec
+ * \param
source Pointer to pointer to source buffer (updated on exit) + * \param sourcelen Pointer to length of source buffer (updated on exit) + * \param dest Pointer to pointer to output buffer (updated on exit) + * \param destlen Pointer to length of output buffer (updated on exit) + * \return PARSERUTILS_OK on success, + * PARSERUTILS_NOMEM if output buffer is too small, + * PARSERUTILS_INVALID if a character cannot be represented and the + * codec's error handling mode is set to STRICT, + * + * On exit, ::source will point immediately _after_ the last input character + * read, if the result is _OK or _NOMEM. Any remaining output for the + * character will be buffered by the codec for writing on the next call. + * + * In the case of the result being _INVALID, ::source will point _at_ the + * last input character read; nothing will be written or buffered for the + * failed character. It is up to the client to fix the cause of the failure + * and retry the decoding process. + * + * ::sourcelen will be reduced appropriately on exit. + * + * ::dest will point immediately _after_ the last character written. + * + * ::destlen will be reduced appropriately on exit. 
+ */
+parserutils_error charset_ext8_codec_read_char(charset_ext8_codec *c,
+		const uint8_t **source, size_t *sourcelen,
+		uint8_t **dest, size_t *destlen)
+{
+	uint32_t ucs4;
+	parserutils_error error;
+
+	/* Convert a single character */
+	error = charset_ext8_to_ucs4(c, *source, *sourcelen, &ucs4);
+
+	if (error == PARSERUTILS_NEEDDATA) {
+		/* Can only happen if sourcelen == 0 */
+		return error;
+	}
+
+	if (error == PARSERUTILS_INVALID) {
+		/* Illegal input: strict mode reports it and consumes
+		 * nothing; otherwise U+FFFD stands in for the bad byte */
+		if (c->base.errormode ==
+				PARSERUTILS_CHARSET_CODEC_ERROR_STRICT)
+			return PARSERUTILS_INVALID;
+
+		ucs4 = 0xFFFD;
+	} else if (error != PARSERUTILS_OK) {
+		/* Any other result is not acted upon */
+		return PARSERUTILS_OK;
+	}
+
+	error = charset_ext8_codec_output_decoded_char(c,
+			ucs4, dest, destlen);
+	if (error == PARSERUTILS_OK || error == PARSERUTILS_NOMEM) {
+		/* Written or buffered, so the input byte is consumed */
+		*source += 1;
+		*sourcelen -= 1;
+	}
+
+	return error;
+}
+
+/**
+ * Output a UCS-4 character (big endian)
+ *
+ * If fewer than four bytes of output remain, the character is kept in the
+ * codec's read buffer instead, to be written by the next decode call.
+ *
+ * \param c        Codec to use
+ * \param ucs4     UCS-4 character (host endian)
+ * \param dest     Pointer to pointer to output buffer
+ * \param destlen  Pointer to output buffer length
+ * \return PARSERUTILS_OK          on success,
+ *         PARSERUTILS_NOMEM       if output buffer is too small,
+ */
+parserutils_error charset_ext8_codec_output_decoded_char(charset_ext8_codec *c,
+		uint32_t ucs4, uint8_t **dest, size_t *destlen)
+{
+	if (*destlen >= 4) {
+		*((uint32_t *) (void *) *dest) = htonl(ucs4);
+		*dest += 4;
+		*destlen -= 4;
+
+		return PARSERUTILS_OK;
+	}
+
+	/* Run out of output buffer; hold the character for later */
+	c->read_buf[0] = ucs4;
+	c->read_len = 1;
+
+	return PARSERUTILS_NOMEM;
+}
+
+/**
+ *
Convert a UCS4 (host endian) character to extended 8bit
+ *
+ * \param c     The codec instance
+ * \param ucs4  The UCS4 character to convert
+ * \param s     Pointer to pointer to destination buffer
+ * \param len   Pointer to destination buffer length
+ * \return PARSERUTILS_OK          on success,
+ *         PARSERUTILS_NOMEM       if there's insufficient space in the output buffer,
+ *         PARSERUTILS_INVALID     if the character cannot be represented
+ *
+ * _INVALID will only be returned if the codec's conversion mode is STRICT.
+ * Otherwise, '?' will be output.
+ *
+ * Characters below U+0080 map to themselves. Anything else is looked up in
+ * the codec's table and, if found at index i, is written as byte 0x80 + i.
+ * U+FFFF is never looked up, as the table uses it to mark undefined bytes.
+ *
+ * On successful conversion, *s and *len will be updated.
+ */
+parserutils_error charset_ext8_from_ucs4(charset_ext8_codec *c,
+		uint32_t ucs4, uint8_t **s, size_t *len)
+{
+	uint8_t out = 0;
+
+	if (*len < 1)
+		return PARSERUTILS_NOMEM;
+
+	if (ucs4 < 0x80) {
+		/* ASCII */
+		out = ucs4;
+	} else {
+		uint32_t i = 128;
+
+		/* 0xFFFF is the "undefined" marker in the tables, so it
+		 * must not be allowed to match one of those slots */
+		if (ucs4 != 0xFFFF) {
+			for (i = 0; i < 128; i++) {
+				if (ucs4 == c->table[i])
+					break;
+			}
+		}
+
+		if (i == 128) {
+			if (c->base.errormode ==
+					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT)
+				return PARSERUTILS_INVALID;
+			else
+				out = '?';
+		} else {
+			/* Table index i describes byte 0x80 + i */
+			out = 0x80 + i;
+		}
+	}
+
+	/* Write through the caller's pointer, then advance the caller's
+	 * pointer and length (not the local parameter copies) */
+	**s = out;
+	(*s)++;
+	(*len)--;
+
+	return PARSERUTILS_OK;
+}
+
+/**
+ * Convert an extended 8bit character to UCS4 (host endian)
+ *
+ * \param c     The codec instance
+ * \param s     Pointer to source buffer
+ * \param len   Source buffer length
+ * \param ucs4  Pointer to destination buffer
+ * \return PARSERUTILS_OK          on success,
+ *         PARSERUTILS_NEEDDATA    if there's insufficient input data
+ *         PARSERUTILS_INVALID     if the character cannot be represented
+ *
+ * Bytes below 0x80 map to themselves. Bytes 0x80-0xFF are looked up in the
+ * codec's 128-entry table at index (byte - 0x80); an entry of 0xFFFF means
+ * the byte is undefined in this charset.
+ */
+parserutils_error charset_ext8_to_ucs4(charset_ext8_codec *c,
+		const uint8_t *s, size_t len, uint32_t *ucs4)
+{
+	uint32_t out;
+
+	if (len < 1)
+		return PARSERUTILS_NEEDDATA;
+
+	if (*s < 0x80) {
+		out = *s;
+	} else {
+		/* The table only covers 0x80-0xFF, so rebase the index */
+		out = c->table[*s - 0x80];
+
+		if (out == 0xFFFF)
+			return PARSERUTILS_INVALID;
+	}
+
+	*ucs4 = out;
+
+	return PARSERUTILS_OK;
+}
+
+const parserutils_charset_handler charset_ext8_codec_handler = {
+
charset_ext8_codec_handles_charset, /* NOTE(review): positional initialiser; order must match the fields of parserutils_charset_handler, declared elsewhere */
+	charset_ext8_codec_create
+};
+
+diff --git a/src/charset/codecs/ext8_tables.h b/src/charset/codecs/ext8_tables.h
new file mode 100644
index 0000000..4691ebb
--- /dev/null
+++ b/src/charset/codecs/ext8_tables.h
@@ -0,0 +1,187 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell
+ */
+
+#ifndef parserutils_charset_codecs_ext8tables_h_
+#define parserutils_charset_codecs_ext8tables_h_
+
+/* Mapping tables for extended 8bit -> UCS4.
+ * Undefined characters are mapped to U+FFFF,
+ * which is a guaranteed non-character
+ *
+ * Every table has 128 entries, one per byte value in the range
+ * 0x80-0xFF, in ascending byte order (entry 0 describes byte 0x80,
+ * entry 127 describes byte 0xFF). Each row below holds eight entries.
+ */
+
+static uint32_t w1250[128] = { /* Windows-1250 */
+	0x20AC, 0xFFFF, 0x201A, 0xFFFF, 0x201E, 0x2026, 0x2020, 0x2021,
+	0xFFFF, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0xFFFF, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
+	0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
+	0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
+	0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
+	0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
+	0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+	0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
+	0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
+	0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
+	0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
+	0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
+};
+
+static uint32_t w1251[128] = { /* Windows-1251 */
+	0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
+	0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0xFFFF, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
+	0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
+	0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
+	0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
+	0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
+	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+	0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+	0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+	0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+	0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+};
+
+static uint32_t w1252[128] = { /* Windows-1252 */
+	0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFF, 0x017D, 0xFFFF,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFF, 0x017E, 0x0178,
+	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
+};
+
+static uint32_t w1253[128] = { /* Windows-1253 */
+	0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0xFFFF, 0x2030, 0xFFFF, 0x2039, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0xFFFF, 0x2122, 0xFFFF, 0x203A, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0xFFFF, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
+	0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
+	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+	0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
+	0x03A0, 0x03A1, 0xFFFF, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+	0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
+	0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
+	0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
+	0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
+	0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFF,
+};
+
+static uint32_t w1254[128] = { /* Windows-1254 */
+	0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFF, 0xFFFF, 0x0178,
+	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+	0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
+	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+	0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF,
+};
+
+static uint32_t w1255[128] = { /* Windows-1255 */
+	0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x02C6, 0x2030, 0xFFFF, 0x2039, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0x02DC, 0x2122, 0xFFFF, 0x203A, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+	0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
+	0x05B8, 0x05B9, 0xFFFF, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
+	0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
+	0x05F4, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
+	0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
+	0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
+	0x05E8, 0x05E9, 0x05EA, 0xFFFF, 0xFFFF, 0x200E, 0x200F, 0xFFFF,
+};
+
+static uint32_t w1256[128] = { /* Windows-1256 */
+	0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
+	0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
+	0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
+	0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
+	0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
+	0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
+	0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
+	0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
+	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
+	0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
+	0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2,
+};
+
+static uint32_t w1257[128] = { /* Windows-1257 */
+	0x20AC, 0xFFFF, 0x201A, 0xFFFF, 0x201E, 0x2026, 0x2020, 0x2021,
+	0xFFFF, 0x2030, 0xFFFF, 0x2039, 0xFFFF, 0x00A8, 0x02C7, 0x00B8,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0xFFFF, 0x2122, 0xFFFF, 0x203A, 0xFFFF, 0x00AF, 0x02DB, 0xFFFF,
+	0x00A0, 0xFFFF, 0x00A2, 0x00A3, 0x00A4, 0xFFFF, 0x00A6, 0x00A7,
+	0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
+	0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
+	0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
+	0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
+	0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
+	0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
+	0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
+	0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
+	0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9,
+};
+
+static uint32_t w1258[128] = { /* Windows-1258 */
+	0x20AC, 0xFFFF, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+	0x02C6, 0x2030, 0xFFFF, 0x2039, 0x0152, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+	0x02DC, 0x2122, 0xFFFF, 0x203A, 0x0153, 0xFFFF, 0xFFFF, 0x0178,
+	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+	0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
+	0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
+	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
+	0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
+	0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
+	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF,
+};
+
+#endif
-- 
cgit v1.2.3