diff options
author | Benito van der Zander <benito@benibela.de> | 2016-07-12 17:51:50 +0200 |
---|---|---|
committer | Steven G. Johnson <stevenj@mit.edu> | 2016-07-12 11:51:50 -0400 |
commit | eeebf70bcf68443b0b2e5b3d811227ed3f039ea4 (patch) | |
tree | a4815a783b88588a2aa30b6a396eebfe1320e250 /utf8proc.h | |
parent | 9a0b87b57ec0be5bdf8baa7d53a4dfeb940d07d8 (diff) | |
download | libutf8proc-eeebf70bcf68443b0b2e5b3d811227ed3f039ea4.tar.gz libutf8proc-eeebf70bcf68443b0b2e5b3d811227ed3f039ea4.tar.bz2 |
Smaller tables (#68)
* convert sequences to utf-16 (saves 25kb)
* store sequence length in properties instead of using -1 termination (saves 10kb)
* cache index for slightly faster data creation
* store lower/upper/title mapping in sequence array (saves 25kb). Add utf8proc_totitle, as title_mapping cannot be used to get the title codepoint anymore. Rename xxx_mapping to xxx_seqindex, so programs assuming a value with the old meaning fail at compile time
* change combination array data type to uint16 (saves 40kb)
* merge 1st and 2nd comb index (saves 50kb)
* kill empty prefix/suffix in combination array (saves 50kb)
* there was no need to have a separate combination start array; it can be merged into a single array
* some fixes
* mark the table as const again
* and regen
Diffstat (limited to 'utf8proc.h')
-rw-r--r-- | utf8proc.h | 20 |
1 files changed, 13 insertions, 7 deletions
@@ -242,13 +242,12 @@ typedef struct utf8proc_property_struct {
    * @see utf8proc_decomp_type_t.
    */
   utf8proc_propval_t decomp_type;
-  utf8proc_uint16_t decomp_mapping;
-  utf8proc_uint16_t casefold_mapping;
-  utf8proc_int32_t uppercase_mapping;
-  utf8proc_int32_t lowercase_mapping;
-  utf8proc_int32_t titlecase_mapping;
-  utf8proc_int32_t comb1st_index;
-  utf8proc_int32_t comb2nd_index;
+  utf8proc_uint16_t decomp_seqindex;
+  utf8proc_uint16_t casefold_seqindex;
+  utf8proc_uint16_t uppercase_seqindex;
+  utf8proc_uint16_t lowercase_seqindex;
+  utf8proc_uint16_t titlecase_seqindex;
+  utf8proc_uint16_t comb_index;
   unsigned bidi_mirrored:1;
   unsigned comp_exclusion:1;
   /**
@@ -550,6 +549,13 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
 
 /**
+ * Given a codepoint `c`, return the codepoint of the corresponding
+ * title-case character, if any; otherwise (if there is no title-case
+ * variant, or if `c` is not a valid codepoint) return `c`.
+ */
+UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);
+
+/**
  * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
  * except that a width of 0 is returned for non-printable codepoints
  * instead of -1 as in `wcwidth`.