From eeebf70bcf68443b0b2e5b3d811227ed3f039ea4 Mon Sep 17 00:00:00 2001 From: Benito van der Zander Date: Tue, 12 Jul 2016 17:51:50 +0200 Subject: Smaller tables (#68) * convert sequences to utf-16 (saves 25kb) * store sequence length in properties instead of using -1 termination (saves 10kb) * cache index for slightly faster data creation * store lower/upper/title mapping in sequence array (saves 25kb). Add utf8proc_totitle, as titlecase_mapping cannot be used to get the title codepoint anymore. Rename xxx_mapping to xxx_seqindex, so programs assuming a value with the old meaning fail at compile time * change combination array data type to uint16 (saves 40kb) * merge 1st and 2nd comb index (saves 50kb) * kill empty prefix/suffix in combination array (saves 50kb) * there was no need to have a separate combination start array, it can be merged into a single array * some fixes * mark the table as const again * and regen --- utf8proc.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'utf8proc.h') diff --git a/utf8proc.h b/utf8proc.h index f9f0d92..4e033e7 100644 --- a/utf8proc.h +++ b/utf8proc.h @@ -242,13 +242,12 @@ typedef struct utf8proc_property_struct { * @see utf8proc_decomp_type_t. 
*/ utf8proc_propval_t decomp_type; - utf8proc_uint16_t decomp_mapping; - utf8proc_uint16_t casefold_mapping; - utf8proc_int32_t uppercase_mapping; - utf8proc_int32_t lowercase_mapping; - utf8proc_int32_t titlecase_mapping; - utf8proc_int32_t comb1st_index; - utf8proc_int32_t comb2nd_index; + utf8proc_uint16_t decomp_seqindex; + utf8proc_uint16_t casefold_seqindex; + utf8proc_uint16_t uppercase_seqindex; + utf8proc_uint16_t lowercase_seqindex; + utf8proc_uint16_t titlecase_seqindex; + utf8proc_uint16_t comb_index; unsigned bidi_mirrored:1; unsigned comp_exclusion:1; /** @@ -549,6 +548,13 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c); */ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); +/** + * Given a codepoint `c`, return the codepoint of the corresponding + * title-case character, if any; otherwise (if there is no title-case + * variant, or if `c` is not a valid codepoint) return `c`. + */ +UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c); + /** * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, * except that a width of 0 is returned for non-printable codepoints -- cgit v1.2.3