summaryrefslogtreecommitdiff
path: root/utf8proc.h
diff options
context:
space:
mode:
author: Benito van der Zander <benito@benibela.de> 2016-07-12 17:51:50 +0200
committer: Steven G. Johnson <stevenj@mit.edu> 2016-07-12 11:51:50 -0400
commit: eeebf70bcf68443b0b2e5b3d811227ed3f039ea4 (patch)
tree: a4815a783b88588a2aa30b6a396eebfe1320e250 /utf8proc.h
parent: 9a0b87b57ec0be5bdf8baa7d53a4dfeb940d07d8 (diff)
download: libutf8proc-eeebf70bcf68443b0b2e5b3d811227ed3f039ea4.tar.gz
libutf8proc-eeebf70bcf68443b0b2e5b3d811227ed3f039ea4.tar.bz2
Smaller tables (#68)
* convert sequences to UTF-16 (saves 25kb)
* store sequence length in properties instead of using -1 termination (saves 10kb)
* cache index for slightly faster data creation
* store lower/upper/title mapping in sequence array (saves 25kb). Add utf8proc_totitle, as title_mapping cannot be used to get the title codepoint anymore. Rename xxx_mapping to xxx_seqindex, so programs assuming a value with the old meaning fail at compile time
* change combination array data type to uint16 (saves 40kb)
* merge 1st and 2nd comb index (saves 50kb)
* kill empty prefix/suffix in combination array (saves 50kb)
* there was no need to have a separate combination start array, it can be merged in a single array
* some fixes
* mark the table as const again
* and regen
Diffstat (limited to 'utf8proc.h')
-rw-r--r-- utf8proc.h 20
1 files changed, 13 insertions, 7 deletions
diff --git a/utf8proc.h b/utf8proc.h
index f9f0d92..4e033e7 100644
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -242,13 +242,12 @@ typedef struct utf8proc_property_struct {
* @see utf8proc_decomp_type_t.
*/
utf8proc_propval_t decomp_type;
- utf8proc_uint16_t decomp_mapping;
- utf8proc_uint16_t casefold_mapping;
- utf8proc_int32_t uppercase_mapping;
- utf8proc_int32_t lowercase_mapping;
- utf8proc_int32_t titlecase_mapping;
- utf8proc_int32_t comb1st_index;
- utf8proc_int32_t comb2nd_index;
+ utf8proc_uint16_t decomp_seqindex;
+ utf8proc_uint16_t casefold_seqindex;
+ utf8proc_uint16_t uppercase_seqindex;
+ utf8proc_uint16_t lowercase_seqindex;
+ utf8proc_uint16_t titlecase_seqindex;
+ utf8proc_uint16_t comb_index;
unsigned bidi_mirrored:1;
unsigned comp_exclusion:1;
/**
@@ -550,6 +549,13 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
/**
+ * Given a codepoint `c`, return the codepoint of the corresponding
+ * title-case character, if any; otherwise (if there is no title-case
+ * variant, or if `c` is not a valid codepoint) return `c`.
+ */
+UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);
+
+/**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
* except that a width of 0 is returned for non-printable codepoints
* instead of -1 as in `wcwidth`.