From d81308faba0cfb3fccf8c3b12446863c7b76ae32 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 2 May 2018 14:18:26 -0400 Subject: uppercase mapping ß (U+00df) to ẞ (U+1E9E) (#134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * uppercase(0x00df) = 0x1e9e * tests for titlecase and u+00df uppercase * NEWS, another test --- test/case.c | 23 ++++++++++++- test/printproperty.c | 93 +++++++++++++++++++++++++++++----------------------- 2 files changed, 74 insertions(+), 42 deletions(-) (limited to 'test') diff --git a/test/case.c b/test/case.c index 39958e3..a267609 100644 --- a/test/case.c +++ b/test/case.c @@ -13,13 +13,20 @@ int main(int argc, char **argv) for (c = 0; c <= 0x110000; ++c) { utf8proc_int32_t l = utf8proc_tolower(c); utf8proc_int32_t u = utf8proc_toupper(c); + utf8proc_int32_t t = utf8proc_totitle(c); check(l == c || utf8proc_codepoint_valid(l), "invalid tolower"); check(u == c || utf8proc_codepoint_valid(u), "invalid toupper"); + check(t == c || utf8proc_codepoint_valid(t), "invalid totitle"); + + if (utf8proc_codepoint_valid(c) && (l == u) != (l == t)) { + fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c); + ++error; + } if (sizeof(wint_t) > 2 || c < (1<<16)) { wint_t l0 = towlower(c), u0 = towupper(c); - + /* OS unicode tables may be out of date. But if they do have a lower/uppercase mapping, hopefully it is correct? */ @@ -44,6 +51,20 @@ int main(int argc, char **argv) } } check(!error, "utf8proc case conversion FAILED %d tests.", error); + + /* issue #130 */ + check(utf8proc_toupper(0x00df) == 0x1e9e && + utf8proc_totitle(0x00df) == 0x1e9e && + utf8proc_tolower(0x00df) == 0x00df && + utf8proc_tolower(0x1e9e) == 0x00df && + utf8proc_toupper(0x1e9e) == 0x1e9e, + "incorrect 0x00df/0x1e9e case conversions"); + utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00}; + utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00}; + check(!strcmp((char*)utf8proc_NFKC_Casefold(str_00df), "ss") && + !strcmp((char*)utf8proc_NFKC_Casefold(str_1e9e), "ss"), + "incorrect 0x00df/0x1e9e casefold normalization"); + printf("More up-to-date than OS unicode tables for %d tests.\n", better); printf("utf8proc case conversion tests SUCCEEDED.\n"); return 0; diff --git a/test/printproperty.c b/test/printproperty.c index 2819aa1..4017eac 100644 --- a/test/printproperty.c +++ b/test/printproperty.c @@ -4,46 +4,57 @@ int main(int argc, char **argv) { - int i; + int i; - for (i = 1; i < argc; ++i) { - unsigned int c; - if (!strcmp(argv[i], "-V")) { - printf("utf8proc version %s\n", utf8proc_version()); - continue; - } - check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]); - const utf8proc_property_t *p = utf8proc_get_property(c); - printf("U+%s:\n" - " category = %s\n" - " combining_class = %d\n" - " bidi_class = %d\n" - " decomp_type = %d\n" - " uppercase_mapping = %x\n" - " lowercase_mapping = %x\n" - " titlecase_mapping = %x\n" - " comb_index = %d\n" - " bidi_mirrored = %d\n" - " comp_exclusion = %d\n" - " ignorable = %d\n" - " control_boundary = %d\n" - " boundclass = %d\n" - " charwidth = %d\n", - argv[i], - utf8proc_category_string(c), - p->combining_class, - p->bidi_class, - p->decomp_type, - utf8proc_toupper(c), - utf8proc_tolower(c), - utf8proc_totitle(c), - p->comb_index, - p->bidi_mirrored, - p->comp_exclusion, - p->ignorable, - p->control_boundary, - p->boundclass, - utf8proc_charwidth(c)); - } - return 0; + for (i = 1; i < argc; ++i) { + utf8proc_uint8_t cstr[16], *map; + unsigned int c; + if (!strcmp(argv[i], "-V")) { + printf("utf8proc version %s\n", utf8proc_version()); + continue; + } + check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]); + const utf8proc_property_t *p = utf8proc_get_property(c); + + if (utf8proc_codepoint_valid(c)) + cstr[utf8proc_encode_char(c, cstr)] = 0; + else + strcat((char*)cstr, "N/A"); + utf8proc_map(cstr, 0, &map, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD); + + printf("U+%s: %s\n" + " category = %s\n" + " combining_class = %d\n" + " bidi_class = %d\n" + " decomp_type = %d\n" + " uppercase_mapping = %x\n" + " lowercase_mapping = %x\n" + " titlecase_mapping = %x\n" + " casefold = %s\n" + " comb_index = %d\n" + " bidi_mirrored = %d\n" + " comp_exclusion = %d\n" + " ignorable = %d\n" + " control_boundary = %d\n" + " boundclass = %d\n" + " charwidth = %d\n", + argv[i], (char*) cstr, + utf8proc_category_string(c), + p->combining_class, + p->bidi_class, + p->decomp_type, + utf8proc_toupper(c), + utf8proc_tolower(c), + utf8proc_totitle(c), + (char *) map, + p->comb_index, + p->bidi_mirrored, + p->comp_exclusion, + p->ignorable, + p->control_boundary, + p->boundclass, + utf8proc_charwidth(c)); + free(map); + } + return 0; } -- cgit v1.2.3