diff options
author | Steven G. Johnson <stevenj@mit.edu> | 2018-05-02 08:15:02 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-02 08:15:02 -0400 |
commit | bdc8b9e4b2063e4b4563938d5077ee3b826cf342 (patch) | |
tree | b82ecf4a68d8b8841f4cb5aa4f903841f729bb47 /test/misc.c | |
parent | 48949bd3ebd66bb94a40f4c3fcfb26dd4bf2be2b (diff) | |
download | libutf8proc-bdc8b9e4b2063e4b4563938d5077ee3b826cf342.tar.gz libutf8proc-bdc8b9e4b2063e4b4563938d5077ee3b826cf342.tar.bz2 |
Case folding fixes (#133)
* Fixes allowing for “Full” folding and NFKC_CaseFold compliance.
* Only include C (Common) and F (Full) foldings from CaseFolding.txt. Removed S (Simple) since F & S are specified to be exclusive.
* Extend UTF8PROC_IGNORE to also ignore unassigned codepoints (such as \u2065) which are specified as being discarded by NFKC_CF.
* Document the changes to UTF8PROC_IGNORE in header.
* Add NFKC_CF helper function with documentation.
* restore old IGNORE behavior, add UTF8PROC_STRIPNA, rename to utf8proc_NFKC_Casefold, add a test
* success message
* test that IGNORE does not strip NA
* data update
* NFKC_Casefold shouldn't strip NA
Diffstat (limited to 'test/misc.c')
-rw-r--r-- | test/misc.c | 19 |
1 file changed, 19 insertions, 0 deletions
diff --git a/test/misc.c b/test/misc.c index e26cc6f..56d81d5 100644 --- a/test/misc.c +++ b/test/misc.c @@ -19,9 +19,28 @@ static void issue128(void) /* #128 */ free(nfd_out); free(nfc_out); } +static void issue102(void) /* #102 */ +{ + utf8proc_uint8_t input[] = {0x58, 0xe2, 0x81, 0xa5, 0x45, 0xcc, 0x80, 0xc2, 0xad, 0xe1, 0xb4, 0xac, 0x00}; /* "X\u2065E\u0300\u00ad\u1d2c" */ + utf8proc_uint8_t stripna[] = {0x78, 0xc3, 0xa8, 0x61, 0x00}; /* "x\u00e8a" */ + utf8proc_uint8_t correct[] = {0x78, 0xe2, 0x81, 0xa5, 0xc3, 0xa8, 0x61, 0x00}; /* "x\u2065\u00e8a" */ + utf8proc_uint8_t *output; + utf8proc_map(input, 0, &output, UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE | UTF8PROC_STRIPNA); + printf("NFKC_Casefold \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)output, (char*)stripna); + check(strlen((char*) output) == 4, "incorrect NFKC_Casefold+stripna length"); + check(!memcmp(stripna, output, 5), "incorrect NFKC_Casefold+stripna data"); + free(output); + output = utf8proc_NFKC_Casefold(input); + printf("NFKC_Casefold \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)output, (char*)correct); + check(strlen((char*) output) == 7, "incorrect NFKC_Casefold length"); + check(!memcmp(correct, output, 8), "incorrect NFKC_Casefold data"); /* NOTE(review): this second output is never freed, unlike the utf8proc_map result above; presumably it is heap-allocated too - confirm */ +} + int main(void) { issue128(); + issue102(); printf("Misc tests SUCCEEDED.\n"); return 0; } |