From 90721f2d39b0cdd5d22409f1bf4f6ce4b7382944 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 6 Mar 2015 17:36:08 -0500 Subject: directory cleanup: move tests and data into subdirectories --- test/graphemetest.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 test/graphemetest.c (limited to 'test/graphemetest.c') diff --git a/test/graphemetest.c b/test/graphemetest.c new file mode 100644 index 0000000..7914dc8 --- /dev/null +++ b/test/graphemetest.c @@ -0,0 +1,72 @@ +#include "tests.h" + +int main(int argc, char **argv) +{ + char *buf = NULL; + size_t bufsize = 0; + FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL; + uint8_t src[1024]; + + check(f != NULL, "error opening GraphemeBreakTest.txt"); + while (getline(&buf, &bufsize, f) > 0) { + size_t bi = 0, si = 0; + lineno += 1; + + if (lineno % 100 == 0) + printf("checking line %zd...\n", lineno); + + if (buf[0] == '#') continue; + + while (buf[bi]) { + bi = skipspaces(buf, bi); + if (buf[bi] == '/') { /* grapheme break */ + src[si++] = '/'; + bi++; + } + else if (buf[bi] == '+') { /* no break */ + bi++; + } + else if (buf[bi] == '#') { /* start of comments */ + break; + } + else { /* hex-encoded codepoint */ + bi += encode((char*) (src + si), buf + bi) - 1; + while (src[si]) ++si; /* advance to NUL termination */ + } + } + if (si && src[si-1] == '/') + --si; /* no break after final grapheme */ + src[si] = 0; /* NUL-terminate */ + + if (si) { + uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */ + size_t i = 0, j = 0; + ssize_t glen; + uint8_t *g; /* utf8proc_map grapheme results */ + while (i < si) { + if (src[i] != '/') + utf8[j++] = src[i++]; + else + i++; + } + glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND); + if (glen == UTF8PROC_ERROR_INVALIDUTF8) { + /* the test file contains surrogate codepoints, which are only for UTF-16 */ + printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno); + } + else { + check(glen >= 0, "utf8proc_map error = %s", + utf8proc_errmsg(glen)); + for (i = 0; i <= glen; ++i) + if (g[i] == 0xff) + g[i] = '/'; /* easier-to-read output (/ is not in test strings) */ + check(!strcmp((char*)g, (char*)src), + "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src); + } + free(g); + } + } + fclose(f); + printf("Passed tests after %zd lines!\n", lineno); + return 0; +} -- cgit v1.2.3