diff options
author | Steven G. Johnson <stevenj@mit.edu> | 2018-07-24 13:25:51 -0400 |
---|---|---|
committer | Steven G. Johnson <stevenj@mit.edu> | 2018-07-24 13:25:51 -0400 |
commit | e0295be467d15e7abec2af275bcca30dc816bc9e (patch) | |
tree | 370dcfc22d38a3ed302c1beeafa2ee5bf1b52db7 /utf8proc.c | |
parent | 98e5529a0a6cd4dd09a8885029253f26c677c85f (diff) | |
parent | d4a58cfec5345bbb2bb0db1e85172a8cff278da7 (diff) | |
download | libutf8proc-e0295be467d15e7abec2af275bcca30dc816bc9e.tar.gz libutf8proc-e0295be467d15e7abec2af275bcca30dc816bc9e.tar.bz2 |
Merge branch 'master' of https://github.com/JuliaLang/utf8proc
Diffstat (limited to 'utf8proc.c')
-rw-r--r-- | utf8proc.c | 23 |
1 file changed, 11 insertions, 12 deletions
@@ -271,12 +271,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) { tbc == UTF8PROC_BOUNDCLASS_ZWJ || // --- tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b - ((lbc == UTF8PROC_BOUNDCLASS_E_BASE || // GB10 (requires additional handling below) - lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && // ---- - tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ---- - (lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11 - (tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ---- - tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ---- + (lbc == UTF8PROC_BOUNDCLASS_E_ZWG && // GB11 (requires additional handling below) + tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ---- (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below) tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ---- true; // GB999 @@ -295,12 +291,15 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t // forbidden by a different rule such as GB9). if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) *state = UTF8PROC_BOUNDCLASS_OTHER; - // Special support for GB10. Fold any EXTEND codepoints into the previous - // boundclass if we're dealing with an emoji base boundclass. - else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE || - *state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && - tbc == UTF8PROC_BOUNDCLASS_EXTEND) - *state = UTF8PROC_BOUNDCLASS_E_BASE; + // Special support for GB11 (emoji extend* zwj / emoji) + else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) { + if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji + *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC; + else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ) + *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo + else + *state = tbc; + } else *state = tbc; } |