summary | refs | log | tree | commit | diff
path: root/utf8proc.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8proc.c')
-rw-r--r-- utf8proc.c 18
1 files changed, 8 insertions, 10 deletions
diff --git a/utf8proc.c b/utf8proc.c
index f03126c..98e754d 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -196,9 +196,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
} else return 0;
}
-/* internal "unsafe" version that does not check whether uc is in range */
-static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
+/* internal version used for inserting 0xff bytes between graphemes */
+static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
if (uc < 0x00) {
+ if (uc == -1) { /* internal value used for grapheme breaks */
+ dst[0] = (utf8proc_uint8_t)0xFF;
+ return 1;
+ }
return 0;
} else if (uc < 0x80) {
dst[0] = (utf8proc_uint8_t)uc;
@@ -207,12 +211,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 2;
- } else if (uc == 0xFFFF) {
- dst[0] = (utf8proc_uint8_t)0xFF;
- return 1;
- } else if (uc == 0xFFFE) {
- dst[0] = (utf8proc_uint8_t)0xFE;
- return 1;
} else if (uc < 0x10000) {
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
@@ -480,7 +478,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
int tbc = property->boundclass;
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
if (boundary) {
- if (bufsize >= 1) dst[0] = 0xFFFF;
+ if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
if (bufsize >= 2) dst[1] = uc;
return 2;
}
@@ -686,7 +684,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
if (options & UTF8PROC_CHARBOUND) {
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
- wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
+ wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
}
} else {
for (rpos = 0; rpos < length; rpos++) {