summary | refs | log | tree | commit | diff
path: root/utf8proc.c
diff options
context:
space:
mode:
author: Michael Drake <mike@smoothartist.com> 2016-11-21 14:22:39 +0000
committer: Steven G. Johnson <stevenj@mit.edu> 2016-11-21 09:22:39 -0500
commit: 70bbed8626e902d8c1e2b8277b0c61efb8a460bb (patch)
tree: e8a3bbc9ba7034cd38be424009b4845688fb9919 /utf8proc.c
parent: caef918abd0a9425b3942df3859c7bea7b8986e0 (diff)
download: libutf8proc-70bbed8626e902d8c1e2b8277b0c61efb8a460bb.tar.gz
          libutf8proc-70bbed8626e902d8c1e2b8277b0c61efb8a460bb.tar.bz2
Tlsa/ucs4 normalize (#88)
* Split codepoint sequence normalisation out into separate function.

  This creates utf8proc_normalize_utf32() which takes and returns a UTF-32 string, applying the following options:

  - UTF8PROC_NLF2LS
  - UTF8PROC_NLF2PS
  - UTF8PROC_NLF2LF
  - UTF8PROC_STRIPCC
  - UTF8PROC_COMPOSE
  - UTF8PROC_STABLE

  The utf8proc_reencode() function has been updated to call the new utf8proc_normalize_utf32().

* Update code documentation: utf8proc_reencode handles UTF8PROC_CHARBOUND.
Diffstat (limited to 'utf8proc.c')
-rw-r--r-- utf8proc.c 13
1 files changed, 10 insertions, 3 deletions
diff --git a/utf8proc.c b/utf8proc.c
index ba5143a..8dc583e 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -545,9 +545,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
return wpos;
}
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
- /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
- ASSERT: 'buffer' has one spare byte of free space at the end! */
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
utf8proc_ssize_t rpos;
utf8proc_ssize_t wpos = 0;
@@ -655,6 +654,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
}
length = wpos;
}
+ return length;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
+ ASSERT: 'buffer' has one spare byte of free space at the end! */
+ length = utf8proc_normalize_utf32(buffer, length, options);
+ if (length < 0) return length;
{
utf8proc_ssize_t rpos, wpos = 0;
utf8proc_int32_t uc;