From 70bbed8626e902d8c1e2b8277b0c61efb8a460bb Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Mon, 21 Nov 2016 14:22:39 +0000 Subject: Tlsa/ucs4 normalize (#88) * Split codepoint sequence normalisation out into separate function. This creates utf8proc_normalize_utf32() which takes and returns a UTF-32 string, applying the following options: - UTF8PROC_NLF2LS - UTF8PROC_NLF2PS - UTF8PROC_NLF2LF - UTF8PROC_STRIPCC - UTF8PROC_COMPOSE - UTF8PROC_STABLE The utf8proc_reencode() function has been updated to call the new utf8proc_normalize_utf32(). * Update code documentation: utf8proc_reencode handles UTF8PROC_CHARBOUND. --- utf8proc.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'utf8proc.c') diff --git a/utf8proc.c b/utf8proc.c index ba5143a..8dc583e 100644 --- a/utf8proc.c +++ b/utf8proc.c @@ -545,9 +545,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( return wpos; } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { - /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored - ASSERT: 'buffer' has one spare byte of free space at the end! */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { + /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { utf8proc_ssize_t rpos; utf8proc_ssize_t wpos = 0; @@ -655,6 +654,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, } length = wpos; } + return length; +} + +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { + /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored + ASSERT: 'buffer' has one spare byte of free space at the end! */ + length = utf8proc_normalize_utf32(buffer, length, options); + if (length < 0) return length; { utf8proc_ssize_t rpos, wpos = 0; utf8proc_int32_t uc; -- cgit v1.2.3