diff options
author | Michael Drake <mike@smoothartist.com> | 2016-11-21 14:22:39 +0000 |
---|---|---|
committer | Steven G. Johnson <stevenj@mit.edu> | 2016-11-21 09:22:39 -0500 |
commit | 70bbed8626e902d8c1e2b8277b0c61efb8a460bb (patch) | |
tree | e8a3bbc9ba7034cd38be424009b4845688fb9919 /utf8proc.c | |
parent | caef918abd0a9425b3942df3859c7bea7b8986e0 (diff) | |
download | libutf8proc-70bbed8626e902d8c1e2b8277b0c61efb8a460bb.tar.gz libutf8proc-70bbed8626e902d8c1e2b8277b0c61efb8a460bb.tar.bz2 |
Tlsa/ucs4 normalize (#88)
* Split codepoint sequence normalisation out into a separate function.
This creates utf8proc_normalize_utf32(), which takes a UTF-32 string,
normalises it in place, and returns its new length, applying the following options:
- UTF8PROC_NLF2LS
- UTF8PROC_NLF2PS
- UTF8PROC_NLF2LF
- UTF8PROC_STRIPCC
- UTF8PROC_COMPOSE
- UTF8PROC_STABLE
The utf8proc_reencode() function has been updated to call the
new utf8proc_normalize_utf32().
* Update code documentation: utf8proc_reencode handles UTF8PROC_CHARBOUND.
Diffstat (limited to 'utf8proc.c')
-rw-r--r-- | utf8proc.c | 13 |
1 file changed, 10 insertions, 3 deletions
```diff
@@ -545,9 +545,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
   return wpos;
 }
 
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
-  /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
-     ASSERT: 'buffer' has one spare byte of free space at the end! */
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+  /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
   if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
     utf8proc_ssize_t rpos;
     utf8proc_ssize_t wpos = 0;
@@ -655,6 +654,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
     }
     length = wpos;
   }
+  return length;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+  /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
+     ASSERT: 'buffer' has one spare byte of free space at the end! */
+  length = utf8proc_normalize_utf32(buffer, length, options);
+  if (length < 0) return length;
   {
     utf8proc_ssize_t rpos, wpos = 0;
     utf8proc_int32_t uc;
```