From bd8060ff57e1dff8cd017bfa63d71d9e038e63bb Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Mon, 4 Jan 2016 21:40:30 +0000 Subject: extend API with normalise extension --- README | 4 ++-- include/libutf8proc/utf8proc.h | 6 ++++++ src/utf8proc.c | 11 +++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README b/README index 05c5e14..c16e969 100644 --- a/README +++ b/README @@ -6,8 +6,8 @@ conveniance library for NetSurf. Previously this library was simply copied into the NetSurf sources. This takes the unicode 5 capable version 1.3.1 of the library and -converts it to the NetSurf build system there are no data changes from -upstream. +converts it to the NetSurf build system. Additional API has been added +with a normalisation function but there are no data changes from upstream. All the Makefiles and changes are licenced as per the utf8proc source using the MIT "expat" licence. diff --git a/include/libutf8proc/utf8proc.h b/include/libutf8proc/utf8proc.h index 00f10c8..1731703 100644 --- a/include/libutf8proc/utf8proc.h +++ b/include/libutf8proc/utf8proc.h @@ -478,6 +478,12 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ); +/** + * Normalises the sequence of 'length' unicode codepoints pointed to by 'buffer', without encoding them as UTF-8, + * and returns the resulting length. This is the first stage of utf8proc_reencode; see it for further details. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalise(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); + /** * Reencodes the sequence of `length` codepoints pointed to by `buffer` * UTF-8 data in-place (i.e., the result is also stored in `buffer`). 
diff --git a/src/utf8proc.c b/src/utf8proc.c index c302b79..cf64034 100644 --- a/src/utf8proc.c +++ b/src/utf8proc.c @@ -435,7 +435,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( if (decomp_result < 0) return decomp_result; wpos += decomp_result; /* prohibiting integer overflows due to too long strings: */ - if (wpos < 0 || wpos > SSIZE_MAX/sizeof(utf8proc_int32_t)/2) + if (wpos < 0 || wpos > (ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2)) return UTF8PROC_ERROR_OVERFLOW; } } @@ -461,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( return wpos; } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalise(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored ASSERT: 'buffer' has one spare byte of free space at the end! */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { @@ -564,6 +564,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, } length = wpos; } + return length; +} + +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { + length = utf8proc_normalise(buffer, length, options); + if (length < 0) return length; + { utf8proc_ssize_t rpos, wpos = 0; utf8proc_int32_t uc; -- cgit v1.2.3