diff options
author | Vincent Sanders <vince@kyllikki.org> | 2014-11-14 13:39:47 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2014-11-14 13:39:47 +0000 |
commit | 6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a (patch) | |
tree | ebddb89ed063e1e8927ed9b837762f574d34fbf4 | |
parent | c203e4dcb680ec3bdccf5fdf7a496549442c56de (diff) | |
download | libutf8proc-6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a.tar.gz libutf8proc-6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a.tar.bz2 |
Update with API extension from the NetSurf version
-rw-r--r-- | README | 11 | ||||
-rw-r--r-- | include/libutf8proc/utf8proc.h | 8 | ||||
-rw-r--r-- | src/utf8proc.c | 11 |
3 files changed, 24 insertions, 6 deletions
@@ -6,8 +6,11 @@ conveniance library for NetSurf.
 Previously this library was simply copied into the NetSurf sources.
 
 This takes the unicode 5 capable version 1.1.6 of the library and
-converts it to the NetSurf build system. No C source code has been
-changed from upstream and all the Makefiles are licenced as per the
-utf8proc source.
+converts it to the NetSurf build system. additional API has been added
+with a normalisation function but there are no data changes from
+upstream.
 
-[1] http://www.public-software-group.org/utf8proc
\ No newline at end of file
+All the Makefiles and changes are licenced as per the utf8proc
+source using the MIT "expat" licence.
+
+[1] http://www.public-software-group.org/utf8proc
diff --git a/include/libutf8proc/utf8proc.h b/include/libutf8proc/utf8proc.h
index 24a891b..c074779 100644
--- a/include/libutf8proc/utf8proc.h
+++ b/include/libutf8proc/utf8proc.h
@@ -81,6 +81,8 @@ extern "C" {
 #define SSIZE_MAX ((size_t)SIZE_MAX/2)
 #endif
 
+#define UTF8PROC_CCC_VIRAMA 9
+
 #define UTF8PROC_NULLTERM (1<<0)
 #define UTF8PROC_STABLE (1<<1)
 #define UTF8PROC_COMPAT (1<<2)
@@ -326,6 +328,12 @@ ssize_t utf8proc_decompose(
  * buffer size is returned.
  */
 
+ssize_t utf8proc_normalise(int32_t *buffer, ssize_t length, int options);
+/*
+ * Reencodes the sequence of unicode characters given by the pointer
+ * 'buffer' and 'length'. See utf8proc_reencode for further details.
+ */
+
 ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
 /*
  * Reencodes the sequence of unicode characters given by the pointer
diff --git a/src/utf8proc.c b/src/utf8proc.c
index ef2d433..3e0b25f 100644
--- a/src/utf8proc.c
+++ b/src/utf8proc.c
@@ -387,7 +387,7 @@ ssize_t utf8proc_decompose(
       if (decomp_result < 0) return decomp_result;
       wpos += decomp_result;
       /* prohibiting integer overflows due to too long strings: */
-      if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
+      if (wpos < 0 || wpos > (ssize_t)(SSIZE_MAX/sizeof(int32_t)/2))
         return UTF8PROC_ERROR_OVERFLOW;
     }
   }
@@ -413,7 +413,7 @@ ssize_t utf8proc_decompose(
   return wpos;
 }
 
-ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
+ssize_t utf8proc_normalise(int32_t *buffer, ssize_t length, int options) {
   /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
      ASSERT: 'buffer' has one spare byte of free space at the end! */
   if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
@@ -516,6 +516,13 @@ ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
     }
     length = wpos;
   }
+  return length;
+}
+
+ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
+  length = utf8proc_normalise(buffer, length, options);
+  if (length < 0) return length;
+
   {
     ssize_t rpos, wpos = 0;
     int32_t uc;