summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2016-01-04 21:40:30 +0000
committerVincent Sanders <vince@kyllikki.org>2016-01-04 21:40:30 +0000
commitbd8060ff57e1dff8cd017bfa63d71d9e038e63bb (patch)
tree828cf4d1be6582f9b9c4cbb14712ed3b3e22bc67
parenta783495a127b7d4cc1a4abae25aba658b38110b4 (diff)
downloadlibutf8proc-vince/v1.3.1.tar.gz
libutf8proc-vince/v1.3.1.tar.bz2
extend API with normalise extensionvince/v1.3.1
-rw-r--r--README4
-rw-r--r--include/libutf8proc/utf8proc.h6
-rw-r--r--src/utf8proc.c11
3 files changed, 17 insertions, 4 deletions
diff --git a/README b/README
index 05c5e14..c16e969 100644
--- a/README
+++ b/README
@@ -6,8 +6,8 @@ conveniance library for NetSurf. Previously this library was simply
copied into the NetSurf sources.
This takes the unicode 5 capable version 1.3.1 of the library and
-converts it to the NetSurf build system there are no data changes from
-upstream.
+converts it to the NetSurf build system. A normalisation function has been
+added to the API, but there are no data changes from upstream.
All the Makefiles and changes are licenced as per the utf8proc
source using the MIT "expat" licence.
diff --git a/include/libutf8proc/utf8proc.h b/include/libutf8proc/utf8proc.h
index 00f10c8..1731703 100644
--- a/include/libutf8proc/utf8proc.h
+++ b/include/libutf8proc/utf8proc.h
@@ -479,6 +479,12 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
);
/**
+ * Normalises, in place, the 'length' codepoints at 'buffer' without encoding
+ * them as UTF-8; utf8proc_reencode calls this first. See it for details.
+ */
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalise(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
+
+/**
* Reencodes the sequence of `length` codepoints pointed to by `buffer`
* UTF-8 data in-place (i.e., the result is also stored in `buffer`).
*
diff --git a/src/utf8proc.c b/src/utf8proc.c
index c302b79..cf64034 100644
--- a/src/utf8proc.c
+++ b/src/utf8proc.c
@@ -435,7 +435,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
if (decomp_result < 0) return decomp_result;
wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */
- if (wpos < 0 || wpos > SSIZE_MAX/sizeof(utf8proc_int32_t)/2)
+ if (wpos < 0 || wpos > (ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
return UTF8PROC_ERROR_OVERFLOW;
}
}
@@ -461,7 +461,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
return wpos;
}
-UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalise(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
@@ -564,6 +564,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
}
length = wpos;
}
+ return length;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+ length = utf8proc_normalise(buffer, length, options);
+ if (length < 0) return length;
+
{
utf8proc_ssize_t rpos, wpos = 0;
utf8proc_int32_t uc;