summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Steven G. Johnson <stevenj@mit.edu> 2015-04-09 11:36:40 -0400
committer: Steven G. Johnson <stevenj@mit.edu> 2015-04-09 11:36:40 -0400
commit: 7c14ef5f8371e463a01e0f1de971caa600384390 (patch)
tree: 859d990277010fe6fa5d7d3e3ed53081cbee9388
parent: 498ecbddd87f2555a730e90810db7744cf416b82 (diff)
parent: 0a818c700353194d85baaf6a2f7f6ea32686b922 (diff)
download: libutf8proc-7c14ef5f8371e463a01e0f1de971caa600384390.tar.gz
          libutf8proc-7c14ef5f8371e463a01e0f1de971caa600384390.tar.bz2
Merge pull request #32 from JuliaLang/tk/ssize_t_typedef
Use a typedef instead of a #define for ssize_t with MSVC
-rw-r--r--  data/data_generator.rb  8
-rw-r--r--  test/graphemetest.c     8
-rw-r--r--  test/normtest.c         2
-rw-r--r--  test/tests.h            2
-rw-r--r--  utf8proc.c              130
-rw-r--r--  utf8proc.h              85
-rw-r--r--  utf8proc_data.c         8
7 files changed, 126 insertions, 117 deletions
diff --git a/data/data_generator.rb b/data/data_generator.rb
index b24caad..3d8c3b7 100644
--- a/data/data_generator.rb
+++ b/data/data_generator.rb
@@ -268,7 +268,7 @@ for code in 0...0x110000
end
end
-$stdout << "const int32_t utf8proc_sequences[] = {\n "
+$stdout << "const utf8proc_int32_t utf8proc_sequences[] = {\n "
i = 0
$int_array.each do |entry|
i += 1
@@ -280,7 +280,7 @@ $int_array.each do |entry|
end
$stdout << "};\n\n"
-$stdout << "const uint16_t utf8proc_stage1table[] = {\n "
+$stdout << "const utf8proc_uint16_t utf8proc_stage1table[] = {\n "
i = 0
stage1.each do |entry|
i += 1
@@ -292,7 +292,7 @@ stage1.each do |entry|
end
$stdout << "};\n\n"
-$stdout << "const uint16_t utf8proc_stage2table[] = {\n "
+$stdout << "const utf8proc_uint16_t utf8proc_stage2table[] = {\n "
i = 0
stage2.flatten.each do |entry|
i += 1
@@ -311,7 +311,7 @@ properties.each { |line|
}
$stdout << "};\n\n"
-$stdout << "const int32_t utf8proc_combinations[] = {\n "
+$stdout << "const utf8proc_int32_t utf8proc_combinations[] = {\n "
i = 0
comb1st_indicies.keys.each_index do |a|
comb2nd_indicies.keys.each_index do |b|
diff --git a/test/graphemetest.c b/test/graphemetest.c
index 7914dc8..a05b6e2 100644
--- a/test/graphemetest.c
+++ b/test/graphemetest.c
@@ -5,7 +5,7 @@ int main(int argc, char **argv)
char *buf = NULL;
size_t bufsize = 0;
FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
- uint8_t src[1024];
+ utf8proc_uint8_t src[1024];
check(f != NULL, "error opening GraphemeBreakTest.txt");
while (getline(&buf, &bufsize, f) > 0) {
@@ -39,10 +39,10 @@ int main(int argc, char **argv)
src[si] = 0; /* NUL-terminate */
if (si) {
- uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
+ utf8proc_uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
size_t i = 0, j = 0;
- ssize_t glen;
- uint8_t *g; /* utf8proc_map grapheme results */
+ utf8proc_ssize_t glen;
+ utf8proc_uint8_t *g; /* utf8proc_map grapheme results */
while (i < si) {
if (src[i] != '/')
utf8[j++] = src[i++];
diff --git a/test/normtest.c b/test/normtest.c
index 7add4c6..555c14c 100644
--- a/test/normtest.c
+++ b/test/normtest.c
@@ -1,7 +1,7 @@
#include "tests.h"
#define CHECK_NORM(NRM, norm, src) { \
- char *src_norm = (char*) utf8proc_ ## NRM((uint8_t*) src); \
+ char *src_norm = (char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \
check(!strcmp(norm, src_norm), \
"normalization failed for %s -> %s", src, norm); \
free(src_norm); \
diff --git a/test/tests.h b/test/tests.h
index d4897f8..c27185d 100644
--- a/test/tests.h
+++ b/test/tests.h
@@ -47,7 +47,7 @@ size_t encode(char *dest, const char *buf)
}
check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
i = j; /* skip to char after hex input */
- d += utf8proc_encode_char(c, (uint8_t *) (dest + d));
+ d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
} while (1);
}
diff --git a/utf8proc.c b/utf8proc.c
index 0ff1533..a1d0c4b 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -44,7 +44,7 @@
#include "utf8proc_data.c"
-UTF8PROC_DLLEXPORT const int8_t utf8proc_utf8class[256] = {
+UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -91,7 +91,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
}
-UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) {
+UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
switch (errcode) {
case UTF8PROC_ERROR_NOMEM:
return "Memory for processing UTF-8 data could not be allocated.";
@@ -108,12 +108,12 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) {
}
}
-UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(
- const uint8_t *str, ssize_t strlen, int32_t *dst
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) {
int length;
int i;
- int32_t uc = -1;
+ utf8proc_int32_t uc = -1;
*dst = -1;
if (!strlen) return 0;
length = utf8proc_utf8class[str[0]];
@@ -148,14 +148,14 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(
return length;
}
-UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) {
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
if (uc < 0 || uc >= 0x110000 ||
((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
(uc >= 0xFDD0 && uc < 0xFDF0)) return false;
else return true;
}
-UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
if (uc < 0x00) {
return 0;
} else if (uc < 0x80) {
@@ -186,7 +186,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
}
/* internal "unsafe" version that does not check whether uc is in range */
-static const utf8proc_property_t *get_property(int32_t uc) {
+static const utf8proc_property_t *get_property(utf8proc_int32_t uc) {
/* ASSERT: uc >= 0 && uc < 0x110000 */
return utf8proc_properties + (
utf8proc_stage2table[
@@ -195,12 +195,12 @@ static const utf8proc_property_t *get_property(int32_t uc) {
);
}
-UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
+UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc);
}
/* return whether there is a grapheme break between boundclasses lbc and tbc */
-static bool grapheme_break(int lbc, int tbc) {
+static utf8proc_bool grapheme_break(int lbc, int tbc) {
return
(lbc == UTF8PROC_BOUNDCLASS_START) ? true :
(lbc == UTF8PROC_BOUNDCLASS_CR &&
@@ -226,22 +226,22 @@ static bool grapheme_break(int lbc, int tbc) {
}
/* return whether there is a grapheme break between codepoints c1 and c2 */
-UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2) {
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
return grapheme_break(utf8proc_get_property(c1)->boundclass,
utf8proc_get_property(c2)->boundclass);
}
/* return a character width analogous to wcwidth (except portable and
hopefully less buggy than most system wcwidth functions). */
-UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t c) {
+UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
return utf8proc_get_property(c)->charwidth;
}
-UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t c) {
+UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
return utf8proc_get_property(c)->category;
}
-UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) {
+UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
return s[utf8proc_category(c)];
}
@@ -250,17 +250,17 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) {
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
options & ~UTF8PROC_LUMP, last_boundclass)
-UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
const utf8proc_property_t *property;
utf8proc_propval_t category;
- int32_t hangul_sindex;
+ utf8proc_int32_t hangul_sindex;
if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
property = get_property(uc);
category = property->category;
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
- int32_t hangul_tindex;
+ utf8proc_int32_t hangul_tindex;
if (bufsize >= 1) {
dst[0] = UTF8PROC_HANGUL_LBASE +
hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
@@ -312,8 +312,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
}
if (options & UTF8PROC_CASEFOLD) {
if (property->casefold_mapping) {
- const int32_t *casefold_entry;
- ssize_t written = 0;
+ const utf8proc_int32_t *casefold_entry;
+ utf8proc_ssize_t written = 0;
for (casefold_entry = property->casefold_mapping;
*casefold_entry >= 0; casefold_entry++) {
written += utf8proc_decompose_char(*casefold_entry, dst+written,
@@ -327,8 +327,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
if (property->decomp_mapping &&
(!property->decomp_type || (options & UTF8PROC_COMPAT))) {
- const int32_t *decomp_entry;
- ssize_t written = 0;
+ const utf8proc_int32_t *decomp_entry;
+ utf8proc_ssize_t written = 0;
for (decomp_entry = property->decomp_mapping;
*decomp_entry >= 0; decomp_entry++) {
written += utf8proc_decompose_char(*decomp_entry, dst+written,
@@ -340,7 +340,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
}
}
if (options & UTF8PROC_CHARBOUND) {
- bool boundary;
+ utf8proc_bool boundary;
int tbc = property->boundclass;
boundary = grapheme_break(*last_boundclass, tbc);
*last_boundclass = tbc;
@@ -354,21 +354,21 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi
return 1;
}
-UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
- const uint8_t *str, ssize_t strlen,
- int32_t *buffer, ssize_t bufsize, utf8proc_option_t options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
) {
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
- ssize_t wpos = 0;
+ utf8proc_ssize_t wpos = 0;
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
if ((options & UTF8PROC_STRIPMARK) &&
!(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
{
- int32_t uc;
- ssize_t rpos = 0;
- ssize_t decomp_result;
+ utf8proc_int32_t uc;
+ utf8proc_ssize_t rpos = 0;
+ utf8proc_ssize_t decomp_result;
int boundclass = UTF8PROC_BOUNDCLASS_START;
while (1) {
if (options & UTF8PROC_NULLTERM) {
@@ -390,14 +390,14 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
if (decomp_result < 0) return decomp_result;
wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */
- if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
+ if (wpos < 0 || wpos > SSIZE_MAX/sizeof(utf8proc_int32_t)/2)
return UTF8PROC_ERROR_OVERFLOW;
}
}
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
- ssize_t pos = 0;
+ utf8proc_ssize_t pos = 0;
while (pos < wpos-1) {
- int32_t uc1, uc2;
+ utf8proc_int32_t uc1, uc2;
const utf8proc_property_t *property1, *property2;
uc1 = buffer[pos];
uc2 = buffer[pos+1];
@@ -416,13 +416,13 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
return wpos;
}
-UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
- ssize_t rpos;
- ssize_t wpos = 0;
- int32_t uc;
+ utf8proc_ssize_t rpos;
+ utf8proc_ssize_t wpos = 0;
+ utf8proc_int32_t uc;
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
@@ -451,23 +451,23 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
length = wpos;
}
if (options & UTF8PROC_COMPOSE) {
- int32_t *starter = NULL;
- int32_t current_char;
+ utf8proc_int32_t *starter = NULL;
+ utf8proc_int32_t current_char;
const utf8proc_property_t *starter_property = NULL, *current_property;
utf8proc_propval_t max_combining_class = -1;
- ssize_t rpos;
- ssize_t wpos = 0;
- int32_t composition;
+ utf8proc_ssize_t rpos;
+ utf8proc_ssize_t wpos = 0;
+ utf8proc_int32_t composition;
for (rpos = 0; rpos < length; rpos++) {
current_char = buffer[rpos];
current_property = get_property(current_char);
if (starter && current_property->combining_class > max_combining_class) {
/* combination perhaps possible */
- int32_t hangul_lindex;
- int32_t hangul_sindex;
+ utf8proc_int32_t hangul_lindex;
+ utf8proc_int32_t hangul_sindex;
hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
- int32_t hangul_vindex;
+ utf8proc_int32_t hangul_vindex;
hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
*starter = UTF8PROC_HANGUL_SBASE +
@@ -480,7 +480,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
(hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
- int32_t hangul_tindex;
+ utf8proc_int32_t hangul_tindex;
hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
*starter += hangul_tindex;
@@ -520,26 +520,26 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut
length = wpos;
}
{
- ssize_t rpos, wpos = 0;
- int32_t uc;
+ utf8proc_ssize_t rpos, wpos = 0;
+ utf8proc_int32_t uc;
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
- wpos += utf8proc_encode_char(uc, ((uint8_t *)buffer) + wpos);
+ wpos += utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
}
- ((uint8_t *)buffer)[wpos] = 0;
+ ((utf8proc_uint8_t *)buffer)[wpos] = 0;
return wpos;
}
}
-UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
- const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
) {
- int32_t *buffer;
- ssize_t result;
+ utf8proc_int32_t *buffer;
+ utf8proc_ssize_t result;
*dstptr = NULL;
result = utf8proc_decompose(str, strlen, NULL, 0, options);
if (result < 0) return result;
- buffer = (int32_t *) malloc(result * sizeof(int32_t) + 1);
+ buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
if (!buffer) return UTF8PROC_ERROR_NOMEM;
result = utf8proc_decompose(str, strlen, buffer, result, options);
if (result < 0) {
@@ -552,37 +552,37 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
return result;
}
{
- int32_t *newptr;
- newptr = (int32_t *) realloc(buffer, (size_t)result+1);
+ utf8proc_int32_t *newptr;
+ newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
if (newptr) buffer = newptr;
}
- *dstptr = (uint8_t *)buffer;
+ *dstptr = (utf8proc_uint8_t *)buffer;
return result;
}
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE);
return retval;
}
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE);
return retval;
}
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
return retval;
}
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
return retval;
diff --git a/utf8proc.h b/utf8proc.h
index e089f28..69c3213 100644
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -77,23 +77,32 @@
#include <stdlib.h>
#include <sys/types.h>
#ifdef _MSC_VER
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef short int16_t;
-typedef unsigned short uint16_t;
-typedef int int32_t;
+typedef signed char utf8proc_int8_t;
+typedef unsigned char utf8proc_uint8_t;
+typedef short utf8proc_int16_t;
+typedef unsigned short utf8proc_uint16_t;
+typedef int utf8proc_int32_t;
# ifdef _WIN64
-# define ssize_t __int64
+typedef __int64 utf8proc_ssize_t;
# else
-# define ssize_t int
+typedef int utf8proc_ssize_t;
# endif
# ifndef __cplusplus
-typedef unsigned char bool;
+typedef unsigned char utf8proc_bool;
enum {false, true};
+# else
+typedef bool utf8proc_bool;
# endif
#else
# include <stdbool.h>
# include <inttypes.h>
+typedef int8_t utf8proc_int8_t;
+typedef uint8_t utf8proc_uint8_t;
+typedef int16_t utf8proc_int16_t;
+typedef uint16_t utf8proc_uint16_t;
+typedef int32_t utf8proc_int32_t;
+typedef ssize_t utf8proc_ssize_t;
+typedef bool utf8proc_bool;
#endif
#include <limits.h>
@@ -203,7 +212,7 @@ typedef enum {
/* @name Types */
/** Holds the value of a property. */
-typedef int16_t utf8proc_propval_t;
+typedef utf8proc_int16_t utf8proc_propval_t;
/** Struct containing information about a codepoint. */
typedef struct utf8proc_property_struct {
@@ -223,13 +232,13 @@ typedef struct utf8proc_property_struct {
* @see utf8proc_decomp_type_t.
*/
utf8proc_propval_t decomp_type;
- const int32_t *decomp_mapping;
- const int32_t *casefold_mapping;
- int32_t uppercase_mapping;
- int32_t lowercase_mapping;
- int32_t titlecase_mapping;
- int32_t comb1st_index;
- int32_t comb2nd_index;
+ const utf8proc_int32_t *decomp_mapping;
+ const utf8proc_int32_t *casefold_mapping;
+ utf8proc_int32_t uppercase_mapping;
+ utf8proc_int32_t lowercase_mapping;
+ utf8proc_int32_t titlecase_mapping;
+ utf8proc_int32_t comb1st_index;
+ utf8proc_int32_t comb2nd_index;
unsigned bidi_mirrored:1;
unsigned comp_exclusion:1;
/**
@@ -351,7 +360,7 @@ typedef enum {
* Array containing the byte lengths of a UTF-8 encoded codepoint based
* on the first byte.
*/
-UTF8PROC_DLLEXPORT extern const int8_t utf8proc_utf8class[256];
+UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
/**
* Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
@@ -364,7 +373,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
* Returns an informative error string for the given utf8proc error code
* (e.g. the error codes returned by @ref utf8proc_map).
*/
-UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
+UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode);
/**
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
@@ -376,7 +385,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode);
* In case of success, the number of bytes read is returned; otherwise, a
* negative error code is returned.
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *codepoint_ref);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
/**
* Check if a codepoint is valid (regardless of whether it has been
@@ -384,7 +393,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen,
*
* @return 1 if the given `codepoint` is valid and otherwise return 0.
*/
-UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint);
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
/**
* Encodes the codepoint as an UTF-8 string in the byte array pointed
@@ -395,7 +404,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint);
*
* This function does not check whether `codepoint` is valid Unicode.
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
/**
* Look up the properties for a given codepoint.
@@ -409,7 +418,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst)
* If the codepoint is unassigned or invalid, a pointer to a special struct is
* returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
*/
-UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t codepoint);
+UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t codepoint);
/** Decompose a codepoint into an array of codepoints.
*
@@ -438,8 +447,8 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t code
* required buffer size is returned, while the buffer will be overwritten with
* undefined data.
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(
- int32_t codepoint, int32_t *dst, ssize_t bufsize,
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
+ utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
utf8proc_option_t options, int *last_boundclass
);
@@ -459,9 +468,9 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(
* required buffer size is returned, while the buffer will be overwritten with
* undefined data.
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
- const uint8_t *str, ssize_t strlen,
- int32_t *buffer, ssize_t bufsize, utf8proc_option_t options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
);
/**
@@ -489,13 +498,13 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose(
* entries of the array pointed to by `str` have to be in the
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options);
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
/**
* Given a pair of consecutive codepoints, return whether a grapheme break is
* permitted between them (as defined by the extended grapheme clusters in UAX#29).
*/
-UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t codepoint2);
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
/**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
@@ -505,19 +514,19 @@ UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t code
* @note
* If you want to check for particular types of non-printable characters,
* (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */
-UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t codepoint);
+UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t codepoint);
/**
* Return the Unicode category for the codepoint (one of the
* @ref utf8proc_category_t constants.)
*/
-UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t codepoint);
+UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint);
/**
* Return the two-letter (nul-terminated) Unicode category string for
* the codepoint (e.g. `"Lu"` or `"Co"`).
*/
-UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint);
+UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoint);
/**
* Maps the given UTF-8 string pointed to by `str` to a new UTF-8
@@ -537,8 +546,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint);
* @note The memory of the new UTF-8 string will have been allocated
* with `malloc`, and should therefore be deallocated with `free`.
*/
-UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
- const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
);
/** @name Unicode normalization
@@ -550,13 +559,13 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_map(
*/
/** @{ */
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
/** NFC normalization (@ref UTF8PROC_COMPOSE). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
/** NFD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
/** NFD normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
-UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str);
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
/** @} */
#ifdef __cplusplus
diff --git a/utf8proc_data.c b/utf8proc_data.c
index 75af6ee..502a013 100644
--- a/utf8proc_data.c
+++ b/utf8proc_data.c
@@ -1,4 +1,4 @@
-const int32_t utf8proc_sequences[] = {
+const utf8proc_int32_t utf8proc_sequences[] = {
97, -1, 98, -1, 99, -1, 100,
-1, 101, -1, 102, -1, 103, -1, 104,
-1, 105, -1, 106, -1, 107, -1, 108,
@@ -1523,7 +1523,7 @@ const int32_t utf8proc_sequences[] = {
172689, -1, 19798, -1, 40702, -1, 40709, -1,
40719, -1, 40726, -1, 173568, -1, };
-const uint16_t utf8proc_stage1table[] = {
+const utf8proc_uint16_t utf8proc_stage1table[] = {
0, 256, 512, 768, 1024, 1280, 1536,
1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584,
3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632,
@@ -2070,7 +2070,7 @@ const uint16_t utf8proc_stage1table[] = {
18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432,
35584, };
-const uint16_t utf8proc_stage2table[] = {
+const utf8proc_uint16_t utf8proc_stage2table[] = {
1, 2, 2, 2, 2, 2, 2,
2, 2, 3, 4, 3, 5, 6, 2,
2, 2, 2, 2, 2, 2, 2, 2,
@@ -13003,7 +13003,7 @@ const utf8proc_property_t utf8proc_properties[] = {
{UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, 0, utf8proc_sequences + 12179, NULL, -1, -1, -1, -1, -1, false, false, false, false, UTF8PROC_BOUNDCLASS_OTHER, 2},
};
-const int32_t utf8proc_combinations[] = {
+const utf8proc_int32_t utf8proc_combinations[] = {
192, 193, 194, 195, 196, 197, -1,
256, 258, 260, 550, 461, -1, -1, 512,
514, -1, -1, -1, -1, -1, -1, -1,