summary | refs | log | tree | commit | diff
path: root/utf8proc.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8proc.c')
-rw-r--r-- utf8proc.c 61
1 files changed, 31 insertions, 30 deletions
diff --git a/utf8proc.c b/utf8proc.c
index c66b1ae..133685d 100644
--- a/utf8proc.c
+++ b/utf8proc.c
@@ -1,3 +1,4 @@
+/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
/*
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
*
@@ -87,7 +88,7 @@ DLLEXPORT const int8_t utf8proc_utf8class[256] = {
#define STRINGIZEx(x) #x
#define STRINGIZE(x) STRINGIZEx(x)
DLLEXPORT const char *utf8proc_version(void) {
- return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "-dev";
+ return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "-dev";
}
DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) {
@@ -195,54 +196,54 @@ static const utf8proc_property_t *get_property(int32_t uc) {
}
DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
- return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc);
+ return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc);
}
/* return whether there is a grapheme break between boundclasses lbc and tbc */
static bool grapheme_break(int lbc, int tbc) {
- return
- (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
- (lbc == UTF8PROC_BOUNDCLASS_CR &&
- tbc == UTF8PROC_BOUNDCLASS_LF) ? false :
- (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
- (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
- (tbc == UTF8PROC_BOUNDCLASS_EXTEND) ? false :
- (lbc == UTF8PROC_BOUNDCLASS_L &&
- (tbc == UTF8PROC_BOUNDCLASS_L ||
- tbc == UTF8PROC_BOUNDCLASS_V ||
- tbc == UTF8PROC_BOUNDCLASS_LV ||
- tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false :
- ((lbc == UTF8PROC_BOUNDCLASS_LV ||
- lbc == UTF8PROC_BOUNDCLASS_V) &&
- (tbc == UTF8PROC_BOUNDCLASS_V ||
- tbc == UTF8PROC_BOUNDCLASS_T)) ? false :
- ((lbc == UTF8PROC_BOUNDCLASS_LVT ||
- lbc == UTF8PROC_BOUNDCLASS_T) &&
- tbc == UTF8PROC_BOUNDCLASS_T) ? false :
- (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR &&
- tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false :
- (tbc != UTF8PROC_BOUNDCLASS_SPACINGMARK);
+ return
+ (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
+ (lbc == UTF8PROC_BOUNDCLASS_CR &&
+ tbc == UTF8PROC_BOUNDCLASS_LF) ? false :
+ (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
+ (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
+ (tbc == UTF8PROC_BOUNDCLASS_EXTEND) ? false :
+ (lbc == UTF8PROC_BOUNDCLASS_L &&
+ (tbc == UTF8PROC_BOUNDCLASS_L ||
+ tbc == UTF8PROC_BOUNDCLASS_V ||
+ tbc == UTF8PROC_BOUNDCLASS_LV ||
+ tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false :
+ ((lbc == UTF8PROC_BOUNDCLASS_LV ||
+ lbc == UTF8PROC_BOUNDCLASS_V) &&
+ (tbc == UTF8PROC_BOUNDCLASS_V ||
+ tbc == UTF8PROC_BOUNDCLASS_T)) ? false :
+ ((lbc == UTF8PROC_BOUNDCLASS_LVT ||
+ lbc == UTF8PROC_BOUNDCLASS_T) &&
+ tbc == UTF8PROC_BOUNDCLASS_T) ? false :
+ (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR &&
+ tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false :
+ (tbc != UTF8PROC_BOUNDCLASS_SPACINGMARK);
}
/* return whether there is a grapheme break between codepoints c1 and c2 */
DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2) {
- return grapheme_break(utf8proc_get_property(c1)->boundclass,
- utf8proc_get_property(c2)->boundclass);
+ return grapheme_break(utf8proc_get_property(c1)->boundclass,
+ utf8proc_get_property(c2)->boundclass);
}
/* return a character width analogous to wcwidth (except portable and
hopefully less buggy than most system wcwidth functions). */
DLLEXPORT int utf8proc_charwidth(int32_t c) {
- return utf8proc_get_property(c)->charwidth;
+ return utf8proc_get_property(c)->charwidth;
}
DLLEXPORT utf8proc_category_t utf8proc_category(int32_t c) {
- return utf8proc_get_property(c)->category;
+ return utf8proc_get_property(c)->category;
}
DLLEXPORT const char *utf8proc_category_string(int32_t c) {
- static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
- return s[utf8proc_category(c)];
+ static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
+ return s[utf8proc_category(c)];
}
#define utf8proc_decompose_lump(replacement_uc) \