summaryrefslogtreecommitdiff
path: root/utf8proc.h
diff options
context:
space:
mode:
author: Steven G. Johnson <stevenj@mit.edu> 2015-03-08 17:23:43 -0400
committer: Steven G. Johnson <stevenj@mit.edu> 2015-03-12 12:10:19 -0400
commit: a4c84d2063bb6f2218bc20f89e59001319361f6d (patch)
tree: bf9799783e0f66c67f0303ea13a778fe51eff539 /utf8proc.h
parent: 08f101a9e8c6a72dfdb1c9b913df880e13a36333 (diff)
download: libutf8proc-a4c84d2063bb6f2218bc20f89e59001319361f6d.tar.gz
download: libutf8proc-a4c84d2063bb6f2218bc20f89e59001319361f6d.tar.bz2
fix #2: add charwidth function
Diffstat (limited to 'utf8proc.h')
-rw-r--r--utf8proc.h16
1 files changed, 16 insertions, 0 deletions
diff --git a/utf8proc.h b/utf8proc.h
index 0d647db..360f919 100644
--- a/utf8proc.h
+++ b/utf8proc.h
@@ -181,6 +181,7 @@ typedef struct utf8proc_property_struct {
unsigned ignorable:1;
unsigned control_boundary:1;
unsigned boundclass:4;
+ unsigned charwidth:2;
} utf8proc_property_t;
#define UTF8PROC_CATEGORY_CN 0
@@ -388,6 +389,21 @@ DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2);
* permitted between them (as defined by the extended grapheme clusters in UAX#29).
*/
+DLLEXPORT int utf8proc_charwidth(int32_t c);
+/* Given a codepoint c, return a character width analogous to wcwidth(c),
+ except that a width of 0 is returned for non-printable characters
+ instead of -1 as in wcwidth.
+
+ If you want to check for particular types of non-printable characters
+ (analogous to isprint or iscntrl), use utf8proc_category(c). */
+
+DLLEXPORT int utf8proc_category(int32_t c);
+/* Return the Unicode character category for c (one of the
+ UTF8PROC_CATEGORY_* constants). */
+
+DLLEXPORT const char *utf8proc_category_string(int32_t c);
+/* Return the two-letter Unicode category string for c (e.g. "Lu" or "Co"). */
+
DLLEXPORT ssize_t utf8proc_map(
const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
);