summary | refs | log | tree | commit | diff
path: root/mojibake.h
diff options
context:
space:
mode:
author: Steven G. Johnson <stevenj@mit.edu>, 2014-12-14 08:47:06 -0500
committer: Steven G. Johnson <stevenj@mit.edu>, 2014-12-14 08:47:06 -0500
commit: 4f70bbe78033345e575c425c822ca53b23403fcb (patch)
tree: 356ead4341719ec86fde9846dbde726b219298e2 /mojibake.h
parent: 0f9e9796e6c2a3400e2c9149ceb483cc03111aee (diff)
parent: 397a1eabea5d7bca2f5f9831ac9431b5b85017fc (diff)
download: libutf8proc-4f70bbe78033345e575c425c822ca53b23403fcb.tar.gz
download: libutf8proc-4f70bbe78033345e575c425c822ca53b23403fcb.tar.bz2
Merge pull request #20 from JuliaLang/graphemes
Update graphemes for Unicode 7
Diffstat (limited to 'mojibake.h')
-rw-r--r-- mojibake.h 27
1 file changed, 24 insertions, 3 deletions
diff --git a/mojibake.h b/mojibake.h
index fa93c8d..ce97d36 100644
--- a/mojibake.h
+++ b/mojibake.h
@@ -170,17 +170,17 @@ typedef struct utf8proc_property_struct {
utf8proc_propval_t bidi_class;
utf8proc_propval_t decomp_type;
const int32_t *decomp_mapping;
- unsigned bidi_mirrored:1;
+ const int32_t *casefold_mapping;
int32_t uppercase_mapping;
int32_t lowercase_mapping;
int32_t titlecase_mapping;
int32_t comb1st_index;
int32_t comb2nd_index;
+ unsigned bidi_mirrored:1;
unsigned comp_exclusion:1;
unsigned ignorable:1;
unsigned control_boundary:1;
- unsigned extend:1;
- const int32_t *casefold_mapping;
+ unsigned boundclass:4;
} utf8proc_property_t;
#define UTF8PROC_CATEGORY_CN 0
@@ -253,6 +253,21 @@ typedef struct utf8proc_property_struct {
#define UTF8PROC_DECOMP_TYPE_FRACTION 15
#define UTF8PROC_DECOMP_TYPE_COMPAT 16
+/* values for boundclass property: */
+#define UTF8PROC_BOUNDCLASS_START 0
+#define UTF8PROC_BOUNDCLASS_OTHER 1
+#define UTF8PROC_BOUNDCLASS_CR 2
+#define UTF8PROC_BOUNDCLASS_LF 3
+#define UTF8PROC_BOUNDCLASS_CONTROL 4
+#define UTF8PROC_BOUNDCLASS_EXTEND 5
+#define UTF8PROC_BOUNDCLASS_L 6
+#define UTF8PROC_BOUNDCLASS_V 7
+#define UTF8PROC_BOUNDCLASS_T 8
+#define UTF8PROC_BOUNDCLASS_LV 9
+#define UTF8PROC_BOUNDCLASS_LVT 10
+#define UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR 11
+#define UTF8PROC_BOUNDCLASS_SPACINGMARK 12
+
DLLEXPORT extern const int8_t utf8proc_utf8class[256];
DLLEXPORT const char *utf8proc_version(void);
@@ -367,6 +382,12 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options
* crash!
*/
+DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2);
+/*
+ * Given a pair of consecutive codepoints (c1,c2), return whether a grapheme break is
+ * permitted between them (as defined by the extended grapheme clusters in UAX#29).
+ */
+
DLLEXPORT ssize_t utf8proc_map(
const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
);