From b4621f43c3b8aaa5636cb129cd0f2e0f8cc81889 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 30 Nov 2016 10:40:26 -0500 Subject: new utf8proc_map_custom for hooking in user-defined custom mappings (#89) * new utf8proc_map_custom for hooking in user-defined custom mappings * whoops, add test program * NEWS, version bump for 2.1 * change test functions to static so that gcc doesn't complain about missing prototypes --- utf8proc.h | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'utf8proc.h') diff --git a/utf8proc.h b/utf8proc.h index 9d1f782..96328fb 100644 --- a/utf8proc.h +++ b/utf8proc.h @@ -71,9 +71,9 @@ /** The MAJOR version number (increased when backwards API compatibility is broken). */ #define UTF8PROC_VERSION_MAJOR 2 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ -#define UTF8PROC_VERSION_MINOR 0 +#define UTF8PROC_VERSION_MINOR 1 /** The PATCH version (increased for fixes that do not change the API). */ -#define UTF8PROC_VERSION_PATCH 2 +#define UTF8PROC_VERSION_PATCH 0 /** @} */ #include @@ -373,6 +373,13 @@ typedef enum { UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */ } utf8proc_boundclass_t; +/** + * Function pointer type passed to @ref utf8proc_map_custom and + * @ref utf8proc_decompose_custom, which is used to specify a user-defined + * mapping of codepoints to be applied in conjunction with other mappings. + */ +typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data); + /** * Array containing the byte lengths of a UTF-8 encoded codepoint based * on the first byte. @@ -480,6 +487,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( * `buffer` (which must contain at least `bufsize` entries). In case of * success, the number of codepoints written is returned; in case of an * error, a negative error code is returned (@ref utf8proc_errmsg). 
+ * See @ref utf8proc_decompose_custom to supply additional transformations. * * If the number of written codepoints would be bigger than `bufsize`, the * required buffer size is returned, while the buffer will be overwritten with @@ -490,6 +498,18 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ); +/** + * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + /** * Normalizes the sequence of `length` codepoints pointed to by `buffer` * in-place (i.e., the result is also stored in `buffer`). @@ -623,7 +643,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi * in any case the result will be NULL terminated (though it might * contain NULL characters with the string if `str` contained NULL * characters). Other flags in the `options` field are passed to the - * functions defined above, and regarded as described. + * functions defined above, and regarded as described. See also + * @ref utf8proc_map_custom to supply a custom codepoint transformation. * * In case of success the length of the new string is returned, * otherwise a negative error code is returned. 
@@ -635,6 +656,17 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options ); +/** + * Like @ref utf8proc_map, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + /** @name Unicode normalization * * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC -- cgit v1.2.3