diff options
-rw-r--r-- | include/dom/core/string.h | 8 | ||||
-rw-r--r-- | src/core/string.c | 116 |
2 files changed, 123 insertions, 1 deletions
diff --git a/include/dom/core/string.h b/include/dom/core/string.h index b9b41a9..f2a6122 100644 --- a/include/dom/core/string.h +++ b/include/dom/core/string.h @@ -102,6 +102,14 @@ dom_exception dom_string_replace(dom_string *target, dom_string *source, uint32_t i1, uint32_t i2, dom_string **result); +/* Generate an uppercase version of the given string */ +dom_exception dom_string_toupper(dom_string *source, bool ascii_only, + dom_string **upper); + +/* Generate a lowercase version of the given string */ +dom_exception dom_string_tolower(dom_string *source, bool ascii_only, + dom_string **lower); + /* Calculate a hash value from a dom string */ uint32_t dom_string_hash(dom_string *str); diff --git a/src/core/string.c b/src/core/string.c index aa046ad..0cadd77 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -890,7 +890,7 @@ const char *dom_string_data(const dom_string *str) } } -/* Get the byte length of this dom_string +/** Get the byte length of this dom_string * * \param str The dom_string object */ @@ -904,3 +904,117 @@ size_t dom_string_byte_length(const dom_string *str) } } +/** Convert the given string to uppercase + * + * \param source The string to convert + * \param ascii_only Whether to only convert [a-z] to [A-Z] + * \param upper Result pointer for uppercase string. Caller owns ref + * + * \return DOM_NO_ERR on success. + * + * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false. 
+ */
+dom_exception
+dom_string_toupper(dom_string *source, bool ascii_only, dom_string **upper)
+{
+	const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
+	const size_t nbytes = dom_string_byte_length(source);
+	uint8_t *copy_s;
+	size_t index = 0, clen;
+	parserutils_error err;
+	dom_exception exc;
+
+	if (ascii_only == false)
+		return DOM_NOT_SUPPORTED_ERR;
+	/* malloc(0) may return NULL: never request zero bytes for "" */
+	copy_s = malloc(nbytes > 0 ? nbytes : 1);
+	if (copy_s == NULL)
+		return DOM_NO_MEM_ERR;
+	memcpy(copy_s, orig_s, nbytes);
+	/* Walk whole UTF-8 characters; only 1-byte ones are case-mapped */
+	while (index < nbytes) {
+		err = parserutils_charset_utf8_char_byte_length(orig_s + index,
+				&clen);
+		if (err != PARSERUTILS_OK) {
+			free(copy_s);
+			/** \todo Find a better exception */
+			return DOM_NO_MEM_ERR;
+		}
+
+		if (clen == 1) {
+			if (orig_s[index] >= 'a' &&
+			    orig_s[index] <= 'z')
+				copy_s[index] -= 'a' - 'A';
+		}
+
+		index += clen;
+	}
+	/* CDATA sources use dom_string_create; all others the interned one */
+	if (((dom_string_internal*)source)->type == DOM_STRING_CDATA) {
+		exc = dom_string_create(copy_s, nbytes, upper);
+	} else {
+		exc = dom_string_create_interned(copy_s, nbytes, upper);
+	}
+
+	free(copy_s);
+
+	return exc;
+}
+
+/** Convert the given string to lowercase
+ *
+ * \param source The string to convert
+ * \param ascii_only Whether to only convert [A-Z] to [a-z]
+ * \param lower Result pointer for lowercase string. Caller owns ref
+ *
+ * \return DOM_NO_ERR on success.
+ *
+ * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
+ */
+dom_exception
+dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
+{
+	const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
+	const size_t nbytes = dom_string_byte_length(source);
+	uint8_t *copy_s;
+	size_t index = 0, clen;
+	parserutils_error err;
+	dom_exception exc;
+
+	if (ascii_only == false)
+		return DOM_NOT_SUPPORTED_ERR;
+	/* malloc(0) may return NULL: never request zero bytes for "" */
+	copy_s = malloc(nbytes > 0 ? nbytes : 1);
+	if (copy_s == NULL)
+		return DOM_NO_MEM_ERR;
+	memcpy(copy_s, orig_s, nbytes);
+	/* Walk whole UTF-8 characters; only 1-byte ones are case-mapped */
+	while (index < nbytes) {
+		err = parserutils_charset_utf8_char_byte_length(orig_s + index,
+				&clen);
+		if (err != PARSERUTILS_OK) {
+			free(copy_s);
+			/** \todo Find a better exception */
+			return DOM_NO_MEM_ERR;
+		}
+
+		if (clen == 1) {
+			if (orig_s[index] >= 'A' &&
+			    orig_s[index] <= 'Z')
+				copy_s[index] += 'a' - 'A';
+		}
+
+		index += clen;
+	}
+	/* CDATA sources use dom_string_create; all others the interned one */
+	if (((dom_string_internal*)source)->type == DOM_STRING_CDATA) {
+		exc = dom_string_create(copy_s, nbytes, lower);
+	} else {
+		exc = dom_string_create_interned(copy_s, nbytes, lower);
+	}
+
+	free(copy_s);
+
+	return exc;
+}
+
|