From f4e50b45c834b644caa6a82bd044faa82f6f4860 Mon Sep 17 00:00:00 2001
From: Vincent Sanders
Date: Tue, 12 May 2020 23:22:05 +0100
Subject: make free text search content handler agnostic

---
 content/handlers/html/html.c      | 150 ++++++++++++++++++++++++++++++++++++++
 content/handlers/text/textplain.c | 104 ++++++++++++++++++++++++++
 2 files changed, 254 insertions(+)

(limited to 'content/handlers')

diff --git a/content/handlers/html/html.c b/content/handlers/html/html.c
index 88098ebee..23d607823 100644
--- a/content/handlers/html/html.c
+++ b/content/handlers/html/html.c
@@ -2179,6 +2179,153 @@ static void html_fini(void)
 	html_css_fini();
 }
 
+/**
+ * Finds all occurrences of a given string in an html box and its children
+ *
+ * \param pattern the string pattern to search for
+ * \param p_len pattern length
+ * \param cur pointer to the current box
+ * \param case_sens whether to perform a case sensitive search
+ * \param context The search context to add the entry to.
+ * \return NSERROR_OK on success else the error code from adding a match
+ */
+static nserror
+find_occurrences_html_box(const char *pattern,
+			  int p_len,
+			  struct box *cur,
+			  bool case_sens,
+			  struct textsearch_context *context)
+{
+	struct box *a;
+	nserror res = NSERROR_OK;
+
+	/* only search boxes which have text and are not objects */
+	if (!cur->object && cur->text) {
+		const char *text = cur->text;
+		unsigned length = cur->length;
+
+		while (length > 0) {
+			unsigned match_length;
+			unsigned match_offset;
+			const char *new_text;
+			const char *pos;
+
+			pos = content_textsearch_find_pattern(text,
+							      length,
+							      pattern,
+							      p_len,
+							      case_sens,
+							      &match_length);
+			if (!pos)
+				break;
+
+			/* found string in box => add to list, offset is from start of box text */
+			match_offset = pos - cur->text;
+
+			res = content_textsearch_add_match(context,
+					cur->byte_offset + match_offset,
+					cur->byte_offset + match_offset + match_length,
+					cur,
+					cur);
+			if (res != NSERROR_OK) {
+				return res;
+			}
+
+			new_text = pos + match_length;
+			length -= (new_text - text);
+			text = new_text;
+		}
+	}
+
+	/* and recurse into each child box, stopping at the first error */
+	for (a = cur->children; a; a = a->next) {
+		res = find_occurrences_html_box(pattern,
+						p_len,
+						a,
+						case_sens,
+						context);
+		if (res != NSERROR_OK) {
+			return res;
+		}
+	}
+
+	return res;
+}
+
+/**
+ * Finds all occurrences of a given string in the html box tree
+ *
+ * \param c The content to search
+ * \param context The search context to add the entry to.
+ * \param pattern the string pattern to search for
+ * \param p_len pattern length
+ * \param csens whether to perform a case sensitive search
+ * \return NSERROR_INVALID if there is no layout else result of the box search
+ */
+static nserror
+html_textsearch_find(struct content *c,
+		     struct textsearch_context *context,
+		     const char *pattern,
+		     int p_len,
+		     bool csens)
+{
+	html_content *html = (html_content *)c;
+
+	if (html->layout == NULL) {
+		return NSERROR_INVALID;
+	}
+
+	return find_occurrences_html_box(pattern,
+					 p_len,
+					 html->layout,
+					 csens,
+					 context);
+}
+
+
+static nserror
+html_textsearch_bounds(struct content *c,
+		       unsigned start_idx,
+		       unsigned end_idx,
+		       struct box *start_box,
+		       struct box *end_box,
+		       struct rect *bounds)
+{
+	/* bounds run from the start box position to the far corner of the end box */
+	box_coords(start_box, &bounds->x0, &bounds->y0);
+	/* \todo: move x0 in by correct idx */
+	box_coords(end_box, &bounds->x1, &bounds->y1);
+	/* \todo: move x1 in by correct idx */
+	bounds->x1 += end_box->width;
+	bounds->y1 += end_box->height;
+
+	return NSERROR_OK;
+}
+
+
+/**
+ * create a selection object for html content, initialised with its layout
+ */
+static nserror
+html_create_selection(struct content *c, struct selection **sel_out)
+{
+	html_content *html = (html_content *)c;
+	struct selection *sel;
+	sel = selection_create(c, true);
+	if (sel == NULL) {
+		return NSERROR_NOMEM;
+	}
+
+	selection_init(sel, html->layout, &html->len_ctx);
+
+	*sel_out = sel;
+	return NSERROR_OK;
+}
+
+
+/**
+ * HTML content handler function table
+ */
 static const content_handler html_content_handler = {
 	.fini = html_fini,
 	.create = html_create,
@@ -2205,6 +2352,9 @@ static const content_handler html_content_handler = {
 	.type = html_content_type,
 	.exec = html_exec,
 	.saw_insecure_objects = html_saw_insecure_objects,
+	.textsearch_find = html_textsearch_find,
+	.textsearch_bounds = html_textsearch_bounds,
+	.create_selection = html_create_selection,
 	.no_share = true,
 };
 
diff --git a/content/handlers/text/textplain.c b/content/handlers/text/textplain.c
index 750c5eb3d..a233d827f 100644
--- a/content/handlers/text/textplain.c
+++ b/content/handlers/text/textplain.c
@@ -1212,6 +1212,107 @@ textplain_coord_from_offset(const char *text, size_t offset, size_t length)
 	return x;
 }
 
+/**
+ * Finds all occurrences of a given string in a textplain content
+ *
+ * \param c the content to be searched
+ * \param context The search context to add the entry to.
+ * \param pattern the string pattern to search for
+ * \param p_len pattern length
+ * \param case_sens whether to perform a case sensitive search
+ * \return NSERROR_OK on success else error code on failure
+ */
+static nserror
+textplain_textsearch_find(struct content *c,
+			  struct textsearch_context *context,
+			  const char *pattern,
+			  int p_len,
+			  bool case_sens)
+{
+	int nlines = textplain_line_count(c);
+	int line;
+	nserror res = NSERROR_OK;
+
+	for(line = 0; line < nlines; line++) {
+		size_t offset, length;
+		const char *text;
+
+		text = textplain_get_line(c, line, &offset, &length);
+		if (text) {
+			while (length > 0) {
+				unsigned match_length;
+				size_t start_idx;
+				const char *new_text;
+				const char *pos;
+
+				pos = content_textsearch_find_pattern(
+						text,
+						length,
+						pattern,
+						p_len,
+						case_sens,
+						&match_length);
+				if (!pos)
+					break;
+
+				/* found string in line => add to list, box arguments are NULL */
+				start_idx = offset + (pos - text);
+				res = content_textsearch_add_match(context,
+						start_idx,
+						start_idx + match_length,
+						NULL,
+						NULL);
+				if (res != NSERROR_OK) {
+					return res;
+				}
+
+				new_text = pos + match_length;
+				offset += (new_text - text);
+				length -= (new_text - text);
+				text = new_text;
+			}
+		}
+	}
+
+	return res;
+}
+
+
+/**
+ * get bounds of a free text search match; the box parameters are unused
+ */
+static nserror
+textplain_textsearch_bounds(struct content *c,
+			    unsigned start_idx,
+			    unsigned end_idx,
+			    struct box *start_box,
+			    struct box *end_box,
+			    struct rect *bounds)
+{
+	textplain_coords_from_range(c, start_idx, end_idx, bounds);
+
+	return NSERROR_OK;
+}
+
+
+/**
+ * create a selection object suitable for plain text content
+ */
+static nserror
+textplain_create_selection(struct content *c, struct selection **sel_out)
+{
+	struct selection *sel;
+	sel = selection_create(c, false);
+	if (sel == NULL) {
+		return NSERROR_NOMEM;
+	}
+
+	selection_init(sel, NULL, NULL);
+
+	*sel_out = sel;
+	return NSERROR_OK;
+}
+
 
 /**
  * plain text content handler table
@@ -1232,6 +1333,9 @@ static const content_handler textplain_content_handler = {
 	.get_selection = textplain_get_selection,
 	.clone = textplain_clone,
 	.type = textplain_content_type,
+	.textsearch_find = textplain_textsearch_find,
+	.textsearch_bounds = textplain_textsearch_bounds,
+	.create_selection = textplain_create_selection,
 	.no_share = true,
 };
 
-- 
cgit v1.2.3