diff options
Diffstat (limited to 'content/textsearch.c')
-rw-r--r-- | content/textsearch.c | 740 |
1 files changed, 740 insertions, 0 deletions
diff --git a/content/textsearch.c b/content/textsearch.c new file mode 100644 index 000000000..3f97d42ee --- /dev/null +++ b/content/textsearch.c @@ -0,0 +1,740 @@ +/* + * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk> + * Copyright 2020 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file + * Free text search + */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "utils/errors.h" +#include "utils/utils.h" +#include "utils/ascii.h" +#include "netsurf/types.h" +#include "desktop/selection.h" + +#include "content/content.h" +#include "content/content_protected.h" +#include "content/hlcache.h" +#include "content/textsearch.h" + +/** + * search match + */ +struct list_entry { + /** + * previous match + */ + struct list_entry *prev; + + /** + * next match + */ + struct list_entry *next; + + /** + * start position of match + */ + unsigned start_idx; + + /** + * end of match + */ + unsigned end_idx; + + /** + * content opaque start pointer + */ + struct box *start_box; + + /** + * content opaque end pointer + */ + struct box *end_box; + + /** + * content specific selection object + */ + struct selection *sel; +}; + +/** + * The context for a free text search + */ +struct textsearch_context { + + /** + * content search was performed upon + */ + struct content *c; + + /** + * opaque pointer passed to constructor. + */ + void *gui_p; + + /** + * List of matches + */ + struct list_entry *found; + + /** + * current selected match + */ + struct list_entry *current; /* first for select all */ + + /** + * query string search results are for + */ + char *string; + bool prev_case_sens; + bool newsearch; +}; + + +/** + * broadcast textsearch message + */ +static inline void +textsearch_broadcast(struct textsearch_context *textsearch, + int type, + bool state, + const char *string) +{ + union content_msg_data msg_data; + msg_data.textsearch.type = type; + msg_data.textsearch.ctx = textsearch->gui_p; + msg_data.textsearch.state = state; + msg_data.textsearch.string = string; + content_broadcast(textsearch->c, CONTENT_MSG_TEXTSEARCH, &msg_data); +} + + +/** + * Release the memory used by the list of matches, + * deleting selection objects too + */ +static void free_matches(struct textsearch_context *textsearch) +{ + struct list_entry *cur; + struct list_entry *nxt; + + cur = textsearch->found->next; + + /* + * empty the list before clearing and deleting the selections + * because the the clearing may update the toolkit immediately, + * causing nested accesses to the list + */ + + textsearch->found->prev = NULL; + textsearch->found->next = NULL; + + for (; cur; cur = nxt) { + nxt = cur->next; + if (cur->sel) { + selection_destroy(cur->sel); + } + free(cur); + } +} + + +/** + * Specifies whether all matches or just the current match should + * be highlighted in the search text. + */ +static void search_show_all(bool all, struct textsearch_context *context) +{ + struct list_entry *a; + + for (a = context->found->next; a; a = a->next) { + bool add = true; + if (!all && a != context->current) { + add = false; + if (a->sel) { + selection_destroy(a->sel); + a->sel = NULL; + } + } + + if (add && !a->sel) { + + a->sel = selection_create(context->c); + if (a->sel != NULL) { + selection_init(a->sel); + selection_set_position(a->sel, + a->start_idx, + a->end_idx); + } + } + } +} + + +/** + * Search for a string in a content. + * + * \param context The search context. + * \param string the string to search for + * \param string_len length of search string + * \param flags flags to control the search. + */ +static nserror +search_text(struct textsearch_context *context, + const char *string, + int string_len, + search_flags_t flags) +{ + struct rect bounds; + union content_msg_data msg_data; + bool case_sensitive, forwards, showall; + nserror res = NSERROR_OK; + + case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ? + true : false; + forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false; + showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? true : false; + + if (context->c == NULL) { + return res; + } + + /* check if we need to start a new search or continue an old one */ + if ((context->newsearch) || + (context->prev_case_sens != case_sensitive)) { + + if (context->string != NULL) { + free(context->string); + } + + context->current = NULL; + free_matches(context); + + context->string = malloc(string_len + 1); + if (context->string != NULL) { + memcpy(context->string, string, string_len); + context->string[string_len] = '\0'; + } + + /* indicate find operation starting */ + textsearch_broadcast(context, CONTENT_TEXTSEARCH_FIND, true, NULL); + + + /* call content find handler */ + res = context->c->handler->textsearch_find(context->c, + context, + string, + string_len, + case_sensitive); + + /* indicate find operation finished */ + textsearch_broadcast(context, CONTENT_TEXTSEARCH_FIND, false, NULL); + + if (res != NSERROR_OK) { + free_matches(context); + return res; + } + + context->prev_case_sens = case_sensitive; + + /* new search, beginning at the top of the page */ + context->current = context->found->next; + context->newsearch = false; + + } else if (context->current != NULL) { + /* continued search in the direction specified */ + if (forwards) { + if (context->current->next) + context->current = context->current->next; + } else { + if (context->current->prev) + context->current = context->current->prev; + } + } + + /* update match state */ + textsearch_broadcast(context, + CONTENT_TEXTSEARCH_MATCH, + (context->current != NULL), + NULL); + + search_show_all(showall, context); + + /* update back state */ + textsearch_broadcast(context, + CONTENT_TEXTSEARCH_BACK, + ((context->current != NULL) && + (context->current->prev != NULL)), + NULL); + + /* update forward state */ + textsearch_broadcast(context, + CONTENT_TEXTSEARCH_FORWARD, + ((context->current != NULL) && + (context->current->next != NULL)), + NULL); + + + if (context->current == NULL) { + /* no current match */ + return res; + } + + /* call content match bounds handler */ + res = context->c->handler->textsearch_bounds(context->c, + context->current->start_idx, + context->current->end_idx, + context->current->start_box, + context->current->end_box, + &bounds); + if (res == NSERROR_OK) { + msg_data.scroll.area = true; + msg_data.scroll.x0 = bounds.x0; + msg_data.scroll.y0 = bounds.y0; + msg_data.scroll.x1 = bounds.x1; + msg_data.scroll.y1 = bounds.y1; + content_broadcast(context->c, CONTENT_MSG_SCROLL, &msg_data); + } + + return res; +} + + +/** + * Begins/continues the search process + * + * \note that this may be called many times for a single search. + * + * \param context The search context in use. + * \param flags The flags forward/back etc + * \param string The string to match + */ +static nserror +content_textsearch_step(struct textsearch_context *textsearch, + search_flags_t flags, + const char *string) +{ + int string_len; + int i = 0; + nserror res = NSERROR_OK; + + assert(textsearch != NULL); + + /* broadcast recent query string */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_RECENT, + false, + string); + + string_len = strlen(string); + for (i = 0; i < string_len; i++) { + if (string[i] != '#' && string[i] != '*') + break; + } + + if (i < string_len) { + res = search_text(textsearch, string, string_len, flags); + } else { + union content_msg_data msg_data; + + free_matches(textsearch); + + /* update match state */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_MATCH, + true, + NULL); + + /* update back state */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_BACK, + false, + NULL); + + /* update forward state */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_FORWARD, + false, + NULL); + + /* clear scroll */ + msg_data.scroll.area = false; + msg_data.scroll.x0 = 0; + msg_data.scroll.y0 = 0; + content_broadcast(textsearch->c, + CONTENT_MSG_SCROLL, + &msg_data); + } + + return res; +} + + +/** + * Terminate a search. + * + * \param c content to clear + */ +static nserror content_textsearch__clear(struct content *c) +{ + free(c->textsearch.string); + c->textsearch.string = NULL; + + if (c->textsearch.context != NULL) { + content_textsearch_destroy(c->textsearch.context); + c->textsearch.context = NULL; + } + return NSERROR_OK; +} + + +/** + * create a search_context + * + * \param c The content the search_context is connected to + * \param context A context pointer passed to the provider routines. + * \param search_out A pointer to recive the new text search context + * \return NSERROR_OK on success and \a search_out updated else error code + */ +static nserror +content_textsearch_create(struct content *c, + void *gui_data, + struct textsearch_context **textsearch_out) +{ + struct textsearch_context *context; + struct list_entry *search_head; + content_type type; + + if ((c->handler->textsearch_find == NULL) || + (c->handler->textsearch_bounds == NULL)) { + /* + * content has no free text find handler so searching + * is unsupported. + */ + return NSERROR_NOT_IMPLEMENTED; + } + + type = c->handler->type(); + + context = malloc(sizeof(struct textsearch_context)); + if (context == NULL) { + return NSERROR_NOMEM; + } + + search_head = malloc(sizeof(struct list_entry)); + if (search_head == NULL) { + free(context); + return NSERROR_NOMEM; + } + + search_head->start_idx = 0; + search_head->end_idx = 0; + search_head->start_box = NULL; + search_head->end_box = NULL; + search_head->sel = NULL; + search_head->prev = NULL; + search_head->next = NULL; + + context->found = search_head; + context->current = NULL; + context->string = NULL; + context->prev_case_sens = false; + context->newsearch = true; + context->c = c; + context->gui_p = gui_data; + + *textsearch_out = context; + + return NSERROR_OK; +} + + +/* exported interface, documented in content/textsearch.h */ +const char * +content_textsearch_find_pattern(const char *string, + int s_len, + const char *pattern, + int p_len, + bool case_sens, + unsigned int *m_len) +{ + struct { const char *ss, *s, *p; bool first; } context[16]; + const char *ep = pattern + p_len; + const char *es = string + s_len; + const char *p = pattern - 1; /* a virtual '*' before the pattern */ + const char *ss = string; + const char *s = string; + bool first = true; + int top = 0; + + while (p < ep) { + bool matches; + if (p < pattern || *p == '*') { + char ch; + + /* skip any further asterisks; one is the same as many + */ + do p++; while (p < ep && *p == '*'); + + /* if we're at the end of the pattern, yes, it matches + */ + if (p >= ep) break; + + /* anything matches a # so continue matching from + here, and stack a context that will try to match + the wildcard against the next character */ + + ch = *p; + if (ch != '#') { + /* scan forwards until we find a match for + this char */ + if (!case_sens) ch = ascii_to_upper(ch); + while (s < es) { + if (case_sens) { + if (*s == ch) break; + } else if (ascii_to_upper(*s) == ch) + break; + s++; + } + } + + if (s < es) { + /* remember where we are in case the match + fails; we may then resume */ + if (top < (int)NOF_ELEMENTS(context)) { + context[top].ss = ss; + context[top].s = s + 1; + context[top].p = p - 1; + /* ptr to last asterisk */ + context[top].first = first; + top++; + } + + if (first) { + ss = s; + /* remember first non-'*' char */ + first = false; + } + + matches = true; + } else { + matches = false; + } + + } else if (s < es) { + char ch = *p; + if (ch == '#') + matches = true; + else { + if (case_sens) + matches = (*s == ch); + else + matches = (ascii_to_upper(*s) == ascii_to_upper(ch)); + } + if (matches && first) { + ss = s; /* remember first non-'*' char */ + first = false; + } + } else { + matches = false; + } + + if (matches) { + p++; s++; + } else { + /* doesn't match, + * resume with stacked context if we have one */ + if (--top < 0) + return NULL; /* no match, give up */ + + ss = context[top].ss; + s = context[top].s; + p = context[top].p; + first = context[top].first; + } + } + + /* end of pattern reached */ + *m_len = max(s - ss, 1); + return ss; +} + + +/* exported interface, documented in content/textsearch.h */ +nserror +content_textsearch_add_match(struct textsearch_context *context, + unsigned start_idx, + unsigned end_idx, + struct box *start_box, + struct box *end_box) +{ + struct list_entry *entry; + + /* found string in box => add to list */ + entry = calloc(1, sizeof(*entry)); + if (entry == NULL) { + return NSERROR_NOMEM; + } + + entry->start_idx = start_idx; + entry->end_idx = end_idx; + entry->start_box = start_box; + entry->end_box = end_box; + entry->sel = NULL; + + entry->next = NULL; + entry->prev = context->found->prev; + + if (context->found->prev == NULL) { + context->found->next = entry; + } else { + context->found->prev->next = entry; + } + + context->found->prev = entry; + + return NSERROR_OK; +} + + +/* exported interface, documented in content/textsearch.h */ +bool +content_textsearch_ishighlighted(struct textsearch_context *textsearch, + unsigned start_offset, + unsigned end_offset, + unsigned *start_idx, + unsigned *end_idx) +{ + struct list_entry *cur; + + for (cur = textsearch->found->next; cur != NULL; cur = cur->next) { + if (cur->sel && + selection_highlighted(cur->sel, + start_offset, + end_offset, + start_idx, + end_idx)) { + return true; + } + } + + return false; +} + + +/* exported interface, documented in content/textsearch.h */ +nserror content_textsearch_destroy(struct textsearch_context *textsearch) +{ + assert(textsearch != NULL); + + if (textsearch->string != NULL) { + /* broadcast recent query string */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_RECENT, + false, + textsearch->string); + + free(textsearch->string); + } + + /* update back state */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_BACK, + true, + NULL); + + /* update forward state */ + textsearch_broadcast(textsearch, + CONTENT_TEXTSEARCH_FORWARD, + true, + NULL); + + free_matches(textsearch); + free(textsearch); + + return NSERROR_OK; +} + + +/* exported interface, documented in content/content.h */ +nserror +content_textsearch(struct hlcache_handle *h, + void *context, + search_flags_t flags, + const char *string) +{ + struct content *c = hlcache_handle_get_content(h); + nserror res; + + assert(c != NULL); + + if (string != NULL && + c->textsearch.string != NULL && + c->textsearch.context != NULL && + strcmp(string, c->textsearch.string) == 0) { + /* Continue prev. search */ + content_textsearch_step(c->textsearch.context, flags, string); + + } else if (string != NULL) { + /* New search */ + free(c->textsearch.string); + c->textsearch.string = strdup(string); + if (c->textsearch.string == NULL) { + return NSERROR_NOMEM; + } + + if (c->textsearch.context != NULL) { + content_textsearch_destroy(c->textsearch.context); + c->textsearch.context = NULL; + } + + res = content_textsearch_create(c, + context, + &c->textsearch.context); + if (res != NSERROR_OK) { + return res; + } + + content_textsearch_step(c->textsearch.context, flags, string); + + } else { + /* Clear search */ + content_textsearch__clear(c); + + free(c->textsearch.string); + c->textsearch.string = NULL; + } + + return NSERROR_OK; +} + + +/* exported interface, documented in content/content.h */ +nserror content_textsearch_clear(struct hlcache_handle *h) +{ + struct content *c = hlcache_handle_get_content(h); + assert(c != 0); + + return(content_textsearch__clear(c)); +} |