summaryrefslogtreecommitdiff
path: root/desktop/search.c
diff options
context:
space:
mode:
Diffstat (limited to 'desktop/search.c')
-rw-r--r--desktop/search.c705
1 files changed, 705 insertions, 0 deletions
diff --git a/desktop/search.c b/desktop/search.c
new file mode 100644
index 000000000..018f40674
--- /dev/null
+++ b/desktop/search.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
+ * Copyright 2005 Adrian Lees <adrianl@users.sourceforge.net>
+ * Copyright 2009 Mark Benjamin <netsurf-browser.org.MarkBenjamin@dfgh.net>
+ *
+ * This file is part of NetSurf, http://www.netsurf-browser.org/
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+ /** \file
+ * Free text search (core)
+ */
+#include "utils/config.h"
+
+#include <ctype.h>
+#include <string.h>
+#include "content/content.h"
+#include "desktop/browser.h"
+#include "desktop/gui.h"
+#include "desktop/options.h"
+#include "desktop/search.h"
+#include "desktop/selection.h"
+#include "render/box.h"
+#include "render/html.h"
+#include "utils/config.h"
+#include "utils/log.h"
+#include "utils/messages.h"
+#include "utils/url.h"
+#include "utils/utils.h"
+
+
+#ifndef NOF_ELEMENTS
+#define NOF_ELEMENTS(array) (sizeof(array)/sizeof(*(array)))
+#endif
+
+
+struct list_entry {
+ unsigned start_idx; /* start position of match */
+ unsigned end_idx; /* end of match */
+
+ struct box *start_box; /* used only for html contents */
+ struct box *end_box;
+
+ struct selection *sel;
+
+ struct list_entry *prev;
+ struct list_entry *next;
+};
+
+struct search_context {
+ struct browser_window *bw;
+ struct content *content;
+ char *string;
+ bool prev_case_sens;
+ bool newsearch;
+ bool insert;
+ void *p; /* front-specific data */
+ struct search_callbacks *callbacks;
+ struct list_entry *found;
+ struct list_entry *current; /* first for select all */
+};
+
+static void search_text(const char *string, int string_len,
+ struct search_context *context, search_flags_t flags);
+static const char *find_pattern(const char *string, int s_len,
+ const char *pattern, int p_len, bool case_sens,
+ unsigned int *m_len);
+static bool find_occurrences_html(const char *pattern, int p_len,
+ struct box *cur, bool case_sens,
+ struct search_context *context);
+static bool find_occurrences_text(const char *pattern, int p_len,
+ struct content *c, bool case_sens,
+ struct search_context *context);
+static struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
+ struct search_context *context);
+static void free_matches(struct search_context *context);
+
+
+/**
+ * create a search_context
+ * \param bw the browser_window the search_context is connected to
+ * \param callbacks the callbacks to modify appearance according to results
+ * \param p the pointer to send to the callbacks
+ * \return true for success
+ */
+bool search_create_context(struct browser_window *bw,
+ struct search_callbacks *callbacks, void *p)
+{
+ struct search_context *context = malloc(sizeof(struct search_context));
+ struct list_entry *search_head = malloc(sizeof(struct list_entry));
+
+ if ((context == NULL) || (search_head == NULL)) {
+ warn_user("NoMemory", 0);
+ return false;
+ }
+ if (bw->search_context != NULL)
+ search_destroy_context(bw->search_context);
+
+ search_head->start_idx = 0;
+ search_head->end_idx = 0;
+ search_head->start_box = NULL;
+ search_head->end_box = NULL;
+ search_head->sel = NULL;
+ search_head->prev = NULL;
+ search_head->next = NULL;
+
+ context->found = search_head;
+ context->current = NULL;
+ context->content = NULL;
+ context->string = NULL;
+ context->prev_case_sens = false;
+ context->newsearch = true;
+ context->insert = true;
+ context->bw = bw;
+ context->callbacks = callbacks;
+ context->p = p;
+
+ bw->search_context = context;
+ return true;
+}
+/**
+ * to simplify calls to search_step(); checks that the browser_window is
+ * non-NULL, creates a new search_context in case of a new search
+ * \param bw the browser_window the search refers to
+ * \param callbacks the callbacks to modify appearance according to results
+ * \param p a pointer returned to the callbacks
+ * \return true for success
+ */
+bool search_verify_new(struct browser_window *bw,
+ struct search_callbacks *callbacks, void *p)
+{
+ if (bw == NULL)
+ return false;
+ if (bw->search_context == NULL)
+ return search_create_context(bw, callbacks, p);
+ return true;
+}
+
+/**
+ * Begins/continues the search process
+ * Note that this may be called many times for a single search.
+ *
+ * \param bw the browser_window to search in
+ * \param flags the flags forward/back etc
+ * \param string the string to match
+ */
+
+void search_step(struct search_context *context, search_flags_t flags,
+ const char *string)
+{
+ int string_len;
+ int i = 0;
+
+ if ((context == NULL) || (context->callbacks == NULL)) {
+ warn_user("SearchError", 0);
+ return;
+ }
+
+
+ if (context->callbacks->add_recent != NULL)
+ context->callbacks->add_recent(string, context->p);
+
+ string_len = strlen(string);
+ for(i = 0; i < string_len; i++)
+ if (string[i] != '#' && string[i] != '*') break;
+ if (i >= string_len) {
+ free_matches(context);
+ if (context->callbacks->status != NULL)
+ context->callbacks->status(true, context->p);
+ if (context->callbacks->back_state != NULL)
+ context->callbacks->back_state(false, context->p);
+ if (context->callbacks->forward_state != NULL)
+ context->callbacks->forward_state(false, context->p);
+ gui_window_set_scroll(context->bw->window, 0, 0);
+ return;
+ }
+ search_text(string, string_len, context, flags);
+}
+
+/**
+ * Release the memory used by the list of matches,
+ * deleting selection objects too
+ */
+
+void free_matches(struct search_context *context)
+{
+ struct list_entry *a = context->found->next;
+ struct list_entry *b;
+
+ /* empty the list before clearing and deleting the
+ selections because the the clearing updates the
+ screen immediately, causing nested accesses to the list */
+
+ context->found->prev = NULL;
+ context->found->next = NULL;
+
+ for (; a; a = b) {
+ b = a->next;
+ if (a->sel) {
+ selection_clear(a->sel, true);
+ selection_destroy(a->sel);
+ }
+ free(a);
+ }
+}
+
+/**
+ * Search for a string in the box tree
+ *
+ * \param string the string to search for
+ * \param string_len length of search string
+ */
+void search_text(const char *string, int string_len,
+ struct search_context *context, search_flags_t flags)
+{
+ struct rect bounds;
+ struct content *c;
+ struct box *box;
+ bool case_sensitive, forwards, showall;
+
+ case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ?
+ true : false;
+ forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false;
+ showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? true : false;
+
+ if (context->bw == NULL)
+ return;
+ c = context->bw->current_content;
+
+ /* only handle html contents */
+ if ((!c) || (c->type != CONTENT_HTML &&
+ c->type != CONTENT_TEXTPLAIN))
+ return;
+
+ box = c->data.html.layout;
+
+ if (!box)
+ return;
+
+ /* LOG(("do_search '%s' - '%s' (%p, %p) %p (%d, %d) %d",
+ search_data.string, string, search_data.content, c, search_data.found->next,
+ search_data.prev_case_sens, case_sens, forwards)); */
+
+ /* check if we need to start a new search or continue an old one */
+ if (context->newsearch) {
+ bool res;
+
+ if (context->string != NULL)
+ free(context->string);
+ context->current = NULL;
+ free_matches(context);
+
+ context->string = malloc(string_len + 1);
+ if (context->string != NULL) {
+ memcpy(context->string, string, string_len);
+ context->string[string_len] = '\0';
+ }
+
+ if ((context->callbacks != NULL) &&
+ (context->callbacks->hourglass != NULL))
+ context->callbacks->hourglass(true, context->p);
+
+ if (c->type == CONTENT_HTML)
+ res = find_occurrences_html(string, string_len,
+ box, case_sensitive, context);
+ else {
+ assert(c->type == CONTENT_TEXTPLAIN);
+ res = find_occurrences_text(string, string_len,
+ c, case_sensitive, context);
+ }
+
+ if (!res) {
+ free_matches(context);
+ if ((context->callbacks != NULL) &&
+ (context->callbacks->hourglass !=
+ NULL))
+ context->callbacks->hourglass(false,
+ context->p);
+ return;
+ }
+ if ((context->callbacks != NULL) &&
+ (context->callbacks->hourglass != NULL))
+ context->callbacks->hourglass(false, context->p);
+
+ context->content = c;
+ context->prev_case_sens = case_sensitive;
+/* LOG(("%d %p %p (%p, %p)", new, search_data.found->next, search_data.current,
+ search_data.current->prev, search_data.current->next)); */
+ /* new search, beginning at the top of the page */
+ context->current = context->found->next;
+ context->newsearch = false;
+ }
+ else if (context->current != NULL) {
+ /* continued search in the direction specified */
+ if (forwards) {
+ if (context->current->next)
+ context->current = context->current->next;
+ }
+ else {
+ if (context->current->prev)
+ context->current = context->current->prev;
+ }
+ }
+
+ if (context->callbacks == NULL)
+ return;
+ if (context->callbacks->status != NULL)
+ context->callbacks->status((context->current != NULL),
+ context->p);
+ search_show_all(showall, context);
+
+ if (context->callbacks->back_state != NULL)
+ context->callbacks->back_state((context->current != NULL) &&
+ (context->current->prev != NULL),
+ context->p);
+ if (context->callbacks->forward_state != NULL)
+ context->callbacks->forward_state((context->current != NULL) &&
+ (context->current->next != NULL), context->p);
+
+ if (context->current == NULL)
+ return;
+
+ switch (c->type) {
+ case CONTENT_HTML:
+ /* get box position and jump to it */
+ box_coords(context->current->start_box,
+ &bounds.x0, &bounds.y0);
+ /* \todo: move x0 in by correct idx */
+ box_coords(context->current->end_box,
+ &bounds.x1, &bounds.y1);
+ /* \todo: move x1 in by correct idx */
+ bounds.x1 += context->current->end_box->width;
+ bounds.y1 += context->current->end_box->height;
+ break;
+
+ default:
+ assert(c->type == CONTENT_TEXTPLAIN);
+ textplain_coords_from_range(c,
+ context->current->start_idx,
+ context->current->end_idx, &bounds);
+ break;
+ }
+
+ gui_window_scroll_visible(context->bw->window,
+ bounds.x0, bounds.y0, bounds.x1, bounds.y1);
+}
+
+/**
+ * Find the first occurrence of 'match' in 'string' and return its index
+ *
+ * \param string the string to be searched (unterminated)
+ * \param s_len length of the string to be searched
+ * \param pattern the pattern for which we are searching (unterminated)
+ * \param p_len length of pattern
+ * \param case_sens true iff case sensitive match required
+ * \param m_len accepts length of match in bytes
+ * \return pointer to first match, NULL if none
+ */
+
+const char *find_pattern(const char *string, int s_len, const char *pattern,
+ int p_len, bool case_sens, unsigned int *m_len)
+{
+ struct { const char *ss, *s, *p; bool first; } context[16];
+ const char *ep = pattern + p_len;
+ const char *es = string + s_len;
+ const char *p = pattern - 1; /* a virtual '*' before the pattern */
+ const char *ss = string;
+ const char *s = string;
+ bool first = true;
+ int top = 0;
+
+ while (p < ep) {
+ bool matches;
+ if (p < pattern || *p == '*') {
+ char ch;
+
+ /* skip any further asterisks; one is the same as many
+ */
+ do p++; while (p < ep && *p == '*');
+
+ /* if we're at the end of the pattern, yes, it matches
+ */
+ if (p >= ep) break;
+
+ /* anything matches a # so continue matching from
+ here, and stack a context that will try to match
+ the wildcard against the next character */
+
+ ch = *p;
+ if (ch != '#') {
+ /* scan forwards until we find a match for
+ this char */
+ if (!case_sens) ch = toupper(ch);
+ while (s < es) {
+ if (case_sens) {
+ if (*s == ch) break;
+ } else if (toupper(*s) == ch)
+ break;
+ s++;
+ }
+ }
+
+ if (s < es) {
+ /* remember where we are in case the match
+ fails; we may then resume */
+ if (top < (int)NOF_ELEMENTS(context)) {
+ context[top].ss = ss;
+ context[top].s = s + 1;
+ context[top].p = p - 1;
+ /* ptr to last asterisk */
+ context[top].first = first;
+ top++;
+ }
+
+ if (first) {
+ ss = s;
+ /* remember first non-'*' char */
+ first = false;
+ }
+
+ matches = true;
+ }
+ else
+ matches = false;
+ }
+ else if (s < es) {
+ char ch = *p;
+ if (ch == '#')
+ matches = true;
+ else {
+ if (case_sens)
+ matches = (*s == ch);
+ else
+ matches = (toupper(*s) == toupper(ch));
+ }
+ if (matches && first) {
+ ss = s; /* remember first non-'*' char */
+ first = false;
+ }
+ }
+ else
+ matches = false;
+
+ if (matches) {
+ p++; s++;
+ }
+ else {
+ /* doesn't match, resume with stacked context if we have one */
+ if (--top < 0) return NULL; /* no match, give up */
+
+ ss = context[top].ss;
+ s = context[top].s;
+ p = context[top].p;
+ first = context[top].first;
+ }
+ }
+
+ /* end of pattern reached */
+ *m_len = max(s - ss, 1);
+ return ss;
+}
+
+/**
+ * Finds all occurrences of a given string in the html box tree
+ *
+ * \param pattern the string pattern to search for
+ * \param p_len pattern length
+ * \param cur pointer to the current box
+ * \param case_sens whether to perform a case sensitive search
+ * \return true on success, false on memory allocation failure
+ */
+bool find_occurrences_html(const char *pattern, int p_len, struct box *cur,
+ bool case_sens, struct search_context *context)
+{
+ struct box *a;
+
+ /* ignore this box, if there's no visible text */
+ if (!cur->object && cur->text) {
+ const char *text = cur->text;
+ unsigned length = cur->length;
+
+ while (length > 0) {
+ struct list_entry *entry;
+ unsigned match_length;
+ unsigned match_offset;
+ const char *new_text;
+ const char *pos = find_pattern(text, length,
+ pattern, p_len, case_sens,
+ &match_length);
+ if (!pos) break;
+
+ /* found string in box => add to list */
+ match_offset = pos - cur->text;
+
+ entry = add_entry(cur->byte_offset + match_offset,
+ cur->byte_offset +
+ match_offset +
+ match_length, context);
+ if (!entry)
+ return false;
+
+ entry->start_box = cur;
+ entry->end_box = cur;
+
+ new_text = pos + match_length;
+ length -= (new_text - text);
+ text = new_text;
+ }
+ }
+
+ /* and recurse */
+ for (a = cur->children; a; a = a->next) {
+ if (!find_occurrences_html(pattern, p_len, a, case_sens,
+ context))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Finds all occurrences of a given string in a textplain content
+ *
+ * \param pattern the string pattern to search for
+ * \param p_len pattern length
+ * \param c the content to be searched
+ * \param case_sens wheteher to perform a case sensitive search
+ * \return true on success, false on memory allocation failure
+ */
+
+bool find_occurrences_text(const char *pattern, int p_len,
+ struct content *c, bool case_sens,
+ struct search_context *context)
+{
+ int nlines = textplain_line_count(c);
+ int line;
+
+ for(line = 0; line < nlines; line++) {
+ size_t offset, length;
+ const char *text = textplain_get_line(c, line,
+ &offset, &length);
+ if (text) {
+ while (length > 0) {
+ struct list_entry *entry;
+ unsigned match_length;
+ size_t start_idx;
+ const char *new_text;
+ const char *pos = find_pattern(text, length,
+ pattern, p_len, case_sens,
+ &match_length);
+ if (!pos) break;
+
+ /* found string in line => add to list */
+ start_idx = offset + (pos - text);
+ entry = add_entry(start_idx, start_idx +
+ match_length, context);
+ if (!entry)
+ return false;
+
+ new_text = pos + match_length;
+ offset += (new_text - text);
+ length -= (new_text - text);
+ text = new_text;
+ }
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Determines whether any portion of the given text box should be
+ * selected because it matches the current search string.
+ *
+ * \param g gui window
+ * \param start_offset byte offset within text of string to be checked
+ * \param end_offset byte offset within text
+ * \param start_idx byte offset within string of highlight start
+ * \param end_idx byte offset of highlight end
+ * \return true iff part of the box should be highlighted
+ */
+
+bool gui_search_term_highlighted(struct gui_window *g,
+ unsigned start_offset, unsigned end_offset,
+ unsigned *start_idx, unsigned *end_idx,
+ struct search_context *context)
+{
+ if (g == context->bw->window) {
+ struct list_entry *a;
+ for(a = context->found->next; a; a = a->next)
+ if (a->sel && selection_defined(a->sel) &&
+ selection_highlighted(a->sel,
+ start_offset, end_offset,
+ start_idx, end_idx))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Specifies whether all matches or just the current match should
+ * be highlighted in the search text.
+ */
+
+void search_show_all(bool all, struct search_context *context)
+{
+ struct list_entry *a;
+
+ for (a = context->found->next; a; a = a->next) {
+ bool add = true;
+ if (!all && a != context->current) {
+ add = false;
+ if (a->sel) {
+ selection_clear(a->sel, true);
+ selection_destroy(a->sel);
+ a->sel = NULL;
+ }
+ }
+ if (add && !a->sel) {
+ a->sel = selection_create(context->bw);
+ if (a->sel) {
+ struct content *c = context->bw->
+ current_content;
+ switch (c->type) {
+ case CONTENT_HTML:
+ selection_init(a->sel,
+ c->data.html.layout);
+ break;
+ default:
+ assert(c->type ==
+ CONTENT_TEXTPLAIN);
+ selection_init(a->sel, NULL);
+ break;
+ }
+ selection_set_start(a->sel, a->start_idx);
+ selection_set_end(a->sel, a->end_idx);
+ }
+ }
+ }
+}
+
+/**
+ * Add a new entry to the list of matches
+ *
+ * \param start_idx offset of match start within textual representation
+ * \param end_idx offset of match end
+ * \return pointer to added entry, NULL iff failed
+ */
+
+struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
+ struct search_context *context)
+{
+ struct list_entry *entry;
+
+ /* found string in box => add to list */
+ entry = calloc(1, sizeof(*entry));
+ if (!entry) {
+ warn_user("NoMemory", 0);
+ return NULL;
+ }
+
+ entry->start_idx = start_idx;
+ entry->end_idx = end_idx;
+ entry->sel = NULL;
+
+ entry->next = 0;
+ entry->prev = context->found->prev;
+ if (context->found->prev == NULL)
+ context->found->next = entry;
+ else
+ context->found->prev->next = entry;
+ context->found->prev = entry;
+
+ return entry;
+}
+
+/**
+ * Ends the search process, invalidating all state
+ * freeing the list of found boxes
+ */
+void search_destroy_context(struct search_context *context)
+{
+ if (context->bw != NULL)
+ context->bw->search_context = NULL;
+ if ((context->string != NULL) && (context->callbacks != NULL) &&
+ (context->callbacks->add_recent != NULL)) {
+ context->callbacks->add_recent(context->string, context->p);
+ free(context->string);
+ }
+ free_matches(context);
+ free(context);
+}
+