summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-01-28 01:35:00 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-01-28 01:35:00 +0000
commit78d194cb77db00a530779aa2a1e8d2ef9707d229 (patch)
treeba4d25c396623825fcc020bf26cd757ca34f37ec
parent32fe1bd1bfcaa2c96cd407c3d1e20f2d4000bd0b (diff)
downloadnetsurf-78d194cb77db00a530779aa2a1e8d2ef9707d229.tar.gz
netsurf-78d194cb77db00a530779aa2a1e8d2ef9707d229.tar.bz2
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache. The new scheme works as follows: 1) Request content for URL (fetchcache()). 2) Start fetch of content (fetchcache_go()). 3) If no redirect, continue through the LOADING, READY, DONE etc. states as before. If redirect, receive NEWPTR for each redirect that occurs, then continue through the LOADING, READY, DONE etc. states as before. The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring. As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before. svn path=/trunk/netsurf/; revision=3787
-rw-r--r--content/content.c1
-rw-r--r--content/content.h2
-rw-r--r--content/fetchcache.c175
-rw-r--r--css/css.c40
-rw-r--r--css/css.h1
-rw-r--r--debug/netsurfd.c4
-rw-r--r--desktop/browser.c18
-rw-r--r--render/html.c62
-rw-r--r--render/html.h1
-rw-r--r--riscos/plugin.c9
-rw-r--r--riscos/theme_install.c1
11 files changed, 145 insertions, 169 deletions
diff --git a/content/content.c b/content/content.c
index 9a76f0039..c8389d80e 100644
--- a/content/content.c
+++ b/content/content.c
@@ -424,6 +424,7 @@ struct content * content_create(const char *url)
c->http_code = 0;
c->no_error_pages = false;
c->download = false;
+ c->redirect_count = 0;
c->error_count = 0;
c->cache_data->req_time = 0;
c->cache_data->res_time = 0;
diff --git a/content/content.h b/content/content.h
index 25202f33b..324af952f 100644
--- a/content/content.h
+++ b/content/content.h
@@ -82,7 +82,6 @@ typedef enum {
CONTENT_MSG_DONE, /**< finished */
CONTENT_MSG_ERROR, /**< error occurred */
CONTENT_MSG_STATUS, /**< new status string */
- CONTENT_MSG_REDIRECT, /**< replacement URL */
CONTENT_MSG_REFORMAT, /**< content_reformat done */
CONTENT_MSG_REDRAW, /**< needs redraw (eg. new animation frame) */
CONTENT_MSG_NEWPTR, /**< address of structure has changed */
@@ -227,6 +226,7 @@ struct content {
bool no_error_pages; /**< Used by fetchcache(). */
bool download; /**< Used by fetchcache(). */
+ unsigned int redirect_count; /**< Used by fetchcache(). */
/** Array of first n rendering errors or warnings. */
struct {
diff --git a/content/fetchcache.c b/content/fetchcache.c
index 2e8a7216c..c0e34e3aa 100644
--- a/content/fetchcache.c
+++ b/content/fetchcache.c
@@ -51,6 +51,8 @@ static void fetchcache_error_page(struct content *c, const char *error);
static void fetchcache_cache_update(struct content *c,
const struct cache_data *data);
static void fetchcache_notmodified(struct content *c, const void *data);
+static void fetchcache_redirect(struct content *c, const void *data,
+ unsigned long size);
/**
@@ -380,11 +382,10 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
bool res;
struct content *c = p;
content_type type;
- char *mime_type, *url;
+ char *mime_type;
char **params;
unsigned int i;
union content_msg_data msg_data;
- url_func_result result;
switch (msg) {
case FETCH_TYPE:
@@ -457,37 +458,7 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
break;
case FETCH_REDIRECT:
- c->fetch = 0;
- /* redirect URLs must be absolute by HTTP/1.1, but many sites send
- * relative ones: treat them as relative to requested URL */
- result = url_join(data, c->url, &url);
- /* set the status to ERROR so that the content is
- * destroyed in content_clean() */
- c->status = CONTENT_STATUS_ERROR;
- if (result == URL_FUNC_OK) {
- bool same;
-
- result = url_compare(c->url, url, &same);
-
- /* check that we're not attempting to
- * redirect to the same URL */
- if (result != URL_FUNC_OK || same) {
- msg_data.error =
- messages_get("BadRedirect");
- content_broadcast(c,
- CONTENT_MSG_ERROR, msg_data);
- }
- else {
- msg_data.redirect = url;
- content_broadcast(c,
- CONTENT_MSG_REDIRECT,
- msg_data);
- }
- free(url);
- } else {
- msg_data.error = messages_get("BadRedirect");
- content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
- }
+ fetchcache_redirect(c, data, size);
break;
case FETCH_NOTMODIFIED:
@@ -790,6 +761,144 @@ void fetchcache_notmodified(struct content *c, const void *data)
}
}
+/**
+ * Redirect callback handler
+ *
+ * Invalidates the content whose fetch was redirected and moves every user
+ * of it onto a replacement content for the redirect target. Each user is
+ * sent CONTENT_MSG_NEWPTR with its replacement before the replacement's
+ * fetch is started. On any failure, CONTENT_MSG_ERROR carrying the
+ * "BadRedirect" message is broadcast on \a c and the function returns.
+ *
+ * \param c     Content being fetched; must be in state TYPE_UNKNOWN
+ * \param data  Redirect target URL; it is joined against c->url, so it
+ *              may be relative
+ * \param size  Not used by this handler
+ */
+
+void fetchcache_redirect(struct content *c, const void *data,
+		unsigned long size)
+{
+	char *url;
+	char *referer;
+	long http_code;
+	const char *ref;
+	bool referer_clone_failed;
+	union content_msg_data msg_data;
+	url_func_result result;
+
+	/* Preconditions -- checked before c is dereferenced */
+	assert(c && data);
+	assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
+
+	http_code = fetch_http_code(c->fetch);
+	ref = fetch_get_referer(c->fetch);
+
+	/* Ensure a redirect happened */
+	assert(300 <= http_code && http_code <= 399);
+	/* 304 is handled by fetch_notmodified() */
+	assert(http_code != 304);
+
+	/* Give msg_data a defined value: it is handed by value to the
+	 * users' callbacks with CONTENT_MSG_NEWPTR further down */
+	memset(&msg_data, 0, sizeof msg_data);
+
+	/* Clone referer -- original is destroyed in fetch_abort().
+	 * Record the outcome now so that ref need not be inspected
+	 * once it is dangling. */
+	referer = ref ? strdup(ref) : NULL;
+	referer_clone_failed = (ref != NULL && referer == NULL);
+
+	/* set the status to ERROR so that this content is
+	 * destroyed in content_clean() */
+	fetch_abort(c->fetch);
+	c->fetch = 0;
+	c->status = CONTENT_STATUS_ERROR;
+
+	/* Ensure that referer cloning succeeded
+	 * _must_ be after content invalidation */
+	if (referer_clone_failed) {
+		LOG(("Failed cloning referer"));
+
+		msg_data.error = messages_get("BadRedirect");
+		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+		return;
+	}
+
+	/** \todo 300, 305, 307
+	 * More specifically:
+	 *  + 300 needs to serve up the fetch body to the user
+	 *  + 305 needs to refetch using the proxy specified in ::data
+	 *  + 307 needs to refetch.
+	 *
+	 * If the original request method was either GET or HEAD, then follow
+	 * redirect unconditionally. If the original request method was neither
+	 * GET nor HEAD, then the user MUST be asked what to do.
+	 *
+	 * Note:
+	 *  For backwards compatibility, all 301, 302 and 303 redirects are
+	 *  followed unconditionally with a GET request to the new location.
+	 */
+	if (http_code != 301 && http_code != 302 && http_code != 303) {
+		LOG(("Unsupported redirect type %ld", http_code));
+
+		msg_data.error = messages_get("BadRedirect");
+		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+		free(referer);
+		return;
+	}
+
+	/* Forcibly stop redirecting if we've followed too many redirects.
+	 * c->redirect_count is the number of redirects already followed
+	 * to arrive at c (each replacement gets its predecessor's + 1). */
+#define REDIRECT_LIMIT 10
+	if (c->redirect_count > REDIRECT_LIMIT) {
+		LOG(("Too many nested redirects"));
+
+		msg_data.error = messages_get("BadRedirect");
+		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+		free(referer);
+		return;
+	}
+#undef REDIRECT_LIMIT
+
+	/* redirect URLs must be absolute by HTTP/1.1, but many
+	 * sites send relative ones: treat them as relative to
+	 * requested URL */
+	result = url_join(data, c->url, &url);
+
+	if (result != URL_FUNC_OK) {
+		msg_data.error = messages_get("BadRedirect");
+		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+		free(referer);
+		return;
+	}
+
+	/* Process users of this content */
+	while (c->user_list->next) {
+		intptr_t p1, p2;
+		void (*callback)(content_msg msg,
+				struct content *c, intptr_t p1,
+				intptr_t p2,
+				union content_msg_data data);
+		struct content *replacement;
+
+		p1 = c->user_list->next->p1;
+		p2 = c->user_list->next->p2;
+		callback = c->user_list->next->callback;
+
+		/* Get replacement content -- HTTP GET request.
+		 * This happens while the user is still attached to c, so
+		 * that a failure here is reported to this user as well as
+		 * to the ones not yet processed. */
+		replacement = fetchcache(url, callback, p1, p2,
+				c->width, c->height, c->no_error_pages,
+				NULL, NULL, false, c->download);
+		if (!replacement) {
+			msg_data.error = messages_get("BadRedirect");
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+			free(url);
+			free(referer);
+			return;
+		}
+
+		/* Remove user from the redirected content */
+		content_remove_user(c, callback, p1, p2);
+
+		/* Set replacement's redirect count to 1 greater than ours */
+		replacement->redirect_count = c->redirect_count + 1;
+
+		/* Notify user that content has changed */
+		callback(CONTENT_MSG_NEWPTR, replacement, p1, p2, msg_data);
+
+		/* Start fetching the replacement content */
+		fetchcache_go(replacement, referer, callback, p1, p2,
+				c->width, c->height, NULL, NULL,
+				false, referer ? referer : c->url);
+	}
+
+	/* Clean up */
+	free(url);
+	free(referer);
+}
+
#ifdef TEST
#include <unistd.h>
diff --git a/css/css.c b/css/css.c
index 878be04cf..1853d272f 100644
--- a/css/css.c
+++ b/css/css.c
@@ -431,7 +431,6 @@ bool css_convert(struct content *c, int width, int height)
for (i = 0; i != HASH_SIZE; i++)
c->data.css.css->rule[i] = 0;
c->data.css.import_count = 0;
- c->data.css.import_url = 0;
c->data.css.import_content = 0;
c->data.css.origin = CSS_ORIGIN_UA;
c->active = 0;
@@ -504,11 +503,9 @@ void css_destroy(struct content *c)
/* imported stylesheets */
for (i = 0; i != c->data.css.import_count; i++)
if (c->data.css.import_content[i] != 0) {
- free(c->data.css.import_url[i]);
content_remove_user(c->data.css.import_content[i],
css_atimport_callback, (intptr_t) c, i);
}
- free(c->data.css.import_url);
free(c->data.css.import_content);
}
@@ -790,21 +787,11 @@ void css_atimport(struct content *c, struct css_node *node)
char *t, *url, *url1;
bool string = false, screen = true;
unsigned int i;
- char **import_url;
struct content **import_content;
url_func_result res;
LOG(("@import rule"));
- import_url = realloc(c->data.css.import_url,
- (c->data.css.import_count + 1) *
- sizeof(*c->data.css.import_url));
- if (!import_url) {
- /** \todo report to user */
- return;
- }
- c->data.css.import_url = import_url;
-
import_content = realloc(c->data.css.import_content,
(c->data.css.import_count + 1) *
sizeof(*c->data.css.import_content));
@@ -889,8 +876,7 @@ void css_atimport(struct content *c, struct css_node *node)
/* start the fetch */
c->data.css.import_count++;
i = c->data.css.import_count - 1;
- c->data.css.import_url[i] = url1;
- c->data.css.import_content[i] = fetchcache(c->data.css.import_url[i],
+ c->data.css.import_content[i] = fetchcache(url1,
css_atimport_callback, (intptr_t) c, i,
c->width, c->height, true, 0, 0, false, false);
if (c->data.css.import_content[i]) {
@@ -968,30 +954,6 @@ void css_atimport_callback(content_msg msg, struct content *css,
case CONTENT_MSG_STATUS:
break;
- case CONTENT_MSG_REDIRECT:
- c->active--;
- free(c->data.css.import_url[i]);
- c->data.css.import_url[i] = strdup(data.redirect);
- if (!c->data.css.import_url[i]) {
- /** \todo report to user */
- /* c->error = 1; */
- return;
- }
- c->data.css.import_content[i] = fetchcache(
- c->data.css.import_url[i],
- css_atimport_callback, (intptr_t) c, i,
- css->width, css->height, true, 0, 0,
- false, false);
- if (c->data.css.import_content[i]) {
- c->active++;
- fetchcache_go(c->data.css.import_content[i],
- c->url, css_atimport_callback,
- (intptr_t) c, i,
- css->width, css->height,
- 0, 0, false, c->url);
- }
- break;
-
case CONTENT_MSG_NEWPTR:
c->data.css.import_content[i] = css;
break;
diff --git a/css/css.h b/css/css.h
index 6767ade64..e9d374131 100644
--- a/css/css.h
+++ b/css/css.h
@@ -493,7 +493,6 @@ typedef enum {
struct content_css_data {
struct css_stylesheet *css; /**< Opaque stylesheet data. */
unsigned int import_count; /**< Number of entries in import_content. */
- char **import_url; /**< Imported stylesheet urls. */
struct content **import_content; /**< Imported stylesheet contents. */
css_origin origin; /**< Origin of stylesheet. */
};
diff --git a/debug/netsurfd.c b/debug/netsurfd.c
index b6c289cb7..d96a15da8 100644
--- a/debug/netsurfd.c
+++ b/debug/netsurfd.c
@@ -143,10 +143,6 @@ void callback(content_msg msg, struct content *c, void *p1,
done = destroyed = 1;
} else if (msg == CONTENT_MSG_STATUS)
printf("=== STATUS: %s\n", c->status_message);
- else if (msg == CONTENT_MSG_REDIRECT) {
- printf("=== REDIRECT to '%s'\n", data.redirect);
- done = destroyed = 1;
- }
}
diff --git a/desktop/browser.c b/desktop/browser.c
index 01e91846f..1f2709ec8 100644
--- a/desktop/browser.c
+++ b/desktop/browser.c
@@ -509,23 +509,6 @@ void browser_window_callback(content_msg msg, struct content *c,
browser_window_set_status(bw, c->status_message);
break;
- case CONTENT_MSG_REDIRECT:
- {
- const char *prev_url = bw->loading_content->url;
-
- bw->loading_content = 0;
- browser_window_set_status(bw,
- messages_get("Redirecting"));
- /* the spec says nothing about referrers and
- * redirects => follow Mozilla and preserve the
- * referer across the redirect */
- browser_window_go_post(bw, data.redirect, 0, 0,
- bw->history_add, bw->referer,
- bw->download, false,
- bw->referer ? bw->referer : prev_url);
- }
- break;
-
case CONTENT_MSG_REFORMAT:
if (c == bw->current_content &&
c->type == CONTENT_HTML) {
@@ -1177,7 +1160,6 @@ void download_window_callback(fetch_msg msg, void *p, const void *data,
break;
case FETCH_TYPE:
- case FETCH_REDIRECT:
case FETCH_NOTMODIFIED:
case FETCH_AUTH:
#ifdef WITH_SSL
diff --git a/render/html.c b/render/html.c
index 1068319e3..5a9ba27ef 100644
--- a/render/html.c
+++ b/render/html.c
@@ -1085,24 +1085,6 @@ void html_convert_css_callback(content_msg msg, struct content *css,
content_broadcast(c, CONTENT_MSG_STATUS, data);
break;
- case CONTENT_MSG_REDIRECT:
- c->active--;
- c->data.html.stylesheet_content[i] = fetchcache(
- data.redirect,
- html_convert_css_callback,
- (intptr_t) c, i, css->width, css->height,
- true, 0, 0, false, false);
- if (c->data.html.stylesheet_content[i]) {
- c->active++;
- fetchcache_go(c->data.html.stylesheet_content[i],
- c->url,
- html_convert_css_callback,
- (intptr_t) c, i, css->width,
- css->height, 0, 0, false,
- c->url);
- }
- break;
-
case CONTENT_MSG_NEWPTR:
c->data.html.stylesheet_content[i] = css;
break;
@@ -1169,12 +1151,6 @@ bool html_fetch_object(struct content *c, char *url, struct box *box,
return false;
}
c->data.html.object = object;
- c->data.html.object[i].url = talloc_strdup(c, url);
- if (!c->data.html.object[i].url) {
- content_remove_user(c_fetch, html_object_callback,
- (intptr_t) c, i);
- return false;
- }
c->data.html.object[i].box = box;
c->data.html.object[i].permitted_types = permitted_types;
c->data.html.object[i].background = background;
@@ -1221,8 +1197,6 @@ bool html_replace_object(struct content *c, unsigned int i, char *url,
html_object_callback, (intptr_t) c, i);
c->data.html.object[i].content = 0;
c->data.html.object[i].box->object = 0;
- talloc_free(c->data.html.object[i].url);
- c->data.html.object[i].url = 0;
}
/* initialise fetch */
@@ -1234,12 +1208,6 @@ bool html_replace_object(struct content *c, unsigned int i, char *url,
if (!c_fetch)
return false;
- c->data.html.object[i].url = talloc_strdup(c, url);
- if (!c->data.html.object[i].url) {
- content_remove_user(c_fetch, html_object_callback,
- (intptr_t) c, i);
- return false;
- }
c->data.html.object[i].content = c_fetch;
for (page = c; page; page = page->data.html.page) {
@@ -1349,35 +1317,6 @@ void html_object_callback(content_msg msg, struct content *object,
/* content_broadcast(c, CONTENT_MSG_STATUS, 0); */
break;
- case CONTENT_MSG_REDIRECT:
- c->active--;
- talloc_free(c->data.html.object[i].url);
- c->data.html.object[i].url = talloc_strdup(c,
- data.redirect);
- if (!c->data.html.object[i].url) {
- /** \todo report oom */
- } else {
- c->data.html.object[i].content = fetchcache(
- data.redirect,
- html_object_callback,
- (intptr_t) c, i, 0, 0, true,
- 0, 0, false, false);
- if (!c->data.html.object[i].content) {
- /** \todo report oom */
- } else {
- c->active++;
- fetchcache_go(c->data.html.object[i].
- content,
- c->url,
- html_object_callback,
- (intptr_t) c, i,
- 0, 0,
- 0, 0,
- false, c->url);
- }
- }
- break;
-
case CONTENT_MSG_REFORMAT:
break;
@@ -1439,7 +1378,6 @@ void html_object_callback(content_msg msg, struct content *object,
(msg == CONTENT_MSG_LOADING ||
msg == CONTENT_MSG_DONE ||
msg == CONTENT_MSG_ERROR ||
- msg == CONTENT_MSG_REDIRECT ||
msg == CONTENT_MSG_AUTH)) {
/* all objects have arrived */
content_reformat(c, c->available_width, c->height);
diff --git a/render/html.h b/render/html.h
index d21dc12d9..af6a886bf 100644
--- a/render/html.h
+++ b/render/html.h
@@ -66,7 +66,6 @@ typedef enum {
/** An object (<img>, <object>, etc.) in a CONTENT_HTML document. */
struct content_html_object {
- char *url; /**< URL of this object. */
struct content *content; /**< Content, or 0. */
struct box *box; /**< Node in box tree containing it. */
/** Pointer to array of permitted content_type, terminated by
diff --git a/riscos/plugin.c b/riscos/plugin.c
index aea4ad93d..19f0f106f 100644
--- a/riscos/plugin.c
+++ b/riscos/plugin.c
@@ -1704,14 +1704,6 @@ void plugin_stream_callback(content_msg msg, struct content *c,
plugin_STREAM_DESTROY_ERROR);
break;
- case CONTENT_MSG_REDIRECT:
- /* and re-start fetch with new URL */
- p->c = 0;
- if (!plugin_start_fetch(p, data.redirect))
- plugin_destroy_stream(p,
- plugin_STREAM_DESTROY_ERROR);
- break;
-
case CONTENT_MSG_NEWPTR:
p->c = c;
break;
@@ -1775,7 +1767,6 @@ void plugin_fetch_callback(fetch_msg msg, void *p, const void *data,
break;
case FETCH_TYPE:
- case FETCH_REDIRECT:
case FETCH_NOTMODIFIED:
case FETCH_AUTH:
#ifdef WITH_SSL
diff --git a/riscos/theme_install.c b/riscos/theme_install.c
index 76b8de703..4fa77a687 100644
--- a/riscos/theme_install.c
+++ b/riscos/theme_install.c
@@ -125,7 +125,6 @@ void theme_install_callback(content_msg msg, struct content *c,
break;
case CONTENT_MSG_LOADING:
- case CONTENT_MSG_REDIRECT:
case CONTENT_MSG_REFORMAT:
case CONTENT_MSG_REDRAW:
case CONTENT_MSG_NEWPTR: