diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2008-07-30 00:51:26 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2008-07-30 00:51:26 +0000 |
commit | 307ab6675f9a7129d645bb5c25b919d25e3ca8bd (patch) | |
tree | ae51e48d616bb648bb1c5ded7303dfc7cfa291bf | |
parent | 3eabd20dcf0f030cebac90b0081b8c8f0fb4f1be (diff) | |
download | netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.gz netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.bz2 |
Rewrite parsing of <meta http-equiv="Refresh" ...> content values. The previous code was full of nasty edge cases. As an added bonus, there's some BNF documenting what we expect to support here.
svn path=/trunk/netsurf/; revision=4814
-rw-r--r-- | render/html.c | 133 |
1 files changed, 92 insertions, 41 deletions
diff --git a/render/html.c b/render/html.c index 64d3e1386..8685b6967 100644 --- a/render/html.c +++ b/render/html.c @@ -603,7 +603,7 @@ bool html_meta_refresh(struct content *c, xmlNode *head) xmlNode *n; xmlChar *equiv, *content; union content_msg_data msg_data; - char *url, *end, *refresh; + char *url, *end, *refresh = NULL, quote = 0; url_func_result res; for (n = head == 0 ? 0 : head->children; n; n = n->next) { @@ -642,6 +642,16 @@ bool html_meta_refresh(struct content *c, xmlNode *head) end = (char *) content + strlen((const char *) content); + /* content := *LWS 1*DIGIT *LWS [';' *LWS *1url *LWS] + * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) + * url-nq := *urlchar + * url-sq := "'" (urlchar | '"') "'" + * url-dq := '"' (urlchar | "'") '"' + * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii + * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] + */ + + /* *LWS 1*DIGIT */ msg_data.delay = (int)strtol((char *) content, &url, 10); /* a very small delay and self-referencing URL can cause a loop * that grinds machines to a halt. To prevent this we set a @@ -649,6 +659,20 @@ bool html_meta_refresh(struct content *c, xmlNode *head) if (msg_data.delay < 1) msg_data.delay = 1; + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* ';' */ + if (url < end && *url == ';') + url++; + + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + if (url == end) { /* Just delay specified, so refresh current page */ xmlFree(content); @@ -665,62 +689,89 @@ bool html_meta_refresh(struct content *c, xmlNode *head) break; } - for ( ; url <= end - 4; url++) { - if (!strncasecmp(url, "url=", 4)) { - url += 4; - break; + /* "url" */ + if (url <= end - 3) { + if (strncasecmp(url, "url", 3) == 0) { + url += 3; + } else { + /* Unexpected input, ignore this header */ + continue; } + } else { + /* Insufficient input, ignore this header */ + continue; } - /* various sites contain junk meta refresh URL components, - * so attempt to deal with this by stripping likely garbage - * from the beginning and end of URLs */ - while (url < end) { - if (isspace(*url) || *url == '\'' || *url == '"') + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } + + /* '=' */ + if (url < end) { + if (*url == '=') { url++; - else - break; + } else { + /* Unexpected input, ignore this header */ + continue; + } + } else { + /* Insufficient input, ignore this header */ + continue; } - while (end > url) { - if (isspace(end[-1]) || end[-1] == '\'' || - end[-1] == '"') - *--end = '\0'; - else - break; + /* *LWS */ + while (url < end && isspace(*url)) { + url++; } - if (url < end) { - res = url_join(url, c->data.html.base_url, &refresh); + /* '"' or "'" */ + if (url < end && (*url == '"' || *url == '\'')) { + quote = *url; + url++; + } - xmlFree(content); + /* Start of URL */ + refresh = url; - if (res == URL_FUNC_NOMEM) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, - CONTENT_MSG_ERROR, msg_data); - return false; - } else if (res == URL_FUNC_FAILED) { - /* This isn't fatal so carry on looking */ - continue; - } + if (quote != 0) { + /* url-sq | url-dq */ + while (url < end && *url != quote) + url++; + } else { + /* url-nq */ + while (url < end && !isspace(*url)) + url++; + } - c->refresh = talloc_strdup(c, refresh); + /* '"' or "'" or *LWS (we don't care) */ + if (url < end) + *url = '\0'; - free(refresh); + res = url_join(refresh, c->data.html.base_url, &refresh); - if (!c->refresh) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, - CONTENT_MSG_ERROR, msg_data); - return false; - } + xmlFree(content); - content_broadcast(c, CONTENT_MSG_REFRESH, msg_data); - break; + if (res == URL_FUNC_NOMEM) { + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } else if (res == URL_FUNC_FAILED) { + /* This isn't fatal so carry on looking */ + continue; } - xmlFree(content); + c->refresh = talloc_strdup(c, refresh); + + free(refresh); + + if (!c->refresh) { + msg_data.error = messages_get("NoMemory"); + content_broadcast(c, CONTENT_MSG_ERROR, msg_data); + return false; + } + + content_broadcast(c, CONTENT_MSG_REFRESH, msg_data); } return true; |