summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--content/fetch.c15
-rw-r--r--content/fetchcache.c1
-rw-r--r--css/css.c1
-rw-r--r--desktop/browser.c19
-rw-r--r--desktop/browser.h6
-rw-r--r--desktop/loginlist.c7
-rw-r--r--desktop/netsurf.c2
-rw-r--r--makefile8
-rw-r--r--render/box.c1
-rw-r--r--render/html.c7
-rw-r--r--riscos/401login.c3
-rw-r--r--riscos/about.c128
-rw-r--r--riscos/gui.c8
-rw-r--r--riscos/save_complete.c50
-rw-r--r--riscos/url_protocol.c (renamed from riscos/url.c)2
-rw-r--r--riscos/url_protocol.h (renamed from riscos/url.h)0
-rw-r--r--riscos/window.c23
-rw-r--r--utils/url.c437
-rw-r--r--utils/url.h20
-rw-r--r--utils/utils.c105
-rw-r--r--utils/utils.h4
21 files changed, 551 insertions, 296 deletions
diff --git a/content/fetch.c b/content/fetch.c
index 295324484..6d35cb29c 100644
--- a/content/fetch.c
+++ b/content/fetch.c
@@ -26,7 +26,6 @@
#include <strings.h>
#include <time.h>
#include "curl/curl.h"
-#include "libxml/uri.h"
#include "netsurf/utils/config.h"
#include "netsurf/content/fetch.h"
#ifdef riscos
@@ -41,6 +40,7 @@
#endif
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
@@ -174,19 +174,12 @@ struct fetch * fetch_start(char *url, char *referer,
struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch;
CURLcode code;
CURLMcode codem;
- xmlURI *uri;
#ifdef WITH_AUTH
struct login *li;
#endif
LOG(("fetch %p, url '%s'", fetch, url));
- uri = xmlParseURI(url);
- if (uri == 0) {
- LOG(("warning: failed to parse url"));
- return 0;
- }
-
/* construct a new fetch structure */
fetch->callback = callback;
fetch->had_headers = false;
@@ -199,9 +192,7 @@ struct fetch * fetch_start(char *url, char *referer,
fetch->referer = xstrdup(referer);
fetch->p = p;
fetch->headers = 0;
- fetch->host = 0;
- if (uri->server != 0)
- fetch->host = xstrdup(uri->server);
+ fetch->host = url_host(url);
fetch->content_length = 0;
#ifdef WITH_POST
fetch->post_urlenc = 0;
@@ -216,8 +207,6 @@ struct fetch * fetch_start(char *url, char *referer,
fetch->prev = 0;
fetch->next = 0;
- xmlFreeURI(uri);
-
/* look for a fetch from the same host */
if (fetch->host != 0) {
for (host_fetch = fetch_list;
diff --git a/content/fetchcache.c b/content/fetchcache.c
index 3639c39a0..21b50ee0b 100644
--- a/content/fetchcache.c
+++ b/content/fetchcache.c
@@ -24,6 +24,7 @@
#include "netsurf/content/fetch.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
diff --git a/css/css.c b/css/css.c
index e03e0c107..ee125da19 100644
--- a/css/css.c
+++ b/css/css.c
@@ -21,6 +21,7 @@
#include "netsurf/desktop/gui.h"
#endif
#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
/**
diff --git a/desktop/browser.c b/desktop/browser.c
index d75b1c33f..6aee554ad 100644
--- a/desktop/browser.c
+++ b/desktop/browser.c
@@ -30,6 +30,7 @@
#include "netsurf/render/layout.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
@@ -152,12 +153,16 @@ void browser_window_go_post(struct browser_window *bw, const char *url,
browser_window_set_status(bw, messages_get("Loading"));
bw->history_add = history_add;
bw->time0 = clock();
- c = fetchcache(url, 0,
- browser_window_callback, bw, 0,
- gui_window_get_width(bw->window), 0,
- false,
- post_urlenc, post_multipart,
- true);
+ if (strncmp(url, "about:", 6) == 0)
+ c = about_create(url, browser_window_callback, bw, 0,
+ gui_window_get_width(bw->window), 0);
+ else
+ c = fetchcache(url, 0,
+ browser_window_callback, bw, 0,
+ gui_window_get_width(bw->window), 0,
+ false,
+ post_urlenc, post_multipart,
+ true);
if (!c) {
browser_window_set_status(bw, messages_get("FetchFailed"));
return;
@@ -1743,6 +1748,8 @@ void browser_form_submit(struct browser_window *bw, struct form *form,
case method_POST_MULTIPART:
url = url_join(form->action, base);
+ if (!url)
+ break;
browser_window_go_post(bw, url, 0, success, true);
break;
diff --git a/desktop/browser.h b/desktop/browser.h
index e25b26fbe..072201766 100644
--- a/desktop/browser.h
+++ b/desktop/browser.h
@@ -119,4 +119,10 @@ void history_destroy(struct history *history);
void history_back(struct browser_window *bw, struct history *history);
void history_forward(struct browser_window *bw, struct history *history);
+/* In platform specific about.c.
+ *
+ * Create the about page for an about: url and return it as a new content.
+ * callback, p1 and p2 are added as a user of that content; width and
+ * height are stored as the content's width and height.
+ */
+struct content *about_create(const char *url,
+ void (*callback)(content_msg msg, struct content *c, void *p1,
+ void *p2, const char *error),
+ void *p1, void *p2, unsigned long width, unsigned long height);
+
#endif
diff --git a/desktop/loginlist.c b/desktop/loginlist.c
index 45d4c0684..85f9e995e 100644
--- a/desktop/loginlist.c
+++ b/desktop/loginlist.c
@@ -12,6 +12,7 @@
#include "netsurf/utils/config.h"
#include "netsurf/desktop/401login.h"
#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
#ifdef WITH_AUTH
@@ -30,7 +31,7 @@ static struct login *loginlist = &login;
void login_list_add(char *host, char* logindets) {
struct login *nli = xcalloc(1, sizeof(*nli));
- char *temp = get_host_from_url(host);
+ char *temp = url_host(host);
char *i;
assert(temp);
@@ -81,7 +82,7 @@ struct login *login_list_get(char *host) {
(strncasecmp(host, "https://", 8) != 0))
return NULL;
- temphost = get_host_from_url(host);
+ temphost = url_host(host);
assert(temphost);
temp = xstrdup(host);
@@ -89,7 +90,7 @@ struct login *login_list_get(char *host) {
* So make sure we've got that at least
*/
if (strlen(temphost) > strlen(temp)) {
- temp = get_host_from_url(host);
+ temp = url_host(host);
assert(temp);
}
diff --git a/desktop/netsurf.c b/desktop/netsurf.c
index 724b57415..02a29c3f5 100644
--- a/desktop/netsurf.c
+++ b/desktop/netsurf.c
@@ -17,6 +17,7 @@
#include "netsurf/desktop/browser.h"
#include "netsurf/desktop/gui.h"
#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
bool netsurf_quit = false;
@@ -63,6 +64,7 @@ void netsurf_init(int argc, char** argv)
#ifdef WITH_GIF
nsgif_init();
#endif
+ url_init();
}
diff --git a/makefile b/makefile
index f7101c6a9..b64bfc600 100644
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@ CC_DEBUG = gcc
OBJECTS_COMMON = cache.o content.o fetch.o fetchcache.o other.o \
css.o css_enum.o parser.o ruleset.o scanner.o \
box.o form.o html.o layout.o textplain.o \
- messages.o utils.o translit.o pool.o
+ messages.o utils.o translit.o pool.o url.o
OBJECTS = $(OBJECTS_COMMON) \
browser.o loginlist.o netsurf.o options.o \
htmlinstance.o htmlredraw.o \
@@ -17,7 +17,7 @@ OBJECTS = $(OBJECTS_COMMON) \
menus.o mouseactions.o \
textselection.o theme.o window.o \
draw.o gif.o jpeg.o plugin.o png.o sprite.o \
- about.o filetype.o font.o uri.o url.o history.o \
+ about.o filetype.o font.o uri.o url_protocol.o history.o \
version.o save_draw.o save_complete.o thumbnail.o save.o
OBJECTS_DEBUG = $(OBJECTS_COMMON) \
netsurfd.o \
@@ -39,8 +39,8 @@ CFLAGS = -std=c9x -D_BSD_SOURCE -Driscos -DBOOL_DEFINED -O $(WARNFLAGS) -I.. \
-mpoke-function-name
CFLAGS_DEBUG = -std=c9x -D_BSD_SOURCE $(WARNFLAGS) -I.. -I/usr/include/libxml2 -g
LDFLAGS = -L/riscos/lib -lxml2 -lz -lcurl -lssl -lcrypto -lares -lanim -lpng \
- -lifc -loslib -luri -ljpeg
-LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl -luri
+ -lifc -loslib -ljpeg
+LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl
OBJDIR = $(shell $(CC) -dumpmachine)
SOURCES=$(OBJECTS:.o=.c)
diff --git a/render/box.c b/render/box.c
index fb270e6ad..4ee3ec95b 100644
--- a/render/box.c
+++ b/render/box.c
@@ -32,6 +32,7 @@
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/pool.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
diff --git a/render/html.c b/render/html.c
index 2eccc78d7..d1c2cafa3 100644
--- a/render/html.c
+++ b/render/html.c
@@ -22,9 +22,10 @@
#endif
#include "netsurf/render/html.h"
#include "netsurf/render/layout.h"
-#include "netsurf/utils/utils.h"
-#include "netsurf/utils/messages.h"
#include "netsurf/utils/log.h"
+#include "netsurf/utils/messages.h"
+#include "netsurf/utils/url.h"
+#include "netsurf/utils/utils.h"
#define CHUNK 4096
@@ -220,7 +221,7 @@ void html_head(struct content *c, xmlNode *head)
} else if (strcmp(node->name, "base") == 0) {
char *href = (char *) xmlGetProp(node, (const xmlChar *) "href");
if (href) {
- char *url = url_join(href, 0);
+ char *url = url_normalize(href);
if (url) {
free(c->data.html.base_url);
c->data.html.base_url = url;
diff --git a/riscos/401login.c b/riscos/401login.c
index b46d115d0..c96fcd2c1 100644
--- a/riscos/401login.c
+++ b/riscos/401login.c
@@ -17,6 +17,7 @@
#include "netsurf/riscos/gui.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
#ifdef WITH_AUTH
@@ -65,7 +66,7 @@ void gui_401login_open(struct browser_window *bw, struct content *c, char *realm
char *murl, *host;
murl = c->url;
- host = get_host_from_url(murl);
+ host = url_host(murl);
assert(host);
bwin = bw;
diff --git a/riscos/about.c b/riscos/about.c
index cfec2733a..89f39506c 100644
--- a/riscos/about.c
+++ b/riscos/about.c
@@ -16,20 +16,18 @@
#include <stdio.h>
#include <time.h>
#include <unixlib/local.h> /* for __unixify */
-
-#include "netsurf/utils/config.h"
-#include "netsurf/desktop/netsurf.h"
-#include "netsurf/riscos/about.h"
-#include "netsurf/utils/log.h"
-#include "netsurf/utils/messages.h"
-#include "netsurf/utils/utils.h"
-
#include "oslib/fileswitch.h"
#include "oslib/osargs.h"
#include "oslib/osfile.h"
#include "oslib/osfind.h"
#include "oslib/osfscontrol.h"
#include "oslib/osgbpb.h"
+#include "netsurf/utils/config.h"
+#include "netsurf/desktop/browser.h"
+#include "netsurf/desktop/netsurf.h"
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/messages.h"
+#include "netsurf/utils/utils.h"
#ifdef WITH_ABOUT
@@ -41,81 +39,53 @@ static const char *paboutpl3 = "<tr valign=\"top\"><td width=\"30%%\"><font size
static const char *pabtplgft = "</table>"; /**< Plugin table footer */
static const char *paboutftr = "</div></body></html>"; /**< Page footer */
-/** The about page */
-struct about_page {
-
- char *header; /**< page header */
- char *browser; /**< browser details */
- char *plghead; /**< plugin header */
- struct plugd *plugd; /**< plugin details list */
- char *plgfoot; /**< plugin footer */
- char *footer; /**< page footer */
-};
-
-/** A set of plugin details */
-struct plugd {
- char *details; /**< plugin details */
- struct plugd *next; /**< next plugin details */
-};
-
-struct plugd *new_plugin(struct plugd *pd, char* details);
/**
- * Adds a plugin's details to the head of the linked list of plugin details
- * Returns the new head of the list
+ * Create the browser about page.
+ *
+ * \param url requested url (about:...)
+ * \param callback content callback function, for content_add_user()
+ * \param p1 user parameter for callback
+ * \param p2 user parameter for callback
+ * \param width available width
+ * \param height available height
+ * \return a new content containing the about page
*/
-struct plugd *new_plugin(struct plugd *pd, char* details) {
-
- struct plugd *np = xcalloc(1, sizeof(*np));
- np->details = 0;
- np->details = details;
-
- np->next = pd;
- return np;
-}
-
-/**
- * Creates the about page and stores it in <Wimp$ScrapDir>.WWW.Netsurf
- */
-void about_create(void) {
-
- struct about_page *abt;
- struct plugd *temp;
+struct content *about_create(const char *url,
+ void (*callback)(content_msg msg, struct content *c, void *p1,
+ void *p2, const char *error),
+ void *p1, void *p2, unsigned long width, unsigned long height)
+{
+ struct content *c = 0;
FILE *fp;
char *buf, *val, var[20], *ptype, *pdetails, *fname, *furl;
int i, nofiles, j, w, h, size;
fileswitch_object_type fot;
os_error *e;
+ const char *params[] = { 0 };
- abt = (struct about_page*)xcalloc(1, sizeof(*abt));
- abt->plugd = 0;
+ c = content_create(url);
+ c->width = width;
+ c->height = height;
+ content_add_user(c, callback, p1, p2);
+ content_set_type(c, CONTENT_HTML, "text/html", params);
/* Page header */
buf = xcalloc(strlen(pabouthdr) + 50, sizeof(char));
snprintf(buf, strlen(pabouthdr) + 50, pabouthdr, "About NetSurf",
netsurf_version);
- abt->header = xstrdup(buf);
- xfree(buf);
+ content_process_data(c, buf, strlen(buf));
+ free(buf);
/* browser details */
- xosfile_read_stamped_no_path("<NetSurf$Dir>.About.About",0,0,0,&i,0,0);
- fp = fopen("<NetSurf$Dir>.About.About", "r");
- buf = xcalloc((unsigned int)i + 10, sizeof(char));
- fread(buf, sizeof(char), (unsigned int)i, fp);
- fclose(fp);
- abt->browser = xstrdup(buf);
- xfree(buf);
+ buf = load("<NetSurf$Dir>.About.About");
+ content_process_data(c, buf, strlen(buf));
+ free(buf);
/* plugin header */
- abt->plghead = xstrdup(pabtplghd);
-
- /* plugin footer */
- abt->plgfoot = xstrdup(pabtplgft);
-
- /* Page footer */
- abt->footer = xstrdup(paboutftr);
+ content_process_data(c, pabtplghd, strlen(pabtplghd));
/* plugins registered */
for (i=0; i!=4096; i++) {
@@ -172,7 +142,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl1) + strlen(ptype) + strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl1, ptype, pdetails);
LOG(("furl: %s", furl));
- abt->plugd = new_plugin(abt->plugd, furl);
+ content_process_data(c, furl, strlen(furl));
xfree(pdetails);
continue;
}
@@ -214,7 +184,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl3) + strlen(ptype) + strlen(buf) +
strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl3, ptype, buf, ptype, w, h, pdetails);
- abt->plugd = new_plugin(abt->plugd, furl);
+ content_process_data(c, furl, strlen(furl));
xfree(pdetails);
continue;
}
@@ -230,7 +200,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl2) + strlen(ptype) + strlen(fname) + strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl2, ptype, fname, ptype, pdetails);
- abt->plugd = new_plugin(abt->plugd, furl);
+ content_process_data(c, furl, strlen(furl));
xfree(fname);
xfree(pdetails);
}
@@ -241,29 +211,15 @@ void about_create(void) {
}
}
- /* write file */
- xosfile_create_dir("<Wimp$ScrapDir>.WWW", 77);
- xosfile_create_dir("<Wimp$ScrapDir>.WWW.NetSurf", 77);
-
- fp = fopen("<Wimp$ScrapDir>.WWW.Netsurf.About", "w+");
- fprintf(fp, "%s", abt->header);
- fprintf(fp, "%s", abt->browser);
- fprintf(fp, "%s", abt->plghead);
- while (abt->plugd != 0) {
- fprintf(fp, "%s", abt->plugd->details);
- temp = abt->plugd;
- abt->plugd = abt->plugd->next;
- xfree(temp);
- }
- fprintf(fp, "%s", abt->plgfoot);
- fprintf(fp, "%s", abt->footer);
- fclose(fp);
+ /* plugin footer */
+ content_process_data(c, pabtplgft, strlen(pabtplgft));
- xosfile_set_type("<Wimp$ScrapDir>.WWW.NetSurf.About", 0xfaf);
+ /* Page footer */
+ content_process_data(c, paboutftr, strlen(paboutftr));
- xfree(abt);
+ content_convert(c, c->width, c->height);
- return;
+ return c;
}
#ifdef WITH_COOKIES
diff --git a/riscos/gui.c b/riscos/gui.c
index dd976ee4e..a325aebf2 100644
--- a/riscos/gui.c
+++ b/riscos/gui.c
@@ -29,9 +29,6 @@
#include "netsurf/render/font.h"
#include "netsurf/render/form.h"
#include "netsurf/render/html.h"
-#ifdef WITH_ABOUT
-#include "netsurf/riscos/about.h"
-#endif
#include "netsurf/riscos/constdata.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/options.h"
@@ -43,7 +40,7 @@
#include "netsurf/riscos/uri.h"
#endif
#ifdef WITH_URL
-#include "netsurf/riscos/url.h"
+#include "netsurf/riscos/url_protocol.h"
#endif
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
@@ -257,9 +254,6 @@ void ro_gui_icon_bar_create(void)
void gui_quit(void)
{
-#ifdef WITH_ABOUT
- about_quit();
-#endif
ro_gui_history_quit();
wimp_close_down(task_handle);
xhourglass_off();
diff --git a/riscos/save_complete.c b/riscos/save_complete.c
index a97465679..08baffbd8 100644
--- a/riscos/save_complete.c
+++ b/riscos/save_complete.c
@@ -6,13 +6,8 @@
*/
#include <string.h>
-
#include <unixlib/local.h> /* for __riscosify */
-
-#include <uri.h> /* possibly just have accessor methods in utils.c */
-
#include "oslib/osfile.h"
-
#include "netsurf/utils/config.h"
#include "netsurf/content/content.h"
#include "netsurf/css/css.h"
@@ -30,7 +25,6 @@
*/
void save_imported_sheets(struct content *c, int parent, int level, char *p, char* fn);
-char* get_filename(char * url);
/* this is temporary. */
const char * const SAVE_PATH = "<NetSurf$Dir>.savetest.";
@@ -46,7 +40,7 @@ void save_complete(struct content *c) {
return;
}
- fname = get_filename(c->data.html.base_url);
+ fname = "test"; /*get_filename(c->data.html.base_url);*/
if (!fname) { /* no path -> exit */
return;
@@ -135,46 +129,4 @@ void save_imported_sheets(struct content *c, int parent, int level, char *p, cha
}
}
-char* get_filename(char * url) {
-
- char *ret = 0, *offs;
- uri_t *uri;
-
- uri = uri_alloc(url, (int)strlen(url));
-
- if (!uri) {
- return 0;
- }
-
- if (uri->path) {
- /* Two possible cases here:
- * a) no page name given (eg http://www.blah.com/) -> index.html
- * b) page name given
- */
- /* case a */
- if (strlen(uri->path) == 0) {
- ret = xstrdup("index.html");
- }
- /* case b */
- else {
- offs = strrchr(uri->path, '/');
- if (!offs) {
- ret = xstrdup(uri->path);
- }
- else {
- ret = xstrdup(offs+1);
- }
- }
- }
-
- uri_free(uri);
-
- offs = xcalloc(strlen(ret)+1, sizeof(char));
-
- __riscosify(ret, 0, 0, offs, strlen(ret)+1, 0);
-
- xfree(ret);
-
- return offs;
-}
#endif
diff --git a/riscos/url.c b/riscos/url_protocol.c
index e1ee94d47..1553d17ac 100644
--- a/riscos/url.c
+++ b/riscos/url_protocol.c
@@ -15,7 +15,7 @@
#include "netsurf/riscos/theme.h"
#include "netsurf/desktop/gui.h"
#include "netsurf/riscos/gui.h"
-#include "netsurf/riscos/url.h"
+#include "netsurf/riscos/url_protocol.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/utils.h"
diff --git a/riscos/url.h b/riscos/url_protocol.h
index 01b99b7c1..01b99b7c1 100644
--- a/riscos/url.h
+++ b/riscos/url_protocol.h
diff --git a/riscos/window.c b/riscos/window.c
index 80cd80b02..fa8b2e1d7 100644
--- a/riscos/window.c
+++ b/riscos/window.c
@@ -27,6 +27,7 @@
#include "netsurf/riscos/theme.h"
#include "netsurf/riscos/thumbnail.h"
#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
gui_window *window_list = 0;
@@ -676,6 +677,7 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar)
struct content *content = g->data.browser.bw->current_content;
wimp_window_state state;
int y;
+ char *url;
assert(g->type == GUI_BROWSER_WINDOW);
@@ -744,22 +746,11 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar)
case wimp_KEY_RETURN:
if (!toolbar)
break;
- else {
- char *url = xcalloc(1, 10 + strlen(g->url));
- char *url2;
- if (g->url[strspn(g->url, "abcdefghijklmnopqrstuvwxyz")] != ':') {
- strcpy(url, "http://");
- strcpy(url + 7, g->url);
- } else {
- strcpy(url, g->url);
- }
- url2 = url_join(url, 0);
- free(url);
- if (url2) {
- gui_window_set_url(g, url2);
- browser_window_go(g->data.browser.bw, url2);
- free(url2);
- }
+ url = url_normalize(g->url);
+ if (url) {
+ gui_window_set_url(g, url);
+ browser_window_go(g->data.browser.bw, url);
+ free(url);
}
return true;
diff --git a/utils/url.c b/utils/url.c
new file mode 100644
index 000000000..c22144495
--- /dev/null
+++ b/utils/url.c
@@ -0,0 +1,437 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
+ */
+
+/** \file
+ * URL parsing and joining (implementation).
+ */
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
+#include "netsurf/utils/utils.h"
+
+
+regex_t url_re, url_up_re;
+
+/**
+ * Initialise URL routines.
+ *
+ * Compiles the regular expressions url_re and url_up_re, which are
+ * required by the other url_ functions. Must be called before any of them.
+ */
+
+void url_init(void)
+{
+	/* regex from RFC 2396; the groups used in this file are
+	 * 1 = "scheme:", 2 = scheme, 4 = authority, 5 = path,
+	 * 6 = "?query", 7 = query, 9 = fragment */
+	regcomp_wrapper(&url_re, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)"
+			"(\\?([^#]*))?(#(.*))?$", REG_EXTENDED);
+	/* matches "/<segment>/.." followed by "/" or the end, where
+	 * <segment> (group 1) is anything except "..". The two character
+	 * case needs three alternatives: ".x", "x." and "xy"; without the
+	 * last one a path such as "/ab/../c" would never be collapsed. */
+	regcomp_wrapper(&url_up_re,
+			"/(|[^/]|[.][^./]|[^./][.]|[^./][^./]|[^/][^/][^/]+)"
+			"/[.][.](/|$)",
+			REG_EXTENDED);
+}
+
+
+/**
+ * Normalize a URL.
+ *
+ * \param url an absolute URL
+ * \return cleaned up url, allocated on the heap, or 0 on failure
+ *
+ * url_init() must have been called first. The caller owns the returned
+ * string and should free() it.
+ *
+ * If there is no scheme, http:// is added. The scheme is lower-cased, and
+ * so is the authority up to its first ':'. A ":80" port is removed for
+ * http, and an empty port (a trailing ':') is removed for any scheme. An
+ * empty path is replaced with "/". A %XX escape is decoded only when it
+ * stands for a printable ASCII character (0x21 to 0x7e) which is not in
+ * the reserved set below; all other escapes are left as they are.
+ */
+
+char *url_normalize(const char *url)
+{
+	char c;
+	char *res = 0;
+	int m;
+	int i;
+	int len;
+	bool http = false;
+	regmatch_t match[10];
+
+	m = regexec(&url_re, url, 10, match, 0);
+	if (m) {
+		LOG(("url '%s' failed to match regex", url));
+		return 0;
+	}
+
+	len = strlen(url);
+
+	if (match[1].rm_so == -1) {
+		/* scheme missing: add http:// and reparse */
+		LOG(("scheme missing: using http"));
+		/* room for "http://", the terminator, and the "/" which
+		 * may be inserted as the path below */
+		res = malloc(len + 13);
+		if (!res) {
+			LOG(("malloc failed"));
+			return 0;
+		}
+		strcpy(res, "http://");
+		strcpy(res + 7, url);
+		m = regexec(&url_re, res, 10, match, 0);
+		if (m) {
+			LOG(("url '%s' failed to match regex", res));
+			free(res);
+			return 0;
+		}
+		len += 7;
+	} else {
+		/* room for the terminator and an inserted "/" */
+		res = malloc(len + 6);
+		if (!res) {
+			LOG(("malloc failed"));
+			return 0;
+		}
+		strcpy(res, url);
+	}
+
+	/* see RFC 2616 section 3.2.3 */
+	/* make scheme lower-case */
+	if (match[2].rm_so != -1) {
+		/* the casts keep tolower() defined for bytes >= 0x80 where
+		 * plain char is signed */
+		for (i = match[2].rm_so; i != match[2].rm_eo; i++)
+			res[i] = tolower((unsigned char) res[i]);
+		if (match[2].rm_eo == 4 && res[0] == 'h' && res[1] == 't' &&
+				res[2] == 't' && res[3] == 'p')
+			http = true;
+	}
+
+	/* make empty path into "/" */
+	if (match[5].rm_so != -1 && match[5].rm_so == match[5].rm_eo) {
+		/* shift the tail, including the terminator, up by one */
+		memmove(res + match[5].rm_so + 1, res + match[5].rm_so,
+				len - match[5].rm_so + 1);
+		res[match[5].rm_so] = '/';
+		len++;
+	}
+
+	/* make host lower-case */
+	if (match[4].rm_so != -1) {
+		for (i = match[4].rm_so; i != match[4].rm_eo; i++) {
+			if (res[i] == ':') {
+				/* match[4] still holds the offsets from
+				 * before any "/" was inserted, which is
+				 * fine as the authority precedes the path;
+				 * len - rm_eo is the length of the tail
+				 * following the authority */
+				if (http && res[i + 1] == '8' &&
+						res[i + 2] == '0' &&
+						i + 3 == match[4].rm_eo) {
+					/* default http port: drop ":80" */
+					memmove(res + i, res + i + 3,
+							len - match[4].rm_eo);
+					len -= 3;
+					res[len] = '\0';
+				} else if (i + 1 == match[4].rm_eo) {
+					/* empty port: drop the lone ":" */
+					memmove(res + i, res + i + 1,
+							len - match[4].rm_eo);
+					len--;
+					res[len] = '\0';
+				}
+				break;
+			}
+			res[i] = tolower((unsigned char) res[i]);
+		}
+	}
+
+	/* unescape non-"reserved" escaped characters */
+	for (i = 0; i != len; i++) {
+		if (res[i] != '%')
+			continue;
+		/* a terminator in either position fails the hex tests, so
+		 * a truncated escape at the end of the string is skipped */
+		c = tolower((unsigned char) res[i + 1]);
+		if ('0' <= c && c <= '9')
+			m = 16 * (c - '0');
+		else if ('a' <= c && c <= 'f')
+			m = 16 * (c - 'a' + 10);
+		else
+			continue;
+		c = tolower((unsigned char) res[i + 2]);
+		if ('0' <= c && c <= '9')
+			m += c - '0';
+		else if ('a' <= c && c <= 'f')
+			m += c - 'a' + 10;
+		else
+			continue;
+
+		/* controls, space, DEL and bytes above ASCII are not legal
+		 * unescaped, and the listed characters would change the
+		 * meaning of the URL: keep all of these escaped */
+		if (m <= 0x20 || 0x7f <= m || strchr(";/?:@&=+$," "<>#%\""
+				"{}|\\^[]`", m)) {
+			i += 2;
+			continue;
+		}
+
+		res[i] = m;
+		/* close the gap left by the two hex digits, moving the
+		 * terminator as well */
+		memmove(res + i + 1, res + i + 3, len - i - 2);
+		len -= 2;
+	}
+
+	return res;
+}
+
+
+/**
+ * Resolve a relative URL to absolute form.
+ *
+ * \param rel relative URL
+ * \param base base URL, must be absolute and cleaned as by url_normalize()
+ * \return an absolute URL, allocated on the heap, or 0 on failure
+ *
+ * url_init() must have been called first. The caller owns the returned
+ * string and should free() it. Failure is returned if either argument is
+ * 0, if base has no scheme, or if memory is exhausted.
+ *
+ * The steps follow RFC 2396 section 5.2. A fragment in rel is carried
+ * into the result in every case; a rel which is empty or only a fragment
+ * refers to the document at base and so also keeps the query of base.
+ */
+
+char *url_join(const char *rel, const char *base)
+{
+	int m;
+	int i, j;
+	char *buf = 0;
+	char *res;
+	const char *scheme = 0, *authority = 0, *path = 0, *query = 0,
+			*fragment = 0;
+	int scheme_len = 0, authority_len = 0, path_len = 0, query_len = 0,
+			fragment_len = 0;
+	regmatch_t base_match[10];
+	regmatch_t rel_match[10];
+	regmatch_t up_match[3];
+
+	if (!rel || !base) {
+		LOG(("missing relative or base url"));
+		return 0;
+	}
+
+	/* see RFC 2396 section 5.2 */
+	m = regexec(&url_re, base, 10, base_match, 0);
+	if (m) {
+		LOG(("base url '%s' failed to match regex", base));
+		return 0;
+	}
+	if (base_match[2].rm_so == -1) {
+		LOG(("base url '%s' is not absolute", base));
+		return 0;
+	}
+	/* start from the components of base; the steps below replace
+	 * them with those of rel as required */
+	scheme = base + base_match[2].rm_so;
+	scheme_len = base_match[2].rm_eo - base_match[2].rm_so;
+	if (base_match[4].rm_so != -1) {
+		authority = base + base_match[4].rm_so;
+		authority_len = base_match[4].rm_eo - base_match[4].rm_so;
+	}
+	path = base + base_match[5].rm_so;
+	path_len = base_match[5].rm_eo - base_match[5].rm_so;
+
+	/* 1) */
+	m = regexec(&url_re, rel, 10, rel_match, 0);
+	if (m) {
+		LOG(("relative url '%s' failed to match regex", rel));
+		return 0;
+	}
+
+	/* the fragment always comes from rel, including in step 2 */
+	if (rel_match[9].rm_so != -1) {
+		fragment = rel + rel_match[9].rm_so;
+		fragment_len = rel_match[9].rm_eo - rel_match[9].rm_so;
+	}
+
+	/* 2) */
+	if (rel_match[5].rm_so == rel_match[5].rm_eo &&
+			rel_match[2].rm_so == -1 &&
+			rel_match[4].rm_so == -1 &&
+			rel_match[6].rm_so == -1) {
+		/* reference to the current document: the query of base
+		 * is part of that document's URL, so keep it */
+		if (base_match[7].rm_so != -1) {
+			query = base + base_match[7].rm_so;
+			query_len = base_match[7].rm_eo - base_match[7].rm_so;
+		}
+		goto step7;
+	}
+	if (rel_match[7].rm_so != -1) {
+		query = rel + rel_match[7].rm_so;
+		query_len = rel_match[7].rm_eo - rel_match[7].rm_so;
+	}
+
+	/* 3) */
+	if (rel_match[2].rm_so != -1) {
+		/* rel has a scheme, so it is already absolute */
+		scheme = rel + rel_match[2].rm_so;
+		scheme_len = rel_match[2].rm_eo - rel_match[2].rm_so;
+		authority = 0;
+		authority_len = 0;
+		if (rel_match[4].rm_so != -1) {
+			authority = rel + rel_match[4].rm_so;
+			authority_len = rel_match[4].rm_eo - rel_match[4].rm_so;
+		}
+		path = rel + rel_match[5].rm_so;
+		path_len = rel_match[5].rm_eo - rel_match[5].rm_so;
+		goto step7;
+	}
+
+	/* 4) */
+	if (rel_match[4].rm_so != -1) {
+		authority = rel + rel_match[4].rm_so;
+		authority_len = rel_match[4].rm_eo - rel_match[4].rm_so;
+		path = rel + rel_match[5].rm_so;
+		path_len = rel_match[5].rm_eo - rel_match[5].rm_so;
+		goto step7;
+	}
+
+	/* 5) */
+	if (rel[rel_match[5].rm_so] == '/') {
+		path = rel + rel_match[5].rm_so;
+		path_len = rel_match[5].rm_eo - rel_match[5].rm_so;
+		goto step7;
+	}
+
+	/* 6) */
+	buf = malloc(path_len + rel_match[5].rm_eo + 10);
+	if (!buf) {
+		LOG(("malloc failed"));
+		return 0;
+	}
+	/* a) copy the base path and cut it back to its last '/' */
+	memcpy(buf, path, path_len);
+	for (; path_len != 0 && buf[path_len - 1] != '/'; path_len--)
+		;
+	/* b) append the relative path */
+	memcpy(buf + path_len, rel + rel_match[5].rm_so,
+			rel_match[5].rm_eo - rel_match[5].rm_so);
+	path_len += rel_match[5].rm_eo - rel_match[5].rm_so;
+	/* c) remove each "./" which is a complete segment; the
+	 * terminator makes the look-ahead at buf[j + 1] safe */
+	buf[path_len] = 0;
+	for (i = j = 0; j != path_len; ) {
+		if (j && buf[j - 1] == '/' && buf[j] == '.' &&
+				buf[j + 1] == '/')
+			j += 2;
+		else
+			buf[i++] = buf[j++];
+	}
+	path_len = i;
+	/* d) remove a final "." segment; a path shorter than two
+	 * characters can not end in "/." and must not be indexed
+	 * before its start */
+	if (2 <= path_len && buf[path_len - 2] == '/' &&
+			buf[path_len - 1] == '.')
+		path_len--;
+	/* e) and f) remove "<segment>/.." pairs until none is left */
+	while (1) {
+		buf[path_len] = 0;
+		m = regexec(&url_up_re, buf, 3, up_match, 0);
+		if (m)
+			break;
+		if (up_match[1].rm_eo + 4 <= path_len) {
+			/* "<segment>/../" with more path following */
+			memmove(buf + up_match[1].rm_so,
+					buf + up_match[1].rm_eo + 4,
+					path_len - up_match[1].rm_eo - 4);
+			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 4;
+		} else
+			/* "<segment>/.." ends the path: keep the '/' */
+			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3;
+	}
+	buf[path_len] = 0;
+	path = buf;
+
+step7:	/* 7) */
+	/* room for ':', "//", '?', '#' and the terminator */
+	res = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 +
+			query_len + 1 + fragment_len + 1);
+	if (!res) {
+		LOG(("malloc failed"));
+		free(buf);
+		return 0;
+	}
+
+	memcpy(res, scheme, scheme_len);
+	res[scheme_len] = ':';
+	i = scheme_len + 1;
+	if (authority) {
+		res[i++] = '/';
+		res[i++] = '/';
+		memcpy(res + i, authority, authority_len);
+		i += authority_len;
+	}
+	memcpy(res + i, path, path_len);
+	i += path_len;
+	if (query) {
+		res[i++] = '?';
+		memcpy(res + i, query, query_len);
+		i += query_len;
+	}
+	if (fragment) {
+		res[i++] = '#';
+		memcpy(res + i, fragment, fragment_len);
+		i += fragment_len;
+	}
+	res[i] = 0;
+
+	free(buf);
+
+	return res;
+}
+
+
+/**
+ * Extract the host part of an URL.
+ *
+ * \param url an absolute URL
+ * \returns copy of the host allocated on heap, or 0 on failure
+ *
+ * What is returned is the whole authority component as split by url_re,
+ * that is everything between "//" and the next '/', '?' or '#'. Failure
+ * is returned if the URL does not match url_re, has no authority, or if
+ * memory is exhausted. The caller should free() the result.
+ */
+
+char *url_host(const char *url)
+{
+	regmatch_t groups[10];
+	size_t host_len;
+	char *copy;
+
+	if (regexec(&url_re, url, 10, groups, 0) != 0) {
+		LOG(("url '%s' failed to match regex", url));
+		return 0;
+	}
+
+	/* group 4 is unset when the URL has no "//" part */
+	if (groups[4].rm_so == -1)
+		return 0;
+
+	host_len = groups[4].rm_eo - groups[4].rm_so;
+	copy = malloc(host_len + 1);
+	if (copy == 0) {
+		LOG(("malloc failed"));
+		return 0;
+	}
+
+	memcpy(copy, url + groups[4].rm_so, host_len);
+	copy[host_len] = 0;
+
+	return copy;
+}
+
+
+
+#ifdef TEST
+
+/**
+ * Test driver, built only when TEST is defined.
+ *
+ * Treats argv[1] as the base URL, joins every later argument to it with
+ * url_join(), and prints each successful result. The commented out
+ * sections exercise url_normalize() and url_host() on every argument
+ * instead.
+ */
+
+int main(int argc, char *argv[])
+{
+	int i;
+	char *s;
+	url_init();
+	for (i = 1; i != argc; i++) {
+/*		printf("==> '%s'\n", argv[i]);
+		s = url_normalize(argv[i]);
+		if (s)
+			printf("<== '%s'\n", s);*/
+/*		printf("==> '%s'\n", argv[i]);
+		s = url_host(argv[i]);
+		if (s)
+			printf("<== '%s'\n", s);*/
+		if (1 != i) {
+			s = url_join(argv[i], argv[1]);
+			if (s)
+				printf("'%s' + '%s' \t= '%s'\n", argv[1],
+						argv[i], s);
+			/* the result is heap allocated and owned here */
+			free(s);
+		}
+	}
+	return 0;
+}
+
+/**
+ * Compile a regular expression, giving up on failure (TEST build only).
+ *
+ * \param preg   compiled form is written here by regcomp()
+ * \param regex  the expression to compile
+ * \param cflags flags passed through to regcomp()
+ *
+ * If regcomp() fails, the expression and the regerror() text are written
+ * to stderr and the program exits with status 1.
+ */
+
+void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
+{
+	char message[200];
+	int status = regcomp(preg, regex, cflags);
+
+	if (status == 0)
+		return;
+
+	regerror(status, preg, message, sizeof message);
+	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
+	fprintf(stderr, "error: %s\n", message);
+	exit(1);
+}
+
+#endif
diff --git a/utils/url.h b/utils/url.h
new file mode 100644
index 000000000..f908e8f9a
--- /dev/null
+++ b/utils/url.h
@@ -0,0 +1,20 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
+ */
+
+/** \file
+ * URL parsing and joining (interface).
+ */
+
+#ifndef _NETSURF_UTILS_URL_H_
+#define _NETSURF_UTILS_URL_H_
+
+void url_init(void); /* compile the regexes; call before the others */
+char *url_normalize(const char *url); /* cleaned heap copy, 0 on failure */
+char *url_join(const char *rel, const char *base); /* rel resolved against base; heap, 0 on failure */
+char *url_host(const char *url); /* heap copy of the authority part, 0 if none or on failure */
+
+#endif
diff --git a/utils/utils.c b/utils/utils.c
index 889985178..f2c7188ea 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -2,7 +2,7 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
- * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
+ * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk>
*/
@@ -12,17 +12,11 @@
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
-#include <uri.h>
#include <sys/types.h>
#include <regex.h>
#include <time.h>
#include "libxml/encoding.h"
-#include "libxml/uri.h"
#include "netsurf/utils/config.h"
-#ifdef riscos
-#include "netsurf/riscos/about.h"
-#include "netsurf/riscos/constdata.h"
-#endif
#define NDEBUG
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
@@ -190,103 +184,6 @@ char *squash_tolat1(xmlChar *s)
/**
- * Calculate an URL from a relative and base URL.
- *
- * base may be 0 for a new URL, in which case the URL is canonicalized and
- * returned. Returns 0 in case of error.
- */
-
-char *url_join(char *rel_url, char *base_url)
-{
- char *res;
- uri_t *base = 0, *rel = 0, *abs;
-
- LOG(("rel_url = %s, base_url = %s", rel_url, base_url));
-
-#ifdef riscos
- /* hacky, hacky, hacky...
- * It is, however, best to do this here as it avoids
- * duplicating code for clicking links and url bar handling.
- * It simplifies the code it the other places too (they just
- * call this as usual, then we handle it here).
- */
-#ifdef WITH_ABOUT
- if (strcasecmp(rel_url, "about:") == 0) {
- about_create();
- return xstrdup(ABOUT_URL);
- }
-#ifdef WITH_COOKIES
- if (strcasecmp(rel_url, "about:cookies") == 0) {
- cookie_create();
- return xstrdup(COOKIE_URL);
- }
-#endif
-#endif
-#endif
-
- if (!base_url) {
- res = uri_cannonicalize_string(rel_url,
- (int)(strlen(rel_url)),
- URI_STRING_URI_STYLE);
- LOG(("res = %s", res));
- if (res)
- return xstrdup(res);
- return 0;
- }
-
- base = uri_alloc(base_url, (int)(strlen(base_url)));
- rel = uri_alloc(rel_url, (int)(strlen(rel_url)));
- if (!base || !rel)
- goto fail;
- if (!base->scheme)
- goto fail;
-
- abs = uri_abs_1(base, rel);
-
- res = xstrdup(uri_uri(abs));
-
- uri_free(base);
- uri_free(rel);
-
- LOG(("res = %s", res));
- return res;
-
-fail:
- if (base)
- uri_free(base);
- if (rel)
- uri_free(rel);
-
- LOG(("error"));
-
- return 0;
-}
-
-
-/**
- * Extract the host name from a url.
- *
- * \param url an absolute URL
- * \return a new string, or 0 in case of error
- */
-
-char *get_host_from_url(char *url)
-{
- char *host = 0;
- uri_t *uri;
-
- uri = uri_alloc(url, (int)(strlen(url)));
- if (!uri)
- return 0;
- if (uri->host)
- host = xstrdup(uri->host);
- uri_free(uri);
-
- return host;
-}
-
-
-/**
* Check if a directory exists.
*/
diff --git a/utils/utils.h b/utils/utils.h
index 02b927711..1faef449d 100644
--- a/utils/utils.h
+++ b/utils/utils.h
@@ -2,7 +2,7 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
- * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
+ * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
#ifndef _NETSURF_UTILS_UTILS_H_
@@ -26,8 +26,6 @@ char * squash_whitespace(const char * s);
char * tolat1(xmlChar * s);
char * tolat1_pre(xmlChar * s);
char *squash_tolat1(xmlChar *s);
-char *url_join(char *rel_url, char *base_url);
-char *get_host_from_url(char* url);
bool is_dir(const char *path);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void clean_cookiejar(void);