From 1c85bf04293cfba663c5170bbe762825b7e72af1 Mon Sep 17 00:00:00 2001 From: James Bursa Date: Tue, 2 Mar 2004 18:02:41 +0000 Subject: [project @ 2004-03-02 18:02:17 by bursa] Add new url functions and modify to use them. svn path=/import/netsurf/; revision=578 --- content/fetch.c | 15 +- content/fetchcache.c | 1 + css/css.c | 1 + desktop/browser.c | 19 ++- desktop/browser.h | 6 + desktop/loginlist.c | 7 +- desktop/netsurf.c | 2 + makefile | 8 +- render/box.c | 1 + render/html.c | 7 +- riscos/401login.c | 3 +- riscos/about.c | 128 +++++---------- riscos/gui.c | 8 +- riscos/save_complete.c | 50 +----- riscos/url.c | 127 -------------- riscos/url.h | 15 -- riscos/url_protocol.c | 127 ++++++++++++++ riscos/url_protocol.h | 15 ++ riscos/window.c | 23 +-- utils/url.c | 437 +++++++++++++++++++++++++++++++++++++++++++++++++ utils/url.h | 20 +++ utils/utils.c | 105 +----------- utils/utils.h | 4 +- 23 files changed, 692 insertions(+), 437 deletions(-) delete mode 100644 riscos/url.c delete mode 100644 riscos/url.h create mode 100644 riscos/url_protocol.c create mode 100644 riscos/url_protocol.h create mode 100644 utils/url.c create mode 100644 utils/url.h diff --git a/content/fetch.c b/content/fetch.c index 295324484..6d35cb29c 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -26,7 +26,6 @@ #include #include #include "curl/curl.h" -#include "libxml/uri.h" #include "netsurf/utils/config.h" #include "netsurf/content/fetch.h" #ifdef riscos @@ -41,6 +40,7 @@ #endif #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" @@ -174,19 +174,12 @@ struct fetch * fetch_start(char *url, char *referer, struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch; CURLcode code; CURLMcode codem; - xmlURI *uri; #ifdef WITH_AUTH struct login *li; #endif LOG(("fetch %p, url '%s'", fetch, url)); - uri = xmlParseURI(url); - if (uri == 0) { - LOG(("warning: failed to parse url")); - return 0; - } - /* construct a new fetch structure */ fetch->callback = callback; fetch->had_headers = false; @@ -199,9 +192,7 @@ struct fetch * fetch_start(char *url, char *referer, fetch->referer = xstrdup(referer); fetch->p = p; fetch->headers = 0; - fetch->host = 0; - if (uri->server != 0) - fetch->host = xstrdup(uri->server); + fetch->host = url_host(url); fetch->content_length = 0; #ifdef WITH_POST fetch->post_urlenc = 0; @@ -216,8 +207,6 @@ struct fetch * fetch_start(char *url, char *referer, fetch->prev = 0; fetch->next = 0; - xmlFreeURI(uri); - /* look for a fetch from the same host */ if (fetch->host != 0) { for (host_fetch = fetch_list; diff --git a/content/fetchcache.c b/content/fetchcache.c index 3639c39a0..21b50ee0b 100644 --- a/content/fetchcache.c +++ b/content/fetchcache.c @@ -24,6 +24,7 @@ #include "netsurf/content/fetch.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" diff --git a/css/css.c b/css/css.c index e03e0c107..ee125da19 100644 --- a/css/css.c +++ b/css/css.c @@ -21,6 +21,7 @@ #include "netsurf/desktop/gui.h" #endif #include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" /** diff --git a/desktop/browser.c b/desktop/browser.c index d75b1c33f..6aee554ad 100644 --- a/desktop/browser.c +++ b/desktop/browser.c @@ -30,6 +30,7 @@ #include "netsurf/render/layout.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" @@ -152,12 +153,16 @@ void browser_window_go_post(struct browser_window *bw, const char *url, browser_window_set_status(bw, messages_get("Loading")); bw->history_add = history_add; bw->time0 = clock(); - c = fetchcache(url, 0, - browser_window_callback, bw, 0, - gui_window_get_width(bw->window), 0, - false, - post_urlenc, post_multipart, - true); + if (strncmp(url, "about:", 6) == 0) + c = about_create(url, browser_window_callback, bw, 0, + gui_window_get_width(bw->window), 0); + else + c = fetchcache(url, 0, + browser_window_callback, bw, 0, + gui_window_get_width(bw->window), 0, + false, + post_urlenc, post_multipart, + true); if (!c) { browser_window_set_status(bw, messages_get("FetchFailed")); return; @@ -1743,6 +1748,8 @@ void browser_form_submit(struct browser_window *bw, struct form *form, case method_POST_MULTIPART: url = url_join(form->action, base); + if (!url) + break; browser_window_go_post(bw, url, 0, success, true); break; diff --git a/desktop/browser.h b/desktop/browser.h index e25b26fbe..072201766 100644 --- a/desktop/browser.h +++ b/desktop/browser.h @@ -119,4 +119,10 @@ void history_destroy(struct history *history); void history_back(struct browser_window *bw, struct history *history); void history_forward(struct browser_window *bw, struct history *history); +/* In platform specific about.c. */ +struct content *about_create(const char *url, + void (*callback)(content_msg msg, struct content *c, void *p1, + void *p2, const char *error), + void *p1, void *p2, unsigned long width, unsigned long height); + #endif diff --git a/desktop/loginlist.c b/desktop/loginlist.c index 45d4c0684..85f9e995e 100644 --- a/desktop/loginlist.c +++ b/desktop/loginlist.c @@ -12,6 +12,7 @@ #include "netsurf/utils/config.h" #include "netsurf/desktop/401login.h" #include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" #ifdef WITH_AUTH @@ -30,7 +31,7 @@ static struct login *loginlist = &login; void login_list_add(char *host, char* logindets) { struct login *nli = xcalloc(1, sizeof(*nli)); - char *temp = get_host_from_url(host); + char *temp = url_host(host); char *i; assert(temp); @@ -81,7 +82,7 @@ struct login *login_list_get(char *host) { (strncasecmp(host, "https://", 8) != 0)) return NULL; - temphost = get_host_from_url(host); + temphost = url_host(host); assert(temphost); temp = xstrdup(host); @@ -89,7 +90,7 @@ struct login *login_list_get(char *host) { * So make sure we've got that at least */ if (strlen(temphost) > strlen(temp)) { - temp = get_host_from_url(host); + temp = url_host(host); assert(temp); } diff --git a/desktop/netsurf.c b/desktop/netsurf.c index 724b57415..02a29c3f5 100644 --- a/desktop/netsurf.c +++ b/desktop/netsurf.c @@ -17,6 +17,7 @@ #include "netsurf/desktop/browser.h" #include "netsurf/desktop/gui.h" #include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" bool netsurf_quit = false; @@ -63,6 +64,7 @@ void netsurf_init(int argc, char** argv) #ifdef WITH_GIF nsgif_init(); #endif + url_init(); } diff --git a/makefile b/makefile index f7101c6a9..b64bfc600 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,7 @@ CC_DEBUG = gcc OBJECTS_COMMON = cache.o content.o fetch.o fetchcache.o other.o \ css.o css_enum.o parser.o ruleset.o scanner.o \ box.o form.o html.o layout.o textplain.o \ - messages.o utils.o translit.o pool.o + messages.o utils.o translit.o pool.o url.o OBJECTS = $(OBJECTS_COMMON) \ browser.o loginlist.o netsurf.o options.o \ htmlinstance.o htmlredraw.o \ @@ -17,7 +17,7 @@ OBJECTS = $(OBJECTS_COMMON) \ menus.o mouseactions.o \ textselection.o theme.o window.o \ draw.o gif.o jpeg.o plugin.o png.o sprite.o \ - about.o filetype.o font.o uri.o url.o history.o \ + about.o filetype.o font.o uri.o url_protocol.o history.o \ version.o save_draw.o save_complete.o thumbnail.o save.o OBJECTS_DEBUG = $(OBJECTS_COMMON) \ netsurfd.o \ @@ -39,8 +39,8 @@ CFLAGS = -std=c9x -D_BSD_SOURCE -Driscos -DBOOL_DEFINED -O $(WARNFLAGS) -I.. \ -mpoke-function-name CFLAGS_DEBUG = -std=c9x -D_BSD_SOURCE $(WARNFLAGS) -I.. -I/usr/include/libxml2 -g LDFLAGS = -L/riscos/lib -lxml2 -lz -lcurl -lssl -lcrypto -lares -lanim -lpng \ - -lifc -loslib -luri -ljpeg -LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl -luri + -lifc -loslib -ljpeg +LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl OBJDIR = $(shell $(CC) -dumpmachine) SOURCES=$(OBJECTS:.o=.c) diff --git a/render/box.c b/render/box.c index fb270e6ad..4ee3ec95b 100644 --- a/render/box.c +++ b/render/box.c @@ -32,6 +32,7 @@ #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/pool.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" diff --git a/render/html.c b/render/html.c index 2eccc78d7..d1c2cafa3 100644 --- a/render/html.c +++ b/render/html.c @@ -22,9 +22,10 @@ #endif #include "netsurf/render/html.h" #include "netsurf/render/layout.h" -#include "netsurf/utils/utils.h" -#include "netsurf/utils/messages.h" #include "netsurf/utils/log.h" +#include "netsurf/utils/messages.h" +#include "netsurf/utils/url.h" +#include "netsurf/utils/utils.h" #define CHUNK 4096 @@ -220,7 +221,7 @@ void html_head(struct content *c, xmlNode *head) } else if (strcmp(node->name, "base") == 0) { char *href = (char *) xmlGetProp(node, (const xmlChar *) "href"); if (href) { - char *url = url_join(href, 0); + char *url = url_normalize(href); if (url) { free(c->data.html.base_url); c->data.html.base_url = url; diff --git a/riscos/401login.c b/riscos/401login.c index b46d115d0..c96fcd2c1 100644 --- a/riscos/401login.c +++ b/riscos/401login.c @@ -17,6 +17,7 @@ #include "netsurf/riscos/gui.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" #ifdef WITH_AUTH @@ -65,7 +66,7 @@ void gui_401login_open(struct browser_window *bw, struct content *c, char *realm char *murl, *host; murl = c->url; - host = get_host_from_url(murl); + host = url_host(murl); assert(host); bwin = bw; diff --git a/riscos/about.c b/riscos/about.c index cfec2733a..89f39506c 100644 --- a/riscos/about.c +++ b/riscos/about.c @@ -16,20 +16,18 @@ #include #include #include /* for __unixify */ - -#include "netsurf/utils/config.h" -#include "netsurf/desktop/netsurf.h" -#include "netsurf/riscos/about.h" -#include "netsurf/utils/log.h" -#include "netsurf/utils/messages.h" -#include "netsurf/utils/utils.h" - #include "oslib/fileswitch.h" #include "oslib/osargs.h" #include "oslib/osfile.h" #include "oslib/osfind.h" #include "oslib/osfscontrol.h" #include "oslib/osgbpb.h" +#include "netsurf/utils/config.h" +#include "netsurf/desktop/browser.h" +#include "netsurf/desktop/netsurf.h" +#include "netsurf/utils/log.h" +#include "netsurf/utils/messages.h" +#include "netsurf/utils/utils.h" #ifdef WITH_ABOUT @@ -41,81 +39,53 @@ static const char *paboutpl3 = "details = 0; - np->details = details; - - np->next = pd; - return np; -} - -/** - * Creates the about page and stores it in .WWW.Netsurf - */ -void about_create(void) { - - struct about_page *abt; - struct plugd *temp; +struct content *about_create(const char *url, + void (*callback)(content_msg msg, struct content *c, void *p1, + void *p2, const char *error), + void *p1, void *p2, unsigned long width, unsigned long height) +{ + struct content *c = 0; FILE *fp; char *buf, *val, var[20], *ptype, *pdetails, *fname, *furl; int i, nofiles, j, w, h, size; fileswitch_object_type fot; os_error *e; + const char *params[] = { 0 }; - abt = (struct about_page*)xcalloc(1, sizeof(*abt)); - abt->plugd = 0; + c = content_create(url); + c->width = width; + c->height = height; + content_add_user(c, callback, p1, p2); + content_set_type(c, CONTENT_HTML, "text/html", params); /* Page header */ buf = xcalloc(strlen(pabouthdr) + 50, sizeof(char)); snprintf(buf, strlen(pabouthdr) + 50, pabouthdr, "About NetSurf", netsurf_version); - abt->header = xstrdup(buf); - xfree(buf); + content_process_data(c, buf, strlen(buf)); + free(buf); /* browser details */ - xosfile_read_stamped_no_path(".About.About",0,0,0,&i,0,0); - fp = fopen(".About.About", "r"); - buf = xcalloc((unsigned int)i + 10, sizeof(char)); - fread(buf, sizeof(char), (unsigned int)i, fp); - fclose(fp); - abt->browser = xstrdup(buf); - xfree(buf); + buf = load(".About.About"); + content_process_data(c, buf, strlen(buf)); + free(buf); /* plugin header */ - abt->plghead = xstrdup(pabtplghd); - - /* plugin footer */ - abt->plgfoot = xstrdup(pabtplgft); - - /* Page footer */ - abt->footer = xstrdup(paboutftr); + content_process_data(c, pabtplghd, strlen(pabtplghd)); /* plugins registered */ for (i=0; i!=4096; i++) { @@ -172,7 +142,7 @@ void about_create(void) { furl = xcalloc(strlen(paboutpl1) + strlen(ptype) + strlen(pdetails) + 10, sizeof(char)); sprintf(furl, paboutpl1, ptype, pdetails); LOG(("furl: %s", furl)); - abt->plugd = new_plugin(abt->plugd, furl); + content_process_data(c, furl, strlen(furl)); xfree(pdetails); continue; } @@ -214,7 +184,7 @@ void about_create(void) { furl = xcalloc(strlen(paboutpl3) + strlen(ptype) + strlen(buf) + strlen(pdetails) + 10, sizeof(char)); sprintf(furl, paboutpl3, ptype, buf, ptype, w, h, pdetails); - abt->plugd = new_plugin(abt->plugd, furl); + content_process_data(c, furl, strlen(furl)); xfree(pdetails); continue; } @@ -230,7 +200,7 @@ void about_create(void) { furl = xcalloc(strlen(paboutpl2) + strlen(ptype) + strlen(fname) + strlen(pdetails) + 10, sizeof(char)); sprintf(furl, paboutpl2, ptype, fname, ptype, pdetails); - abt->plugd = new_plugin(abt->plugd, furl); + content_process_data(c, furl, strlen(furl)); xfree(fname); xfree(pdetails); } @@ -241,29 +211,15 @@ void about_create(void) { } } - /* write file */ - xosfile_create_dir(".WWW", 77); - xosfile_create_dir(".WWW.NetSurf", 77); - - fp = fopen(".WWW.Netsurf.About", "w+"); - fprintf(fp, "%s", abt->header); - fprintf(fp, "%s", abt->browser); - fprintf(fp, "%s", abt->plghead); - while (abt->plugd != 0) { - fprintf(fp, "%s", abt->plugd->details); - temp = abt->plugd; - abt->plugd = abt->plugd->next; - xfree(temp); - } - fprintf(fp, "%s", abt->plgfoot); - fprintf(fp, "%s", abt->footer); - fclose(fp); + /* plugin footer */ + content_process_data(c, pabtplgft, strlen(pabtplgft)); - xosfile_set_type(".WWW.NetSurf.About", 0xfaf); + /* Page footer */ + content_process_data(c, paboutftr, strlen(paboutftr)); - xfree(abt); + content_convert(c, c->width, c->height); - return; + return c; } #ifdef WITH_COOKIES diff --git a/riscos/gui.c b/riscos/gui.c index dd976ee4e..a325aebf2 100644 --- a/riscos/gui.c +++ b/riscos/gui.c @@ -29,9 +29,6 @@ #include "netsurf/render/font.h" #include "netsurf/render/form.h" #include "netsurf/render/html.h" -#ifdef WITH_ABOUT -#include "netsurf/riscos/about.h" -#endif #include "netsurf/riscos/constdata.h" #include "netsurf/riscos/gui.h" #include "netsurf/riscos/options.h" @@ -43,7 +40,7 @@ #include "netsurf/riscos/uri.h" #endif #ifdef WITH_URL -#include "netsurf/riscos/url.h" +#include "netsurf/riscos/url_protocol.h" #endif #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" @@ -257,9 +254,6 @@ void ro_gui_icon_bar_create(void) void gui_quit(void) { -#ifdef WITH_ABOUT - about_quit(); -#endif ro_gui_history_quit(); wimp_close_down(task_handle); xhourglass_off(); diff --git a/riscos/save_complete.c b/riscos/save_complete.c index a97465679..08baffbd8 100644 --- a/riscos/save_complete.c +++ b/riscos/save_complete.c @@ -6,13 +6,8 @@ */ #include - #include /* for __riscosify */ - -#include /* possibly just have accessor methods in utils.c */ - #include "oslib/osfile.h" - #include "netsurf/utils/config.h" #include "netsurf/content/content.h" #include "netsurf/css/css.h" @@ -30,7 +25,6 @@ */ void save_imported_sheets(struct content *c, int parent, int level, char *p, char* fn); -char* get_filename(char * url); /* this is temporary. */ const char * const SAVE_PATH = ".savetest."; @@ -46,7 +40,7 @@ void save_complete(struct content *c) { return; } - fname = get_filename(c->data.html.base_url); + fname = "test"; /*get_filename(c->data.html.base_url);*/ if (!fname) { /* no path -> exit */ return; @@ -135,46 +129,4 @@ void save_imported_sheets(struct content *c, int parent, int level, char *p, cha } } -char* get_filename(char * url) { - - char *ret = 0, *offs; - uri_t *uri; - - uri = uri_alloc(url, (int)strlen(url)); - - if (!uri) { - return 0; - } - - if (uri->path) { - /* Two possible cases here: - * a) no page name given (eg http://www.blah.com/) -> index.html - * b) page name given - */ - /* case a */ - if (strlen(uri->path) == 0) { - ret = xstrdup("index.html"); - } - /* case b */ - else { - offs = strrchr(uri->path, '/'); - if (!offs) { - ret = xstrdup(uri->path); - } - else { - ret = xstrdup(offs+1); - } - } - } - - uri_free(uri); - - offs = xcalloc(strlen(ret)+1, sizeof(char)); - - __riscosify(ret, 0, 0, offs, strlen(ret)+1, 0); - - xfree(ret); - - return offs; -} #endif diff --git a/riscos/url.c b/riscos/url.c deleted file mode 100644 index e1ee94d47..000000000 --- a/riscos/url.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 John M Bell - * Shamelessly hacked from Rob Jackson's URI handler (see uri.c) - */ - -#include -#include -#include "oslib/inetsuite.h" -#include "oslib/wimp.h" -#include "netsurf/utils/config.h" -#include "netsurf/desktop/browser.h" -#include "netsurf/riscos/theme.h" -#include "netsurf/desktop/gui.h" -#include "netsurf/riscos/gui.h" -#include "netsurf/riscos/url.h" -#include "netsurf/utils/log.h" -#include "netsurf/utils/utils.h" - -/* Define this to allow posting of data to an URL */ -#undef ALLOW_POST - -static char *read_string_value(os_string_value string, char *msg); - -void ro_url_message_received(wimp_message* message) -{ - char* uri_requested = NULL; -#ifdef ALLOW_POST - char* filename = NULL, *mimetype = NULL; - bool post=false; -#endif - struct browser_window* bw; - inetsuite_message_open_url *url_message = (inetsuite_message_open_url*)&message->data; - - /* If the url_message->indirect.tag is non-zero, - * then the message data is contained within the message block. - */ - if (url_message->indirect.tag != 0) { - uri_requested = xstrdup(url_message->url); - LOG(("%s", url_message->url)); - } - else { - /* Get URL */ - if (read_string_value(url_message->indirect.url, - (char*)url_message) != 0) { - uri_requested = xstrdup(read_string_value(url_message->indirect.url, - (char*)url_message)); - } - else { - return; - } - LOG(("%s", uri_requested)); - -#ifdef ALLOW_POST - /* Get filename */ - if (read_string_value(url_message->indirect.body_file, - (char*)url_message) != 0) { - filename = xstrdup(read_string_value(url_message->indirect.body_file, - (char*)url_message)); - } - /* We ignore the target window. Just open a new window. */ - /* Get mimetype */ - if (url_message->indirect.flags & inetsuite_USE_MIME_TYPE) { - if (read_string_value(url_message->indirect.body_mimetype, - (char*)url_message) != 0) { - mimetype = xstrdup(read_string_value(url_message->indirect.body_mimetype, - (char*)url_message)); - } - else { - mimetype = xstrdup("application/x-www-form-urlencoded"); - } - } - else { - mimetype = xstrdup("application/x-www-form-urlencoded"); - } - - /* Indicate a post request */ - if (filename && message->size > 28) - post = true; -#endif - } - - if ( (strspn(uri_requested, "http://") != strlen("http://")) && - (strspn(uri_requested, "https://") != strlen("https://")) && - (strspn(uri_requested, "file:/") != strlen("file:/")) ) { -#ifdef ALLOW_POST - xfree(filename); - xfree(mimetype); -#endif - xfree(uri_requested); - return; - } - - /* send ack */ - message->your_ref = message->my_ref; - xwimp_send_message(wimp_USER_MESSAGE_ACKNOWLEDGE, message, - message->sender); - - /* create new browser window */ - browser_window_create(uri_requested); - -#if 0 - if (post) { - /* TODO - create urlencoded data from file contents. - * Delete the file when finished with it. - */ - browser_window_open_location_historical(bw, uri_requested, /*data*/0, 0); - } -#endif - -#ifdef ALLOW_POST - xfree(filename); - xfree(mimetype); -#endif - xfree(uri_requested); - - return; -} - -char *read_string_value(os_string_value string, char *msg) { - - if(string.offset == 0) return NULL; - if(string.offset > 256) return string.pointer; - return &msg[string.offset]; -} diff --git a/riscos/url.h b/riscos/url.h deleted file mode 100644 index 01b99b7c1..000000000 --- a/riscos/url.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 John M Bell - */ - -#ifndef _NETSURF_RISCOS_URL_H_ -#define _NETSURF_RISCOS_URL_H_ - -#include "oslib/wimp.h" - -void ro_url_message_received(wimp_message *message); - -#endif diff --git a/riscos/url_protocol.c b/riscos/url_protocol.c new file mode 100644 index 000000000..1553d17ac --- /dev/null +++ b/riscos/url_protocol.c @@ -0,0 +1,127 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2003 John M Bell + * Shamelessly hacked from Rob Jackson's URI handler (see uri.c) + */ + +#include +#include +#include "oslib/inetsuite.h" +#include "oslib/wimp.h" +#include "netsurf/utils/config.h" +#include "netsurf/desktop/browser.h" +#include "netsurf/riscos/theme.h" +#include "netsurf/desktop/gui.h" +#include "netsurf/riscos/gui.h" +#include "netsurf/riscos/url_protocol.h" +#include "netsurf/utils/log.h" +#include "netsurf/utils/utils.h" + +/* Define this to allow posting of data to an URL */ +#undef ALLOW_POST + +static char *read_string_value(os_string_value string, char *msg); + +void ro_url_message_received(wimp_message* message) +{ + char* uri_requested = NULL; +#ifdef ALLOW_POST + char* filename = NULL, *mimetype = NULL; + bool post=false; +#endif + struct browser_window* bw; + inetsuite_message_open_url *url_message = (inetsuite_message_open_url*)&message->data; + + /* If the url_message->indirect.tag is non-zero, + * then the message data is contained within the message block. + */ + if (url_message->indirect.tag != 0) { + uri_requested = xstrdup(url_message->url); + LOG(("%s", url_message->url)); + } + else { + /* Get URL */ + if (read_string_value(url_message->indirect.url, + (char*)url_message) != 0) { + uri_requested = xstrdup(read_string_value(url_message->indirect.url, + (char*)url_message)); + } + else { + return; + } + LOG(("%s", uri_requested)); + +#ifdef ALLOW_POST + /* Get filename */ + if (read_string_value(url_message->indirect.body_file, + (char*)url_message) != 0) { + filename = xstrdup(read_string_value(url_message->indirect.body_file, + (char*)url_message)); + } + /* We ignore the target window. Just open a new window. */ + /* Get mimetype */ + if (url_message->indirect.flags & inetsuite_USE_MIME_TYPE) { + if (read_string_value(url_message->indirect.body_mimetype, + (char*)url_message) != 0) { + mimetype = xstrdup(read_string_value(url_message->indirect.body_mimetype, + (char*)url_message)); + } + else { + mimetype = xstrdup("application/x-www-form-urlencoded"); + } + } + else { + mimetype = xstrdup("application/x-www-form-urlencoded"); + } + + /* Indicate a post request */ + if (filename && message->size > 28) + post = true; +#endif + } + + if ( (strspn(uri_requested, "http://") != strlen("http://")) && + (strspn(uri_requested, "https://") != strlen("https://")) && + (strspn(uri_requested, "file:/") != strlen("file:/")) ) { +#ifdef ALLOW_POST + xfree(filename); + xfree(mimetype); +#endif + xfree(uri_requested); + return; + } + + /* send ack */ + message->your_ref = message->my_ref; + xwimp_send_message(wimp_USER_MESSAGE_ACKNOWLEDGE, message, + message->sender); + + /* create new browser window */ + browser_window_create(uri_requested); + +#if 0 + if (post) { + /* TODO - create urlencoded data from file contents. + * Delete the file when finished with it. + */ + browser_window_open_location_historical(bw, uri_requested, /*data*/0, 0); + } +#endif + +#ifdef ALLOW_POST + xfree(filename); + xfree(mimetype); +#endif + xfree(uri_requested); + + return; +} + +char *read_string_value(os_string_value string, char *msg) { + + if(string.offset == 0) return NULL; + if(string.offset > 256) return string.pointer; + return &msg[string.offset]; +} diff --git a/riscos/url_protocol.h b/riscos/url_protocol.h new file mode 100644 index 000000000..01b99b7c1 --- /dev/null +++ b/riscos/url_protocol.h @@ -0,0 +1,15 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2003 John M Bell + */ + +#ifndef _NETSURF_RISCOS_URL_H_ +#define _NETSURF_RISCOS_URL_H_ + +#include "oslib/wimp.h" + +void ro_url_message_received(wimp_message *message); + +#endif diff --git a/riscos/window.c b/riscos/window.c index 80cd80b02..fa8b2e1d7 100644 --- a/riscos/window.c +++ b/riscos/window.c @@ -27,6 +27,7 @@ #include "netsurf/riscos/theme.h" #include "netsurf/riscos/thumbnail.h" #include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" #include "netsurf/utils/utils.h" gui_window *window_list = 0; @@ -676,6 +677,7 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar) struct content *content = g->data.browser.bw->current_content; wimp_window_state state; int y; + char *url; assert(g->type == GUI_BROWSER_WINDOW); @@ -744,22 +746,11 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar) case wimp_KEY_RETURN: if (!toolbar) break; - else { - char *url = xcalloc(1, 10 + strlen(g->url)); - char *url2; - if (g->url[strspn(g->url, "abcdefghijklmnopqrstuvwxyz")] != ':') { - strcpy(url, "http://"); - strcpy(url + 7, g->url); - } else { - strcpy(url, g->url); - } - url2 = url_join(url, 0); - free(url); - if (url2) { - gui_window_set_url(g, url2); - browser_window_go(g->data.browser.bw, url2); - free(url2); - } + url = url_normalize(g->url); + if (url) { + gui_window_set_url(g, url); + browser_window_go(g->data.browser.bw, url); + free(url); } return true; diff --git a/utils/url.c b/utils/url.c new file mode 100644 index 000000000..c22144495 --- /dev/null +++ b/utils/url.c @@ -0,0 +1,437 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2004 James Bursa + */ + +/** \file + * URL parsing and joining (implementation). + */ + +#include +#include +#include +#include +#include +#include +#include "netsurf/utils/log.h" +#include "netsurf/utils/url.h" +#include "netsurf/utils/utils.h" + + +regex_t url_re, url_up_re; + +/** + * Initialise URL routines. + * + * Compiles regular expressions required by the url_ functions. + */ + +void url_init(void) +{ + /* regex from RFC 2396 */ + regcomp_wrapper(&url_re, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)" + "(\\?([^#]*))?(#(.*))?$", REG_EXTENDED); + regcomp_wrapper(&url_up_re, + "/(|[^/]|[.][^./]|[^./][.]|[^/][^/][^/]+)/[.][.](/|$)", + REG_EXTENDED); +} + + +/** + * Normalize a URL. + * + * \param url an absolute URL + * \return cleaned up url, allocated on the heap, or 0 on failure + * + * If there is no scheme, http:// is added. The scheme and host are + * lower-cased. Default ports are removed (http only). An empty path is + * replaced with "/". Characters are unescaped if safe. + */ + +char *url_normalize(const char *url) +{ + char c; + char *res = 0; + int m; + int i; + int len; + bool http = false; + regmatch_t match[10]; + + m = regexec(&url_re, url, 10, match, 0); + if (m) { + LOG(("url '%s' failed to match regex", url)); + return 0; + } + + len = strlen(url); + + if (match[1].rm_so == -1) { + /* scheme missing: add http:// and reparse */ + LOG(("scheme missing: using http")); + res = malloc(strlen(url) + 13); + if (!res) { + LOG(("malloc failed")); + return 0; + } + strcpy(res, "http://"); + strcpy(res + 7, url); + m = regexec(&url_re, res, 10, match, 0); + if (m) { + LOG(("url '%s' failed to match regex", res)); + free(res); + return 0; + } + len += 7; + } else { + res = malloc(len + 6); + if (!res) { + LOG(("strdup failed")); + return 0; + } + strcpy(res, url); + } + + /*for (unsigned int i = 0; i != 10; i++) { + if (match[i].rm_so == -1) + continue; + fprintf(stderr, "%i: '%.*s'\n", i, + match[i].rm_eo - match[i].rm_so, + res + match[i].rm_so); + }*/ + + /* see RFC 2616 section 3.2.3 */ + /* make scheme lower-case */ + if (match[2].rm_so != -1) { + for (i = match[2].rm_so; i != match[2].rm_eo; i++) + res[i] = tolower(res[i]); + if (match[2].rm_eo == 4 && res[0] == 'h' && res[1] == 't' && + res[2] == 't' && res[3] == 'p') + http = true; + } + + /* make empty path into "/" */ + if (match[5].rm_so != -1 && match[5].rm_so == match[5].rm_eo) { + memmove(res + match[5].rm_so + 1, res + match[5].rm_so, + len - match[5].rm_so + 1); + res[match[5].rm_so] = '/'; + len++; + } + + /* make host lower-case */ + if (match[4].rm_so != -1) { + for (i = match[4].rm_so; i != match[4].rm_eo; i++) { + if (res[i] == ':') { + if (http && res[i + 1] == '8' && + res[i + 2] == '0' && + i + 3 == match[4].rm_eo) { + memmove(res + i, res + i + 3, + len - match[4].rm_eo); + len -= 3; + res[len] = '\0'; + } else if (i + 1 == match[4].rm_eo) { + memmove(res + i, res + i + 1, + len - match[4].rm_eo); + len--; + res[len] = '\0'; + } + break; + } + res[i] = tolower(res[i]); + } + } + + /* unescape non-"reserved" escaped characters */ + for (i = 0; i != len; i++) { + if (res[i] != '%') + continue; + c = tolower(res[i + 1]); + if ('0' <= c && c <= '9') + m = 16 * (c - '0'); + else if ('a' <= c && c <= 'f') + m = 16 * (c - 'a' + 10); + else + continue; + c = tolower(res[i + 2]); + if ('0' <= c && c <= '9') + m += c - '0'; + else if ('a' <= c && c <= 'f') + m += c - 'a' + 10; + else + continue; + + if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"" + "{}|\\^[]`", m)) { + i += 2; + continue; + } + + res[i] = m; + memmove(res + i + 1, res + i + 3, len - i - 2); + len -= 2; + } + + return res; +} + + +/** + * Resolve a relative URL to absolute form. + * + * \param rel relative URL + * \param base base URL, must be absolute and cleaned as by url_normalize() + * \return an absolute URL, allocated on the heap, or 0 on failure + */ + +char *url_join(const char *rel, const char *base) +{ + int m; + int i, j; + char *buf = 0; + char *res; + const char *scheme = 0, *authority = 0, *path = 0, *query = 0, + *fragment = 0; + int scheme_len = 0, authority_len = 0, path_len = 0, query_len = 0, + fragment_len = 0; + regmatch_t base_match[10]; + regmatch_t rel_match[10]; + regmatch_t up_match[3]; + + /* see RFC 2396 section 5.2 */ + m = regexec(&url_re, base, 10, base_match, 0); + if (m) { + LOG(("base url '%s' failed to match regex", base)); + return 0; + } + /*for (unsigned int i = 0; i != 10; i++) { + if (base_match[i].rm_so == -1) + continue; + fprintf(stderr, "%i: '%.*s'\n", i, + base_match[i].rm_eo - base_match[i].rm_so, + base + base_match[i].rm_so); + }*/ + if (base_match[2].rm_so == -1) { + LOG(("base url '%s' is not absolute", base)); + return 0; + } + scheme = base + base_match[2].rm_so; + scheme_len = base_match[2].rm_eo - base_match[2].rm_so; + if (base_match[4].rm_so != -1) { + authority = base + base_match[4].rm_so; + authority_len = base_match[4].rm_eo - base_match[4].rm_so; + } + path = base + base_match[5].rm_so; + path_len = base_match[5].rm_eo - base_match[5].rm_so; + + /* 1) */ + m = regexec(&url_re, rel, 10, rel_match, 0); + if (m) { + LOG(("relative url '%s' failed to match regex", rel)); + return 0; + } + + /* 2) */ + if (rel_match[5].rm_so == rel_match[5].rm_eo && + rel_match[2].rm_so == -1 && + rel_match[4].rm_so == -1 && + rel_match[6].rm_so == -1) { + goto step7; + } + if (rel_match[7].rm_so != -1) { + query = rel + rel_match[7].rm_so; + query_len = rel_match[7].rm_eo - rel_match[7].rm_so; + } + if (rel_match[9].rm_so != -1) { + fragment = rel + rel_match[9].rm_so; + fragment_len = rel_match[9].rm_eo - rel_match[9].rm_so; + } + + /* 3) */ + if (rel_match[2].rm_so != -1) { + scheme = rel + rel_match[2].rm_so; + scheme_len = rel_match[2].rm_eo - rel_match[2].rm_so; + authority = 0; + authority_len = 0; + if (rel_match[4].rm_so != -1) { + authority = rel + rel_match[4].rm_so; + authority_len = rel_match[4].rm_eo - rel_match[4].rm_so; + } + path = rel + rel_match[5].rm_so; + path_len = rel_match[5].rm_eo - rel_match[5].rm_so; + goto step7; + } + + /* 4) */ + if (rel_match[4].rm_so != -1) { + authority = rel + rel_match[4].rm_so; + authority_len = rel_match[4].rm_eo - rel_match[4].rm_so; + path = rel + rel_match[5].rm_so; + path_len = rel_match[5].rm_eo - rel_match[5].rm_so; + goto step7; + } + + /* 5) */ + if (rel[rel_match[5].rm_so] == '/') { + path = rel + rel_match[5].rm_so; + path_len = rel_match[5].rm_eo - rel_match[5].rm_so; + goto step7; + } + + /* 6) */ + buf = malloc(path_len + rel_match[5].rm_eo + 10); + if (!buf) { + LOG(("malloc failed")); + return 0; + } + /* a) */ + strncpy(buf, path, path_len); + for (; path_len != 0 && buf[path_len - 1] != '/'; path_len--) + ; + /* b) */ + strncpy(buf + path_len, rel + rel_match[5].rm_so, + rel_match[5].rm_eo - rel_match[5].rm_so); + path_len += rel_match[5].rm_eo - rel_match[5].rm_so; + /* c) */ + buf[path_len] = 0; + for (i = j = 0; j != path_len; ) { + if (j && buf[j - 1] == '/' && buf[j] == '.' && + buf[j + 1] == '/') + j += 2; + else + buf[i++] = buf[j++]; + } + path_len = i; + /* d) */ + if (buf[path_len - 2] == '/' && buf[path_len - 1] == '.') + path_len--; + /* e) and f) */ + while (1) { + buf[path_len] = 0; + m = regexec(&url_up_re, buf, 3, up_match, 0); + if (m) + break; + if (up_match[1].rm_eo + 4 <= path_len) { + memmove(buf + up_match[1].rm_so, + buf + up_match[1].rm_eo + 4, + path_len - up_match[1].rm_eo - 4); + path_len -= up_match[1].rm_eo - up_match[1].rm_so + 4; + } else + path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3; + } + buf[path_len] = 0; + path = buf; + +step7: /* 7) */ + res = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 + + query_len + 1 + fragment_len + 1); + if (!res) { + LOG(("malloc failed")); + free(buf); + return 0; + } + + strncpy(res, scheme, scheme_len); + res[scheme_len] = ':'; + i = scheme_len + 1; + if (authority) { + res[i++] = '/'; + res[i++] = '/'; + strncpy(res + i, authority, authority_len); + i += authority_len; + } + strncpy(res + i, path, path_len); + i += path_len; + if (query) { + res[i++] = '?'; + strncpy(res + i, query, query_len); + i += query_len; + } + if (fragment) { + res[i++] = '#'; + strncpy(res + i, fragment, fragment_len); + i += fragment_len; + } + res[i] = 0; + + free(buf); + + return res; +} + + +/** + * Return the host name from an URL. + * + * \param url an absolute URL + * \returns host name allocated on heap, or 0 on failure + */ + +char *url_host(const char *url) +{ + int m; + char *host; + regmatch_t match[10]; + + m = regexec(&url_re, url, 10, match, 0); + if (m) { + LOG(("url '%s' failed to match regex", url)); + return 0; + } + if (match[4].rm_so == -1) + return 0; + + host = malloc(match[4].rm_eo - match[4].rm_so + 1); + if (!host) { + LOG(("malloc failed")); + return 0; + } + strncpy(host, url + match[4].rm_so, match[4].rm_eo - match[4].rm_so); + host[match[4].rm_eo - match[4].rm_so] = 0; + + return host; +} + + + +#ifdef TEST + +int main(int argc, char *argv[]) +{ + int i; + char *s; + url_init(); + for (i = 1; i != argc; i++) { +/* printf("==> '%s'\n", argv[i]); + s = url_normalize(argv[i]); + if (s) + printf("<== '%s'\n", s);*/ +/* printf("==> '%s'\n", argv[i]); + s = url_host(argv[i]); + if (s) + printf("<== '%s'\n", s);*/ + if (1 != i) { + s = url_join(argv[i], argv[1]); + if (s) + printf("'%s' + '%s' \t= '%s'\n", argv[1], + argv[i], s); + } + } + return 0; +} + +void regcomp_wrapper(regex_t *preg, const char *regex, int cflags) +{ + char errbuf[200]; + int r; + r = regcomp(preg, regex, cflags); + if (r) { + regerror(r, preg, errbuf, sizeof errbuf); + fprintf(stderr, "Failed to compile regexp '%s'\n", regex); + fprintf(stderr, "error: %s\n", errbuf); + exit(1); + } +} + +#endif diff --git a/utils/url.h b/utils/url.h new file mode 100644 index 000000000..f908e8f9a --- /dev/null +++ b/utils/url.h @@ -0,0 +1,20 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2004 James Bursa + */ + +/** \file + * URL parsing and joining (interface). + */ + +#ifndef _NETSURF_UTILS_URL_H_ +#define _NETSURF_UTILS_URL_H_ + +void url_init(void); +char *url_normalize(const char *url); +char *url_join(const char *rel, const char *base); +char *url_host(const char *url); + +#endif diff --git a/utils/utils.c b/utils/utils.c index 889985178..f2c7188ea 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -2,7 +2,7 @@ * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 James Bursa + * Copyright 2004 James Bursa * Copyright 2003 Phil Mellor * Copyright 2003 John M Bell */ @@ -12,17 +12,11 @@ #include #include #include -#include #include #include #include #include "libxml/encoding.h" -#include "libxml/uri.h" #include "netsurf/utils/config.h" -#ifdef riscos -#include "netsurf/riscos/about.h" -#include "netsurf/riscos/constdata.h" -#endif #define NDEBUG #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" @@ -189,103 +183,6 @@ char *squash_tolat1(xmlChar *s) } -/** - * Calculate an URL from a relative and base URL. - * - * base may be 0 for a new URL, in which case the URL is canonicalized and - * returned. Returns 0 in case of error. - */ - -char *url_join(char *rel_url, char *base_url) -{ - char *res; - uri_t *base = 0, *rel = 0, *abs; - - LOG(("rel_url = %s, base_url = %s", rel_url, base_url)); - -#ifdef riscos - /* hacky, hacky, hacky... - * It is, however, best to do this here as it avoids - * duplicating code for clicking links and url bar handling. - * It simplifies the code it the other places too (they just - * call this as usual, then we handle it here). - */ -#ifdef WITH_ABOUT - if (strcasecmp(rel_url, "about:") == 0) { - about_create(); - return xstrdup(ABOUT_URL); - } -#ifdef WITH_COOKIES - if (strcasecmp(rel_url, "about:cookies") == 0) { - cookie_create(); - return xstrdup(COOKIE_URL); - } -#endif -#endif -#endif - - if (!base_url) { - res = uri_cannonicalize_string(rel_url, - (int)(strlen(rel_url)), - URI_STRING_URI_STYLE); - LOG(("res = %s", res)); - if (res) - return xstrdup(res); - return 0; - } - - base = uri_alloc(base_url, (int)(strlen(base_url))); - rel = uri_alloc(rel_url, (int)(strlen(rel_url))); - if (!base || !rel) - goto fail; - if (!base->scheme) - goto fail; - - abs = uri_abs_1(base, rel); - - res = xstrdup(uri_uri(abs)); - - uri_free(base); - uri_free(rel); - - LOG(("res = %s", res)); - return res; - -fail: - if (base) - uri_free(base); - if (rel) - uri_free(rel); - - LOG(("error")); - - return 0; -} - - -/** - * Extract the host name from a url. - * - * \param url an absolute URL - * \return a new string, or 0 in case of error - */ - -char *get_host_from_url(char *url) -{ - char *host = 0; - uri_t *uri; - - uri = uri_alloc(url, (int)(strlen(url))); - if (!uri) - return 0; - if (uri->host) - host = xstrdup(uri->host); - uri_free(uri); - - return host; -} - - /** * Check if a directory exists. */ diff --git a/utils/utils.h b/utils/utils.h index 02b927711..1faef449d 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -2,7 +2,7 @@ * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 James Bursa + * Copyright 2004 James Bursa */ #ifndef _NETSURF_UTILS_UTILS_H_ @@ -26,8 +26,6 @@ char * squash_whitespace(const char * s); char * tolat1(xmlChar * s); char * tolat1_pre(xmlChar * s); char *squash_tolat1(xmlChar *s); -char *url_join(char *rel_url, char *base_url); -char *get_host_from_url(char* url); bool is_dir(const char *path); void regcomp_wrapper(regex_t *preg, const char *regex, int cflags); void clean_cookiejar(void); -- cgit v1.2.3