From 2077918805c175ecefeb25dd38f78d2d5b3a3ca2 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Thu, 9 Sep 2010 21:45:59 +0000 Subject: Merge branches/vince/netsurf-file-fetcher to trunk r=jmb svn path=/trunk/netsurf/; revision=10750 --- content/fetchers/fetch_curl.c | 64 +---- content/fetchers/fetch_file.c | 617 ++++++++++++++++++++++++++++++++++++++++++ content/fetchers/fetch_file.h | 28 ++ 3 files changed, 650 insertions(+), 59 deletions(-) create mode 100644 content/fetchers/fetch_file.c create mode 100644 content/fetchers/fetch_file.h (limited to 'content/fetchers') diff --git a/content/fetchers/fetch_curl.c b/content/fetchers/fetch_curl.c index e53c844f1..343ffd89f 100644 --- a/content/fetchers/fetch_curl.c +++ b/content/fetchers/fetch_curl.c @@ -217,7 +217,10 @@ void fetch_curl_register(void) data = curl_version_info(CURLVERSION_NOW); - for (i = 0; data->protocols[i]; i++) + for (i = 0; data->protocols[i]; i++) { + if (strcmp(data->protocols[i], "file") == 0) + continue; /* do not use curl for file: */ + if (!fetch_add_fetcher(data->protocols[i], fetch_curl_initialise, fetch_curl_setup, @@ -229,6 +232,7 @@ void fetch_curl_register(void) LOG(("Unable to register cURL fetcher for %s", data->protocols[i])); } + } return; curl_easy_setopt_failed: @@ -1155,64 +1159,6 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f) return true; } - /* find MIME type from filetype for local files */ - if (strncmp(f->url, FILE_SCHEME_PREFIX, FILE_SCHEME_PREFIX_LEN) == 0) { - struct stat s; - char *url_path = url_to_path(f->url); - - LOG(("Obtaining mime type for file %s", url_path)); - - if (url_path != NULL && stat(url_path, &s) == 0) { - /* file: URL and file exists */ - char header[64]; - const char *type; - - /* create etag */ - snprintf(header, sizeof header, - "ETag: \"%10" PRId64 "\"", - (int64_t) s.st_mtime); - /* And send it to the header handler */ - fetch_send_callback(FETCH_HEADER, f->fetch_handle, - header, strlen(header), - FETCH_ERROR_NO_ERROR); - - /* create Content-Type */ - type = fetch_filetype(url_path); - snprintf(header, sizeof header, - "Content-Type: %s", type); - /* Send it to the header handler */ - fetch_send_callback(FETCH_HEADER, f->fetch_handle, - header, strlen(header), - FETCH_ERROR_NO_ERROR); - - /* create Content-Length */ - snprintf(header, sizeof header, - "Content-Length: %" PRId64, - (int64_t) s.st_size); - /* Send it to the header handler */ - fetch_send_callback(FETCH_HEADER, f->fetch_handle, - header, strlen(header), - FETCH_ERROR_NO_ERROR); - - /* don't set last modified time so as to ensure that - * local files are revalidated at all times. */ - - /* Report not modified, if appropriate */ - if (f->last_modified && f->file_etag && - f->last_modified > s.st_mtime && - f->file_etag == s.st_mtime) { - fetch_send_callback(FETCH_NOTMODIFIED, - f->fetch_handle, 0, 0, - FETCH_ERROR_NO_ERROR); - free(url_path); - return true; - } - } - - if (url_path != NULL) - free(url_path); - } - if (f->abort) return true; diff --git a/content/fetchers/fetch_file.c b/content/fetchers/fetch_file.c new file mode 100644 index 000000000..19270c70a --- /dev/null +++ b/content/fetchers/fetch_file.c @@ -0,0 +1,617 @@ +/* + * Copyright 2010 Vincent Sanders + * + * This file is part of NetSurf. + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* file: URL handling. Based on the data fetcher by Rob Kendrick */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/config.h" +#include "content/dirlist.h" +#include "content/fetch.h" +#include "content/fetchers/fetch_file.h" +#include "content/urldb.h" +#include "desktop/netsurf.h" +#include "desktop/options.h" +#include "utils/log.h" +#include "utils/messages.h" +#include "utils/url.h" +#include "utils/utils.h" +#include "utils/ring.h" + +/* Maximum size of read buffer */ +#define FETCH_FILE_MAX_BUF_SIZE (1024 * 1024) + +/** Context for a fetch */ +struct fetch_file_context { + struct fetch_file_context *r_next, *r_prev; + + struct fetch *fetchh; /**< Handle for this fetch */ + + bool aborted; /**< Flag indicating fetch has been aborted */ + bool locked; /**< Flag indicating entry is already entered */ + + char *url; /**< The full url the fetch refers to */ + char *path; /**< The actual path to be used with open() */ +}; + +static struct fetch_file_context *ring = NULL; + +/** issue fetch callbacks with locking */ +static inline bool fetch_file_send_callback(fetch_msg msg, + struct fetch_file_context *ctx, const void *data, + unsigned long size, fetch_error_code errorcode) +{ + ctx->locked = true; + fetch_send_callback(msg, ctx->fetchh, data, size, errorcode); + ctx->locked = false; + + return ctx->aborted; +} + +static bool fetch_file_send_header(struct fetch_file_context *ctx, const char *fmt, ...) +{ + char header[64]; + va_list ap; + + va_start(ap, fmt); + + vsnprintf(header, sizeof header, fmt, ap); + + va_end(ap); + + fetch_file_send_callback(FETCH_HEADER, ctx, header, strlen(header), FETCH_ERROR_NO_ERROR); + + return ctx->aborted; +} + +static bool fetch_file_send_time(struct fetch_file_context *ctx, const char *fmt, const time_t *val) +{ + char header[64]; + struct tm btm; + + gmtime_r(val, &btm); + + strftime(header, sizeof header, fmt, &btm); + + fetch_file_send_callback(FETCH_HEADER, ctx, header, strlen(header), FETCH_ERROR_NO_ERROR); + + return ctx->aborted; +} + +/** callback to initialise the file fetcher. */ +static bool fetch_file_initialise(const char *scheme) +{ + return true; +} + +/** callback to initialise the file fetcher. */ +static void fetch_file_finalise(const char *scheme) +{ +} + +/** callback to set up a file fetch context. */ +static void * +fetch_file_setup(struct fetch *fetchh, + const char *url, + bool only_2xx, + const char *post_urlenc, + const struct fetch_multipart_data *post_multipart, + const char **headers) +{ + struct fetch_file_context *ctx; + char *path; + url_func_result res; /* result from url routines */ + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) + return NULL; + + res = url_path(url, &path); + if (res != URL_FUNC_OK) { + free(ctx); + return NULL; + } + + res = url_unescape(path, &ctx->path); + free(path); + if (res != URL_FUNC_OK) { + free(ctx); + return NULL; + } + + ctx->url = strdup(url); + if (ctx->url == NULL) { + free(ctx->path); + free(ctx); + return NULL; + } + + ctx->fetchh = fetchh; + + RING_INSERT(ring, ctx); + + return ctx; +} + +/** callback to free a file fetch */ +static void fetch_file_free(void *ctx) +{ + struct fetch_file_context *c = ctx; + free(c->url); + free(c->path); + RING_REMOVE(ring, c); + free(ctx); +} + +/** callback to start a file fetch */ +static bool fetch_file_start(void *ctx) +{ + return true; +} + +/** callback to abort a file fetch */ +static void fetch_file_abort(void *ctx) +{ + struct fetch_file_context *c = ctx; + + /* To avoid the poll loop having to deal with the fetch context + * disappearing from under it, we simply flag the abort here. + * The poll loop itself will perform the appropriate cleanup. + */ + c->aborted = true; +} + +static void fetch_file_process_error(struct fetch_file_context *ctx, int code) +{ + char buffer[1024]; + const char *title; + char key[8]; + + /* content is going to return error code */ + fetch_set_http_code(ctx->fetchh, code); + + /* content type */ + if (fetch_file_send_header(ctx, "Content-Type: text/html")) + goto fetch_file_process_error_aborted; + + snprintf(key, sizeof key, "HTTP%03d", code); + title = messages_get(key); + + snprintf(buffer, sizeof buffer, "%s

%s

Error %d while fetching file %s

", title, title, code,ctx->url); + + if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_error_aborted; + + fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0, FETCH_ERROR_NO_ERROR); + +fetch_file_process_error_aborted: + return; +} + + +/** Process object as a regular file */ +static void fetch_file_process_plain(struct fetch_file_context *ctx, + int fd, struct stat *fdstat) +{ + char *buf; + size_t buf_size; + + ssize_t tot_read = 0; + ssize_t res; + + /* set buffer size */ + buf_size = fdstat->st_size; + if (buf_size > FETCH_FILE_MAX_BUF_SIZE) + buf_size = FETCH_FILE_MAX_BUF_SIZE; + + /* allocate the buffer storage */ + buf = malloc(buf_size); + if (buf == NULL) { + fetch_file_send_callback(FETCH_ERROR, ctx, + "Unable to allocate memory for file data buffer", + 0, FETCH_ERROR_MEMORY); + return; + } + + /* fetch is going to be successful */ + fetch_set_http_code(ctx->fetchh, 200); + + /* Any callback can result in the fetch being aborted. + * Therefore, we _must_ check for this after _every_ call to + * fetch_file_send_callback(). + */ + + /* content type */ + if (fetch_file_send_header(ctx, "Content-Type: %s", fetch_filetype(ctx->path))) + goto fetch_file_process_aborted; + + /* content length */ + if (fetch_file_send_header(ctx, "Content-Length: %zd", fdstat->st_size)) + goto fetch_file_process_aborted; + + /* Set Last modified header */ + if (fetch_file_send_time(ctx, "Last-Modified: %a, %d %b %Y %H:%M:%S GMT", &fdstat->st_mtime)) + goto fetch_file_process_aborted; + + /* create etag */ + if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"", (int64_t) fdstat->st_mtime)) + goto fetch_file_process_aborted; + + /* main data loop */ + do { + res = read(fd, buf, buf_size); + if (res == -1) { + fetch_file_send_callback(FETCH_ERROR, ctx, "Error reading file", 0, FETCH_ERROR_PARTIAL_FILE); + goto fetch_file_process_aborted; + } + + if (res == 0) { + fetch_file_send_callback(FETCH_ERROR, ctx, "Unexpected EOF reading file", 0, FETCH_ERROR_PARTIAL_FILE); + goto fetch_file_process_aborted; + } + + tot_read += res; + + if (fetch_file_send_callback(FETCH_DATA, ctx, buf, res, FETCH_ERROR_NO_ERROR)) + break; + + } while (tot_read < fdstat->st_size); + + if (!ctx->aborted) + fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0, FETCH_ERROR_NO_ERROR); + +fetch_file_process_aborted: + + close(fd); + free(buf); + return; +} + +static char *gen_nice_title(char *path) +{ + char *nice_path, *cnv, *tmp; + char *title; + int title_length; + + /* Convert path for display */ + nice_path = malloc(strlen(path) * SLEN("&") + 1); + if (nice_path == NULL) { + return NULL; + } + + /* Escape special HTML characters */ + for (cnv = nice_path, tmp = path; *tmp != '\0'; tmp++) { + if (*tmp == '<') { + *cnv++ = '&'; + *cnv++ = 'l'; + *cnv++ = 't'; + *cnv++ = ';'; + } else if (*tmp == '>') { + *cnv++ = '&'; + *cnv++ = 'g'; + *cnv++ = 't'; + *cnv++ = ';'; + } else if (*tmp == '&') { + *cnv++ = '&'; + *cnv++ = 'a'; + *cnv++ = 'm'; + *cnv++ = 'p'; + *cnv++ = ';'; + } else { + *cnv++ = *tmp; + } + } + *cnv = '\0'; + + /* Construct a localised title string */ + title_length = (cnv - nice_path) + strlen(messages_get("FileIndex")); + title = malloc(title_length + 1); + + if (title == NULL) { + free(nice_path); + return NULL; + } + + /* Set title to localised "Index of " */ + snprintf(title, title_length, messages_get("FileIndex"), nice_path); + + free(nice_path); + + return title; +} + + +static void fetch_file_process_dir(struct fetch_file_context *ctx, + int fd, struct stat *fdstat) +{ + char buffer[1024]; /* Output buffer */ + bool even = false; /* formatting flag */ + char *title; /* pretty printed title */ + url_func_result res; /* result from url routines */ + char *up; /* url of parent */ + bool compare; /* result of url compare */ + + DIR *scandir; /* handle for enumerating the directory */ + struct dirent* ent; /* leaf directory entry */ + struct stat ent_stat; /* stat result of leaf entry */ + char datebuf[64]; /* buffer for date text */ + char timebuf[64]; /* buffer for time text */ + char urlpath[PATH_MAX]; /* buffer for leaf entry path */ + +#if defined(HAVE_FDOPENDIR) + scandir = fdopendir(fd); +#else + /* this poses the possibility of a race where the directory + * has been removed from the namespace or resources for more + * fd are now unavailable between the previous open() and this + * call. + */ + close(fd); + scandir = opendir(ctx->path); +#endif + + if (scandir == NULL) { + fetch_file_process_error(ctx, 500); + return; + } + + /* fetch is going to be successful */ + fetch_set_http_code(ctx->fetchh, 200); + + /* content type */ + if (fetch_file_send_header(ctx, "Content-Type: text/html")) + goto fetch_file_process_dir_aborted; + + /* directory listing top */ + dirlist_generate_top(buffer, sizeof buffer); + if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_dir_aborted; + + /* directory listing title */ + title = gen_nice_title(ctx->path); + dirlist_generate_title(title, buffer, sizeof buffer); + free(title); + if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_dir_aborted; + + /* Print parent directory link */ + res = url_parent(ctx->url, &up); + if (res == URL_FUNC_OK) { + res = url_compare(ctx->url, up, false, &compare); + if ((res == URL_FUNC_OK) && !compare) { + dirlist_generate_parent_link(up, buffer, sizeof buffer); + + fetch_file_send_callback(FETCH_DATA, ctx, + buffer, + strlen(buffer), + FETCH_ERROR_NO_ERROR); + + } + free(up); + + if (ctx->aborted) + goto fetch_file_process_dir_aborted; + + } + + /* directory list headings */ + dirlist_generate_headings(buffer, sizeof buffer); + if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_dir_aborted; + + while ((ent = readdir(scandir)) != NULL) { + + if (ent->d_name[0] == '.') + continue; + + snprintf(urlpath, + sizeof urlpath, + "%s%s%s", + ctx->path, + (ctx->path[strlen(ctx->path) - 1] == '/')?"":"/" , + ent->d_name); + + if (stat(urlpath, &ent_stat) != 0) { + ent_stat.st_mode = 0; + datebuf[0] = 0; + timebuf[0] = 0; + } else { + /* Get date in output format */ + if (strftime((char *)&datebuf, sizeof datebuf, + "%a %d %b %Y", + localtime(&ent_stat.st_mtime)) == 0) { + strncpy(datebuf, "-", sizeof datebuf); + } + + /* Get time in output format */ + if (strftime((char *)&timebuf, sizeof timebuf, + "%H:%M", + localtime(&ent_stat.st_mtime)) == 0) { + strncpy(timebuf, "-", sizeof timebuf); + } + } + + if (S_ISREG(ent_stat.st_mode)) { + /* regular file */ + dirlist_generate_row(even, + false, + urlpath, + ent->d_name, + fetch_filetype(urlpath), + ent_stat.st_size, + datebuf, timebuf, + buffer, sizeof(buffer)); + } else if (S_ISDIR(ent_stat.st_mode)) { + /* directory */ + dirlist_generate_row(even, + true, + urlpath, + ent->d_name, + messages_get("FileDirectory"), + -1, + datebuf, timebuf, + buffer, sizeof(buffer)); + } else { + /* something else */ + dirlist_generate_row(even, + false, + urlpath, + ent->d_name, + "", + -1, + datebuf, timebuf, + buffer, sizeof(buffer)); + } + + if (fetch_file_send_callback(FETCH_DATA, ctx, + buffer, + strlen(buffer), + FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_dir_aborted; + + even = !even; + } + + /* directory listing bottom */ + dirlist_generate_bottom(buffer, sizeof buffer); + if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR)) + goto fetch_file_process_dir_aborted; + + + fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0, FETCH_ERROR_NO_ERROR); + +fetch_file_process_dir_aborted: + + closedir(scandir); +} + + +/* process a file fetch */ +static void fetch_file_process(struct fetch_file_context *ctx) +{ + int fd; /**< The file descriptor of the object */ + struct stat fdstat; /**< The objects stat */ + + fd = open(ctx->path, O_RDONLY); + if (fd < 0) { + /* process errors as appropriate */ + switch (errno) { + case EACCES: + fetch_file_process_error(ctx, 403); + break; + + case ENOENT: + fetch_file_process_error(ctx, 404); + break; + + default: + fetch_file_process_error(ctx, 500); + break; + } + return; + } + + if (fstat(fd, &fdstat) != 0) { + /* process errors as appropriate */ + close(fd); + fetch_file_process_error(ctx, 500); + return; + } + + if (S_ISDIR(fdstat.st_mode)) { + /* directory listing */ + fetch_file_process_dir(ctx, fd, &fdstat); + return; + } else if (S_ISREG(fdstat.st_mode)) { + /* regular file */ + fetch_file_process_plain(ctx, fd, &fdstat); + return; + } else { + /* unhandled type of file */ + close(fd); + fetch_file_process_error(ctx, 501); + } + + return; +} + +/** callback to poll for additional file fetch contents */ +static void fetch_file_poll(const char *scheme) +{ + struct fetch_file_context *c, *next; + + if (ring == NULL) return; + + /* Iterate over ring, processing each pending fetch */ + c = ring; + do { + /* Take a copy of the next pointer as we may destroy + * the ring item we're currently processing */ + next = c->r_next; + + /* Ignore fetches that have been flagged as locked. + * This allows safe re-entrant calls to this function. + * Re-entrancy can occur if, as a result of a callback, + * the interested party causes fetch_poll() to be called + * again. + */ + if (c->locked == true) { + continue; + } + + /* Only process non-aborted fetches */ + if (!c->aborted) { + /* file fetches can be processed in one go */ + fetch_file_process(c); + } + + + fetch_remove_from_queues(c->fetchh); + fetch_free(c->fetchh); + + /* Advance to next ring entry, exiting if we've reached + * the start of the ring or the ring has become empty + */ + } while ( (c = next) != ring && ring != NULL); +} + +void fetch_file_register(void) +{ + fetch_add_fetcher("file", + fetch_file_initialise, + fetch_file_setup, + fetch_file_start, + fetch_file_abort, + fetch_file_free, + fetch_file_poll, + fetch_file_finalise); +} diff --git a/content/fetchers/fetch_file.h b/content/fetchers/fetch_file.h new file mode 100644 index 000000000..d1621b9ba --- /dev/null +++ b/content/fetchers/fetch_file.h @@ -0,0 +1,28 @@ +/* + * Copyright 2010 Vincent Sanders + * + * This file is part of NetSurf. + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** \file + * file: URL method handler + */ + +#ifndef NETSURF_CONTENT_FETCHERS_FETCH_FILE_H +#define NETSURF_CONTENT_FETCHERS_FETCH_FILE_H + +void fetch_file_register(void); + +#endif -- cgit v1.2.3