summaryrefslogtreecommitdiff
path: root/content/fetchers/fetch_curl.c
diff options
context:
space:
mode:
Diffstat (limited to 'content/fetchers/fetch_curl.c')
-rw-r--r--content/fetchers/fetch_curl.c1368
1 files changed, 1368 insertions, 0 deletions
diff --git a/content/fetchers/fetch_curl.c b/content/fetchers/fetch_curl.c
new file mode 100644
index 000000000..4103e18f1
--- /dev/null
+++ b/content/fetchers/fetch_curl.c
@@ -0,0 +1,1368 @@
+/*
+ * This file is part of NetSurf, http://netsurf-browser.org/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2006 Daniel Silverstone <dsilvers@digital-scurf.org>
+ * Copyright 2007 James Bursa <bursa@users.sourceforge.net>
+ * Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
+ */
+
+/** \file
+ * Fetching of data from a URL (implementation).
+ *
+ * This implementation uses libcurl's 'multi' interface.
+ *
+ *
+ * The CURL handles are cached in the cache_ring. There are at most
+ * ::option_max_cached_fetch_handles in this ring.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#ifdef riscos
+#include <unixlib/local.h>
+#endif
+#include "utils/config.h"
+#ifdef WITH_SSL
+#include <openssl/ssl.h>
+#endif
+#include "content/fetch.h"
+#include "content/fetchers/fetch_curl.h"
+#include "content/urldb.h"
+#include "desktop/netsurf.h"
+#include "desktop/options.h"
+#include "render/form.h"
+#undef NDEBUG
+#include "utils/log.h"
+#include "utils/messages.h"
+#include "utils/url.h"
+#include "utils/utils.h"
+#include "utils/ring.h"
+#include "utils/useragent.h"
+
+#ifdef WITH_SSL
+/** SSL certificate info */
+struct cert_info {
+ X509 *cert; /**< Pointer to certificate */
+ long err; /**< OpenSSL error code */
+};
+#endif
+
+/** Information for a single fetch. */
+struct curl_fetch_info {
+ struct fetch *fetch_handle; /**< The fetch handle we're parented by. */
+ CURL * curl_handle; /**< cURL handle if being fetched, or 0. */
+ bool had_headers; /**< Headers have been processed. */
+ bool abort; /**< Abort requested. */
+ bool stopped; /**< Download stopped on purpose. */
+ bool only_2xx; /**< Only HTTP 2xx responses acceptable. */
+ bool verifiable; /**< Transaction is verifiable */
+ char *url; /**< URL of this fetch. */
+ char *host; /**< The hostname of this fetch. */
+ char *parent_fetch_url; /**< URL of parent fetch (not necessarily
+ * the same as the referer) */
+ struct curl_slist *headers; /**< List of request headers. */
+ char *location; /**< Response Location header, or 0. */
+ unsigned long content_length; /**< Response Content-Length, or 0. */
+ char *cookie_string; /**< Cookie string for this fetch */
+ char *realm; /**< HTTP Auth Realm */
+ char *post_urlenc; /**< Url encoded POST string, or 0. */
+ unsigned long http_code; /**< HTTP result code from cURL. */
+ struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */
+ struct cache_data cachedata; /**< Cache control data */
+ time_t last_modified; /**< If-Modified-Since time */
+ time_t file_etag; /**< ETag for local objects */
+#ifdef WITH_SSL
+#define MAX_CERTS 10
+ struct cert_info cert_data[MAX_CERTS]; /**< HTTPS certificate data */
+#endif
+};
+
+struct cache_handle {
+ CURL *handle; /**< The cached cURL handle */
+ char *host; /**< The host for which this handle is cached */
+
+ struct cache_handle *r_prev; /**< Previous cached handle in ring. */
+ struct cache_handle *r_next; /**< Next cached handle in ring. */
+};
+
+CURLM *fetch_curl_multi; /**< Global cURL multi handle. */
+/** Curl handle with default options set; not used for transfers. */
+static CURL *fetch_blank_curl;
+static struct cache_handle *curl_handle_ring = 0; /**< Ring of cached handles */
+
+static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */
+static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */
+static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */
+
+static CURLcode fetch_curl_set_options(struct curl_fetch_info *f);
+#ifdef WITH_SSL
+static CURLcode fetch_curl_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *p);
+#endif
+static void fetch_curl_free(void *f);
+static void fetch_curl_stop(struct curl_fetch_info *f);
+static void fetch_curl_done(CURL *curl_handle, CURLcode result);
+static int fetch_curl_progress(void *clientp, double dltotal, double dlnow,
+ double ultotal, double ulnow);
+static size_t fetch_curl_data(void *data, size_t size, size_t nmemb,
+ struct curl_fetch_info *f);
+static size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
+ struct curl_fetch_info *f);
+static bool fetch_curl_process_headers(struct curl_fetch_info *f);
+static struct curl_httppost *fetch_curl_post_convert(
+ struct form_successful_control *control);
+#ifdef WITH_SSL
+static int fetch_curl_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx);
+static int fetch_curl_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm);
+#endif
+
+static void fetch_curl_cache_handle(CURL *handle, char *hostname);
+
+/**
+ * Start fetching data for the given URL.
+ *
+ * The function returns immediately. The fetch may be queued for later
+ * processing.
+ *
+ * A pointer to an opaque struct curl_fetch_info is returned, which can be passed to
+ * fetch_abort() to abort the fetch at any time. Returns 0 if memory is
+ * exhausted (or some other fatal error occurred).
+ *
+ * The caller must supply a callback function which is called when anything
+ * interesting happens. The callback function is first called with msg
+ * FETCH_TYPE, with the Content-Type header in data, then one or more times
+ * with FETCH_DATA with some data for the url, and finally with
+ * FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred:
+ * data contains an error message. FETCH_REDIRECT may replace the FETCH_TYPE,
+ * FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL.
+ *
+ * Some private data can be passed as the last parameter to fetch_start, and
+ * callbacks will contain this.
+ */
+
+static void *
+fetch_curl_setup(struct fetch *parent_fetch, const char *url,
+ bool only_2xx, const char *post_urlenc,
+ struct form_successful_control *post_multipart,
+ bool verifiable, const char *parent_url, const char **headers)
+{
+ char *host;
+ struct curl_fetch_info *fetch;
+ struct curl_slist *slist;
+ url_func_result res;
+ int i;
+
+ fetch = malloc(sizeof (*fetch));
+ if (!fetch)
+ return 0;
+
+ fetch->fetch_handle = parent_fetch;
+
+ res = url_host(url, &host);
+ if (res != URL_FUNC_OK) {
+ /* we only fail memory exhaustion */
+ if (res == URL_FUNC_NOMEM)
+ goto failed;
+
+ host = strdup("");
+ if (!host)
+ goto failed;
+ }
+
+ LOG(("fetch %p, url '%s'", fetch, url));
+
+ /* construct a new fetch structure */
+ fetch->curl_handle = 0;
+ fetch->had_headers = false;
+ fetch->abort = false;
+ fetch->stopped = false;
+ fetch->only_2xx = only_2xx;
+ fetch->verifiable = verifiable;
+ fetch->url = strdup(url);
+ fetch->parent_fetch_url = parent_url ? strdup(parent_url) : 0;
+ fetch->headers = 0;
+ fetch->host = host;
+ fetch->location = 0;
+ fetch->content_length = 0;
+ fetch->http_code = 0;
+ fetch->cookie_string = 0;
+ fetch->realm = 0;
+ fetch->post_urlenc = 0;
+ fetch->post_multipart = 0;
+ if (post_urlenc)
+ fetch->post_urlenc = strdup(post_urlenc);
+ else if (post_multipart)
+ fetch->post_multipart = fetch_curl_post_convert(post_multipart);
+ fetch->cachedata.req_time = time(0);
+ fetch->cachedata.res_time = 0;
+ fetch->cachedata.date = 0;
+ fetch->cachedata.expires = 0;
+ fetch->cachedata.age = INVALID_AGE;
+ fetch->cachedata.max_age = INVALID_AGE;
+ fetch->cachedata.no_cache = false;
+ fetch->cachedata.etag = 0;
+ fetch->cachedata.last_modified = 0;
+ fetch->last_modified = 0;
+ fetch->file_etag = 0;
+ fetch->http_code = 0;
+#ifdef WITH_SSL
+ memset(fetch->cert_data, 0, sizeof(fetch->cert_data));
+#endif
+
+ if (!fetch->url ||
+ (parent_url && !fetch->parent_fetch_url) ||
+ (post_urlenc && !fetch->post_urlenc) ||
+ (post_multipart && !fetch->post_multipart))
+ goto failed;
+
+#define APPEND(list, value) \
+ slist = curl_slist_append(list, value); \
+ if (!slist) \
+ goto failed; \
+ list = slist;
+
+ /* remove curl default headers */
+ APPEND(fetch->headers, "Accept:");
+ APPEND(fetch->headers, "Pragma:");
+
+ /* when doing a POST libcurl sends Expect: 100-continue" by default
+ * which fails with lighttpd, so disable it (see bug 1429054) */
+ APPEND(fetch->headers, "Expect:");
+
+ if (option_accept_language) {
+ char s[80];
+ snprintf(s, sizeof s, "Accept-Language: %s, *;q=0.1",
+ option_accept_language);
+ s[sizeof s - 1] = 0;
+ APPEND(fetch->headers, s);
+ }
+
+ if (option_accept_charset) {
+ char s[80];
+ snprintf(s, sizeof s, "Accept-Charset: %s, *;q=0.1",
+ option_accept_charset);
+ s[sizeof s - 1] = 0;
+ APPEND(fetch->headers, s);
+ }
+
+ /* And add any headers specified by the caller */
+ for (i = 0; headers[i]; i++) {
+ if (strncasecmp(headers[i], "If-Modified-Since:", 18) == 0) {
+ char *d = headers[i] + 18;
+ for (; *d && (*d == ' ' || *d == '\t'); d++)
+ /* do nothing */;
+ fetch->last_modified = curl_getdate(d, NULL);
+ }
+ else if (strncasecmp(headers[i], "If-None-Match:", 14) == 0) {
+ char *d = headers[i] + 14;
+ for (; *d && (*d == ' ' || *d == '\t' || *d == '"');
+ d++)
+ /* do nothing */;
+ fetch->file_etag = atoi(d);
+ }
+ APPEND(fetch->headers, headers[i]);
+ }
+
+ return fetch;
+
+failed:
+ free(host);
+ free(fetch->url);
+ free(fetch->parent_fetch_url);
+ free(fetch->post_urlenc);
+ if (fetch->post_multipart)
+ curl_formfree(fetch->post_multipart);
+ curl_slist_free_all(fetch->headers);
+ free(fetch);
+ return 0;
+}
+
+/**
+ * Initiate a fetch from the queue.
+ *
+ * Called with a fetch structure and a CURL handle to be used to fetch the content.
+ *
+ * This will return whether or not the fetch was successfully initiated.
+ */
+static bool fetch_curl_initiate_fetch(struct curl_fetch_info *fetch, CURL *handle)
+{
+ CURLcode code;
+ CURLMcode codem;
+
+ fetch->curl_handle = handle;
+
+ /* Initialise the handle */
+ code = fetch_curl_set_options(fetch);
+ if (code != CURLE_OK) {
+ fetch->curl_handle = 0;
+ return false;
+ }
+
+ /* add to the global curl multi handle */
+ codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle);
+ assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
+
+ return true;
+}
+
+/**
+ * Find a CURL handle to use to dispatch a job
+ */
+static CURL *fetch_curl_get_handle(char *host)
+{
+ struct cache_handle *h;
+ CURL *ret;
+ RING_FINDBYHOST(curl_handle_ring, h, host);
+ if (h) {
+ ret = h->handle;
+ free(h->host);
+ RING_REMOVE(curl_handle_ring, h);
+ free(h);
+ } else {
+ ret = curl_easy_duphandle(fetch_blank_curl);
+ }
+ return ret;
+}
+
+/**
+ * Dispatch a single job
+ */
+static bool fetch_curl_start(void *vfetch)
+{
+ struct curl_fetch_info *fetch = (struct curl_fetch_info*)vfetch;
+ return fetch_curl_initiate_fetch(fetch, fetch_curl_get_handle(fetch->host));
+}
+
+/**
+ * Cache a CURL handle for the provided host (if wanted)
+ *
+ */
+static void fetch_curl_cache_handle(CURL *handle, char *host)
+{
+ struct cache_handle *h = 0;
+ int c;
+ RING_FINDBYHOST(curl_handle_ring, h, host);
+ if (h) {
+ /* Already have a handle cached for this hostname */
+ curl_easy_cleanup(handle);
+ return;
+ }
+ /* We do not have a handle cached, first up determine if the cache is full */
+ RING_GETSIZE(struct cache_handle, curl_handle_ring, c);
+ if (c >= option_max_cached_fetch_handles) {
+ /* Cache is full, so, we rotate the ring by one and replace the
+ * oldest handle with this one. We do this without freeing/allocating
+ * memory (except the hostname) and without removing the entry from the
+ * ring and then re-inserting it, in order to be as efficient as we can.
+ */
+ h = curl_handle_ring;
+ curl_handle_ring = h->r_next;
+ curl_easy_cleanup(h->handle);
+ h->handle = handle;
+ free(h->host);
+ h->host = strdup(host);
+ return;
+ }
+ /* The table isn't full yet, so make a shiny new handle to add to the ring */
+ h = (struct cache_handle*)malloc(sizeof(struct cache_handle));
+ h->handle = handle;
+ h->host = strdup(host);
+ RING_INSERT(curl_handle_ring, h);
+}
+
+/**
+ * Set options specific for a fetch.
+ */
+
+static CURLcode
+fetch_curl_set_options(struct curl_fetch_info *f)
+{
+ CURLcode code;
+ const char *auth;
+
+#undef SETOPT
+#define SETOPT(option, value) { \
+ code = curl_easy_setopt(f->curl_handle, option, value); \
+ if (code != CURLE_OK) \
+ return code; \
+ }
+
+ SETOPT(CURLOPT_URL, f->url);
+ SETOPT(CURLOPT_PRIVATE, f);
+ SETOPT(CURLOPT_WRITEDATA, f);
+ SETOPT(CURLOPT_WRITEHEADER, f);
+ SETOPT(CURLOPT_PROGRESSDATA, f);
+ SETOPT(CURLOPT_REFERER, fetch_get_referer_to_send(f->fetch_handle));
+ SETOPT(CURLOPT_HTTPHEADER, f->headers);
+ if (f->post_urlenc) {
+ SETOPT(CURLOPT_HTTPPOST, 0);
+ SETOPT(CURLOPT_HTTPGET, 0L);
+ SETOPT(CURLOPT_POSTFIELDS, f->post_urlenc);
+ } else if (f->post_multipart) {
+ SETOPT(CURLOPT_POSTFIELDS, 0);
+ SETOPT(CURLOPT_HTTPGET, 0L);
+ SETOPT(CURLOPT_HTTPPOST, f->post_multipart);
+ } else {
+ SETOPT(CURLOPT_POSTFIELDS, 0);
+ SETOPT(CURLOPT_HTTPPOST, 0);
+ SETOPT(CURLOPT_HTTPGET, 1L);
+ }
+
+ f->cookie_string = urldb_get_cookie(f->url);
+ if (f->cookie_string) {
+ SETOPT(CURLOPT_COOKIE, f->cookie_string);
+ } else {
+ SETOPT(CURLOPT_COOKIE, 0);
+ }
+
+#ifdef WITH_AUTH
+ if ((auth = urldb_get_auth_details(f->url)) != NULL) {
+ SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY);
+ SETOPT(CURLOPT_USERPWD, auth);
+ } else {
+#endif
+ SETOPT(CURLOPT_USERPWD, 0);
+#ifdef WITH_AUTH
+ }
+#endif
+ if (option_http_proxy && option_http_proxy_host) {
+ SETOPT(CURLOPT_PROXY, option_http_proxy_host);
+ SETOPT(CURLOPT_PROXYPORT, (long) option_http_proxy_port);
+ if (option_http_proxy_auth != OPTION_HTTP_PROXY_AUTH_NONE) {
+ SETOPT(CURLOPT_PROXYAUTH,
+ option_http_proxy_auth ==
+ OPTION_HTTP_PROXY_AUTH_BASIC ?
+ (long) CURLAUTH_BASIC :
+ (long) CURLAUTH_NTLM);
+ snprintf(fetch_proxy_userpwd,
+ sizeof fetch_proxy_userpwd,
+ "%s:%s",
+ option_http_proxy_auth_user,
+ option_http_proxy_auth_pass);
+ SETOPT(CURLOPT_PROXYUSERPWD, fetch_proxy_userpwd);
+ }
+ }
+
+#ifdef WITH_SSL
+ if (urldb_get_cert_permissions(f->url)) {
+ /* Disable certificate verification */
+ SETOPT(CURLOPT_SSL_VERIFYPEER, 0L);
+ SETOPT(CURLOPT_SSL_VERIFYHOST, 0L);
+ SETOPT(CURLOPT_SSL_CTX_FUNCTION, 0);
+ SETOPT(CURLOPT_SSL_CTX_DATA, 0);
+ } else {
+ /* do verification */
+ SETOPT(CURLOPT_SSL_VERIFYPEER, 1L);
+ SETOPT(CURLOPT_SSL_VERIFYHOST, 2L);
+ SETOPT(CURLOPT_SSL_CTX_FUNCTION, fetch_curl_sslctxfun);
+ SETOPT(CURLOPT_SSL_CTX_DATA, f);
+ }
+#endif
+
+ return CURLE_OK;
+}
+
+
+#ifdef WITH_SSL
+/**
+ * cURL SSL setup callback
+ */
+
+static CURLcode
+fetch_curl_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *parm)
+{
+ SSL_CTX_set_verify(sslctx, SSL_VERIFY_PEER, fetch_curl_verify_callback);
+ SSL_CTX_set_cert_verify_callback(sslctx, fetch_curl_cert_verify_callback,
+ parm);
+ return CURLE_OK;
+}
+#endif
+
+
+/**
+ * Abort a fetch.
+ */
+
+static void fetch_curl_abort(void *vf)
+{
+ struct curl_fetch_info *f = (struct curl_fetch_info *)vf;
+ assert(f);
+ LOG(("fetch %p, url '%s'", f, f->url));
+ if (f->curl_handle) {
+ f->abort = true;
+ } else {
+ fetch_can_be_freed(f->fetch_handle);
+ }
+}
+
+
+/**
+ * Clean up the provided fetch object and free it.
+ *
+ * Will prod the queue afterwards to allow pending requests to be initiated.
+ */
+
+static void fetch_curl_stop(struct curl_fetch_info *f)
+{
+ CURLMcode codem;
+
+ assert(f);
+ LOG(("fetch %p, url '%s'", f, f->url));
+
+ if (f->curl_handle) {
+ /* remove from curl multi handle */
+ codem = curl_multi_remove_handle(fetch_curl_multi,
+ f->curl_handle);
+ assert(codem == CURLM_OK);
+ /* Put this curl handle into the cache if wanted. */
+ fetch_curl_cache_handle(f->curl_handle, f->host);
+ f->curl_handle = 0;
+ }
+
+ fetch_can_be_freed(f->fetch_handle);
+}
+
+
+/**
+ * Free a fetch structure and associated resources.
+ */
+
+static void fetch_curl_free(void *vf)
+{
+ struct curl_fetch_info *f = (struct curl_fetch_info *)vf;
+#ifdef WITH_SSL
+ int i;
+#endif
+
+ if (f->curl_handle)
+ curl_easy_cleanup(f->curl_handle);
+ free(f->url);
+ free(f->host);
+ free(f->parent_fetch_url);
+ free(f->location);
+ free(f->cookie_string);
+ free(f->realm);
+ if (f->headers)
+ curl_slist_free_all(f->headers);
+ free(f->post_urlenc);
+ if (f->post_multipart)
+ curl_formfree(f->post_multipart);
+ free(f->cachedata.etag);
+
+#ifdef WITH_SSL
+ for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) {
+ f->cert_data[i].cert->references--;
+ if (f->cert_data[i].cert->references == 0)
+ X509_free(f->cert_data[i].cert);
+ }
+#endif
+
+ free(f);
+}
+
+
+/**
+ * Do some work on current fetches.
+ *
+ * Must be called regularly to make progress on fetches.
+ */
+
+static void fetch_curl_poll(const char *scheme_ignored)
+{
+ int running, queue;
+ CURLMcode codem;
+ CURLMsg *curl_msg;
+
+ /* do any possible work on the current fetches */
+ do {
+ codem = curl_multi_perform(fetch_curl_multi, &running);
+ assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
+ } while (codem == CURLM_CALL_MULTI_PERFORM);
+
+ /* process curl results */
+ curl_msg = curl_multi_info_read(fetch_curl_multi, &queue);
+ while (curl_msg) {
+ switch (curl_msg->msg) {
+ case CURLMSG_DONE:
+ fetch_curl_done(curl_msg->easy_handle,
+ curl_msg->data.result);
+ break;
+ default:
+ break;
+ }
+ curl_msg = curl_multi_info_read(fetch_curl_multi, &queue);
+ }
+}
+
+
+/**
+ * Handle a completed fetch (CURLMSG_DONE from curl_multi_info_read()).
+ *
+ * \param curl_handle curl easy handle of fetch
+ */
+
+static void fetch_curl_done(CURL *curl_handle, CURLcode result)
+{
+ bool finished = false;
+ bool error = false;
+#ifdef WITH_SSL
+ bool cert = false;
+#endif
+ bool abort;
+ struct curl_fetch_info *f;
+ CURLcode code;
+ struct cache_data cachedata;
+#ifdef WITH_SSL
+ struct cert_info certs[MAX_CERTS];
+ memset(certs, 0, sizeof(certs));
+#endif
+
+ /* find the structure associated with this fetch */
+ code = curl_easy_getinfo(curl_handle, CURLINFO_PRIVATE, &f);
+ assert(code == CURLE_OK);
+
+ abort = f->abort;
+
+ if (!abort && result == CURLE_OK) {
+ /* fetch completed normally */
+ if (f->stopped ||
+ (!f->had_headers &&
+ fetch_curl_process_headers(f)))
+ ; /* redirect with no body or similar */
+ else
+ finished = true;
+ } else if (result == CURLE_PARTIAL_FILE) {
+ /* CURLE_PARTIAL_FILE occurs if the received body of a
+ * response is smaller than that specified in the
+ * Content-Length header. */
+ if (!f->had_headers && fetch_curl_process_headers(f))
+ ; /* redirect with partial body, or similar */
+ else
+ error = true;
+ } else if (result == CURLE_WRITE_ERROR && f->stopped)
+ /* CURLE_WRITE_ERROR occurs when fetch_curl_data
+ * returns 0, which we use to abort intentionally */
+ ;
+#ifdef WITH_SSL
+ else if (result == CURLE_SSL_PEER_CERTIFICATE ||
+ result == CURLE_SSL_CACERT) {
+ memcpy(certs, f->cert_data, sizeof(certs));
+ memset(f->cert_data, 0, sizeof(f->cert_data));
+ cert = true;
+ }
+#endif
+ else
+ error = true;
+
+ /* If finished, acquire cache info to pass to callback */
+ if (finished) {
+ memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data));
+ f->cachedata.etag = 0;
+ }
+
+ /* postponed until after stop so that queue fetches are started */
+ if (abort)
+ ; /* fetch was aborted: no callback */
+ else if (finished) {
+ fetch_send_callback(FETCH_FINISHED, f->fetch_handle, &cachedata, 0);
+ free(cachedata.etag);
+ }
+#ifdef WITH_SSL
+ else if (cert) {
+ int i;
+ BIO *mem;
+ BUF_MEM *buf;
+ struct ssl_cert_info ssl_certs[MAX_CERTS];
+
+ for (i = 0; i < MAX_CERTS && certs[i].cert; i++) {
+ ssl_certs[i].version =
+ X509_get_version(certs[i].cert);
+
+ mem = BIO_new(BIO_s_mem());
+ ASN1_TIME_print(mem,
+ X509_get_notBefore(certs[i].cert));
+ BIO_get_mem_ptr(mem, &buf);
+ (void) BIO_set_close(mem, BIO_NOCLOSE);
+ BIO_free(mem);
+ snprintf(ssl_certs[i].not_before,
+ min(sizeof ssl_certs[i].not_before,
+ (unsigned) buf->length + 1),
+ "%s", buf->data);
+ BUF_MEM_free(buf);
+
+ mem = BIO_new(BIO_s_mem());
+ ASN1_TIME_print(mem,
+ X509_get_notAfter(certs[i].cert));
+ BIO_get_mem_ptr(mem, &buf);
+ (void) BIO_set_close(mem, BIO_NOCLOSE);
+ BIO_free(mem);
+ snprintf(ssl_certs[i].not_after,
+ min(sizeof ssl_certs[i].not_after,
+ (unsigned) buf->length + 1),
+ "%s", buf->data);
+ BUF_MEM_free(buf);
+
+ ssl_certs[i].sig_type =
+ X509_get_signature_type(certs[i].cert);
+ ssl_certs[i].serial =
+ ASN1_INTEGER_get(
+ X509_get_serialNumber(certs[i].cert));
+ mem = BIO_new(BIO_s_mem());
+ X509_NAME_print_ex(mem,
+ X509_get_issuer_name(certs[i].cert),
+ 0, XN_FLAG_SEP_CPLUS_SPC |
+ XN_FLAG_DN_REV | XN_FLAG_FN_NONE);
+ BIO_get_mem_ptr(mem, &buf);
+ (void) BIO_set_close(mem, BIO_NOCLOSE);
+ BIO_free(mem);
+ snprintf(ssl_certs[i].issuer,
+ min(sizeof ssl_certs[i].issuer,
+ (unsigned) buf->length + 1),
+ "%s", buf->data);
+ BUF_MEM_free(buf);
+
+ mem = BIO_new(BIO_s_mem());
+ X509_NAME_print_ex(mem,
+ X509_get_subject_name(certs[i].cert),
+ 0, XN_FLAG_SEP_CPLUS_SPC |
+ XN_FLAG_DN_REV | XN_FLAG_FN_NONE);
+ BIO_get_mem_ptr(mem, &buf);
+ (void) BIO_set_close(mem, BIO_NOCLOSE);
+ BIO_free(mem);
+ snprintf(ssl_certs[i].subject,
+ min(sizeof ssl_certs[i].subject,
+ (unsigned) buf->length + 1),
+ "%s", buf->data);
+ BUF_MEM_free(buf);
+
+ ssl_certs[i].cert_type =
+ X509_certificate_type(certs[i].cert,
+ X509_get_pubkey(certs[i].cert));
+
+ /* and clean up */
+ certs[i].cert->references--;
+ if (certs[i].cert->references == 0)
+ X509_free(certs[i].cert);
+ }
+
+ fetch_send_callback(FETCH_CERT_ERR, f->fetch_handle, &ssl_certs, i);
+
+ }
+#endif
+ else if (error)
+ fetch_send_callback(FETCH_ERROR, f->fetch_handle, fetch_error_buffer, 0);
+
+ fetch_curl_stop(f);
+}
+
+
+/**
+ * Callback function for fetch progress.
+ */
+
+int fetch_curl_progress(void *clientp, double dltotal, double dlnow,
+ double ultotal, double ulnow)
+{
+ struct curl_fetch_info *f = (struct curl_fetch_info *) clientp;
+ double percent;
+
+ if (f->abort)
+ return 0;
+
+ if (dltotal > 0) {
+ percent = dlnow * 100.0f / dltotal;
+ snprintf(fetch_progress_buffer, 255,
+ messages_get("Progress"),
+ human_friendly_bytesize(dlnow),
+ human_friendly_bytesize(dltotal));
+ fetch_send_callback(FETCH_PROGRESS, f->fetch_handle,
+ fetch_progress_buffer,
+ (unsigned long) percent);
+ } else {
+ snprintf(fetch_progress_buffer, 255,
+ messages_get("ProgressU"),
+ human_friendly_bytesize(dlnow));
+ fetch_send_callback(FETCH_PROGRESS, f->fetch_handle,
+ fetch_progress_buffer, 0);
+ }
+
+ return 0;
+}
+
+
+/**
+ * Callback function for cURL.
+ */
+
+size_t fetch_curl_data(void *data, size_t size, size_t nmemb,
+ struct curl_fetch_info *f)
+{
+ CURLcode code;
+
+ /* ensure we only have to get this information once */
+ if (!f->http_code)
+ {
+ code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE,
+ &f->http_code);
+ fetch_set_http_code(f->fetch_handle, f->http_code);
+ assert(code == CURLE_OK);
+ }
+
+ /* ignore body if this is a 401 reply by skipping it and reset
+ the HTTP response code to enable follow up fetches */
+ if (f->http_code == 401)
+ {
+ f->http_code = 0;
+ return size * nmemb;
+ }
+
+ /*LOG(("fetch %p, size %lu", f, size * nmemb));*/
+
+ if (f->abort || (!f->had_headers && fetch_curl_process_headers(f))) {
+ f->stopped = true;
+ return 0;
+ }
+
+ /* send data to the caller */
+ /*LOG(("FETCH_DATA"));*/
+ fetch_send_callback(FETCH_DATA, f->fetch_handle, data, size * nmemb);
+
+ if (f->abort) {
+ f->stopped = true;
+ return 0;
+ }
+
+ return size * nmemb;
+}
+
+
+/**
+ * Callback function for headers.
+ */
+
+size_t fetch_curl_header(char *data, size_t size, size_t nmemb,
+ struct curl_fetch_info *f)
+{
+ int i;
+ size *= nmemb;
+
+#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++)
+
+ /* Set fetch response time if not already set */
+ if (f->cachedata.res_time == 0)
+ f->cachedata.res_time = time(0);
+
+ if (12 < size && strncasecmp(data, "Location:", 9) == 0) {
+ /* extract Location header */
+ free(f->location);
+ f->location = malloc(size);
+ if (!f->location) {
+ LOG(("malloc failed"));
+ return size;
+ }
+ SKIP_ST(9);
+ strncpy(f->location, data + i, size - i);
+ f->location[size - i] = '\0';
+ for (i = size - i - 1; i >= 0 &&
+ (f->location[i] == ' ' ||
+ f->location[i] == '\t' ||
+ f->location[i] == '\r' ||
+ f->location[i] == '\n'); i--)
+ f->location[i] = '\0';
+ } else if (15 < size && strncasecmp(data, "Content-Length:", 15) == 0) {
+ /* extract Content-Length header */
+ SKIP_ST(15);
+ if (i < (int)size && '0' <= data[i] && data[i] <= '9')
+ f->content_length = atol(data + i);
+#ifdef WITH_AUTH
+ } else if (17 < size && strncasecmp(data, "WWW-Authenticate:", 17) == 0) {
+ /* extract the first Realm from WWW-Authenticate header */
+ free(f->realm);
+ f->realm = malloc(size);
+ if (!f->realm) {
+ LOG(("malloc failed"));
+ return size;
+ }
+ SKIP_ST(17);
+
+ while (i < (int) size - 5 &&
+ strncasecmp(data + i, "realm", 5))
+ i++;
+ while (i < (int) size - 1 && data[++i] != '"')
+ /* */;
+ i++;
+
+ if (i < (int) size) {
+ strncpy(f->realm, data + i, size - i);
+ f->realm[size - i] = '\0';
+ for (i = size - i - 1; i >= 0 &&
+ (f->realm[i] == ' ' ||
+ f->realm[i] == '"' ||
+ f->realm[i] == '\t' ||
+ f->realm[i] == '\r' ||
+ f->realm[i] == '\n'); --i)
+ f->realm[i] = '\0';
+ }
+#endif
+ } else if (5 < size && strncasecmp(data, "Date:", 5) == 0) {
+ /* extract Date header */
+ SKIP_ST(5);
+ if (i < (int) size)
+ f->cachedata.date = curl_getdate(&data[i], NULL);
+ } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) {
+ /* extract Age header */
+ SKIP_ST(4);
+ if (i < (int) size && '0' <= data[i] && data[i] <= '9')
+ f->cachedata.age = atoi(data + i);
+ } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) {
+ /* extract Expires header */
+ SKIP_ST(8);
+ if (i < (int) size)
+ f->cachedata.expires = curl_getdate(&data[i], NULL);
+ } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) {
+ /* extract and parse Cache-Control header */
+ int comma;
+ SKIP_ST(14);
+
+ while (i < (int) size) {
+ for (comma = i; comma < (int) size; comma++)
+ if (data[comma] == ',')
+ break;
+
+ SKIP_ST(i);
+
+ if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0))
+ /* When we get a disk cache we should
+ * distinguish between these two */
+ f->cachedata.no_cache = true;
+ else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) {
+ for (; i < comma; i++)
+ if (data[i] == '=')
+ break;
+ SKIP_ST(i+1);
+ if (i < comma)
+ f->cachedata.max_age =
+ atoi(data + i);
+ }
+
+ i = comma + 1;
+ }
+ } else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) {
+ /* extract ETag header */
+ free(f->cachedata.etag);
+ f->cachedata.etag = malloc(size);
+ if (!f->cachedata.etag) {
+ LOG(("malloc failed"));
+ return size;
+ }
+ SKIP_ST(5);
+ strncpy(f->cachedata.etag, data + i, size - i);
+ f->cachedata.etag[size - i] = '\0';
+ for (i = size - i - 1; i >= 0 &&
+ (f->cachedata.etag[i] == ' ' ||
+ f->cachedata.etag[i] == '\t' ||
+ f->cachedata.etag[i] == '\r' ||
+ f->cachedata.etag[i] == '\n'); --i)
+ f->cachedata.etag[i] = '\0';
+ } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) {
+ /* extract Last-Modified header */
+ SKIP_ST(14);
+ if (i < (int) size) {
+ f->cachedata.last_modified =
+ curl_getdate(&data[i], NULL);
+ }
+ } else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) {
+ /* extract Set-Cookie header */
+ SKIP_ST(11);
+
+ /* If the fetch is unverifiable and there's no parent fetch
+ * url, err on the side of caution and do not set the
+ cookie */
+
+ if (f->verifiable || f->parent_fetch_url) {
+ /* If the transaction's verifiable, we don't require
+ * that the request uri and the parent domain match,
+ * so don't pass in the parent in this case. */
+ urldb_set_cookie(&data[i], f->url,
+ f->verifiable ? 0
+ : f->parent_fetch_url);
+ }
+ }
+
+ return size;
+#undef SKIP_ST
+}
+
+
+/**
+ * Find the status code and content type and inform the caller.
+ *
+ * Return true if the fetch is being aborted.
+ */
+
+bool fetch_curl_process_headers(struct curl_fetch_info *f)
+{
+ long http_code;
+ const char *type;
+ CURLcode code;
+ struct stat s;
+ char *url_path = 0;
+
+ f->had_headers = true;
+
+ /* Set fetch response time if not already set */
+ if (f->cachedata.res_time == 0)
+ f->cachedata.res_time = time(0);
+
+ if (!f->http_code)
+ {
+ code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE,
+ &f->http_code);
+ fetch_set_http_code(f->fetch_handle, f->http_code);
+ assert(code == CURLE_OK);
+ }
+ http_code = f->http_code;
+ LOG(("HTTP status code %li", http_code));
+
+ if (http_code == 304 && !f->post_urlenc && !f->post_multipart) {
+ /* Not Modified && GET request */
+ fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle,
+ (const char *)&f->cachedata, 0);
+ return true;
+ }
+
+ /* handle HTTP redirects (3xx response codes) */
+ if (300 <= http_code && http_code < 400 && f->location != 0) {
+ LOG(("FETCH_REDIRECT, '%s'", f->location));
+ fetch_send_callback(FETCH_REDIRECT, f->fetch_handle, f->location, 0);
+ return true;
+ }
+
+ /* handle HTTP 401 (Authentication errors) */
+#ifdef WITH_AUTH
+ if (http_code == 401) {
+ fetch_send_callback(FETCH_AUTH, f->fetch_handle, f->realm,0);
+ return true;
+ }
+#endif
+
+ /* handle HTTP errors (non 2xx response codes) */
+ if (f->only_2xx && strncmp(f->url, "http", 4) == 0 &&
+ (http_code < 200 || 299 < http_code)) {
+ fetch_send_callback(FETCH_ERROR, f->fetch_handle,
+ messages_get("Not2xx"), 0);
+ return true;
+ }
+
+ /* find MIME type from headers or filetype for local files */
+ code = curl_easy_getinfo(f->curl_handle, CURLINFO_CONTENT_TYPE, &type);
+ assert(code == CURLE_OK);
+
+ if (strncmp(f->url, "file:///", 8) == 0)
+ url_path = curl_unescape(f->url + 7,
+ (int) strlen(f->url) - 7);
+
+ if (url_path && stat(url_path, &s) == 0) {
+ /* file: URL and file exists */
+ /* create etag */
+ free(f->cachedata.etag);
+ f->cachedata.etag = malloc(13);
+ if (f->cachedata.etag)
+ sprintf(f->cachedata.etag,
+ "\"%10d\"", (int)s.st_mtime);
+
+ /* don't set last modified time so as to ensure that local
+ * files are revalidated at all times. */
+
+ /* If performed a conditional request and unmodified ... */
+ if (f->last_modified && f->file_etag &&
+ f->last_modified > s.st_mtime &&
+ f->file_etag == s.st_mtime) {
+ fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle,
+ (const char *)&f->cachedata, 0);
+ curl_free(url_path);
+ return true;
+ }
+ }
+
+ if (type == 0) {
+ type = "text/plain";
+ if (url_path) {
+ type = fetch_filetype(url_path);
+ }
+ }
+
+ curl_free(url_path);
+
+ LOG(("FETCH_TYPE, '%s'", type));
+ fetch_send_callback(FETCH_TYPE, f->fetch_handle, type, f->content_length);
+ if (f->abort)
+ return true;
+
+ return false;
+}
+
+
+/**
+ * Convert a list of struct ::form_successful_control to a list of
+ * struct curl_httppost for libcurl.
+ */
+struct curl_httppost *
+fetch_curl_post_convert(struct form_successful_control *control)
+{
+ struct curl_httppost *post = 0, *last = 0;
+ CURLFORMcode code;
+
+ for (; control; control = control->next) {
+ if (control->file) {
+ char *leafname = 0;
+#ifdef riscos
+ char *temp;
+ int leaflen;
+
+ temp = strrchr(control->value, '.');
+ if (!temp)
+ temp = control->value; /* already leafname */
+ else
+ temp += 1;
+
+ leaflen = strlen(temp);
+
+ leafname = malloc(leaflen + 1);
+ if (!leafname) {
+ LOG(("malloc failed"));
+ continue;
+ }
+ memcpy(leafname, temp, leaflen + 1);
+
+ /* and s/\//\./g */
+ for (temp = leafname; *temp; temp++)
+ if (*temp == '/')
+ *temp = '.';
+#else
+ leafname = strrchr(control->value, '/') ;
+ if (!leafname)
+ leafname = control->value;
+ else
+ leafname += 1;
+#endif
+ /* We have to special case filenames of "", so curl
+ * a) actually attempts the fetch and
+ * b) doesn't attempt to open the file ""
+ */
+ if (control->value[0] == '\0') {
+ /* dummy buffer - needs to be static so
+ * pointer's still valid when we go out
+ * of scope (not that libcurl should be
+ * attempting to access it, of course). */
+ static char buf;
+
+ code = curl_formadd(&post, &last,
+ CURLFORM_COPYNAME, control->name,
+ CURLFORM_BUFFER, control->value,
+ /* needed, as basename("") == "." */
+ CURLFORM_FILENAME, "",
+ CURLFORM_BUFFERPTR, &buf,
+ CURLFORM_BUFFERLENGTH, 0,
+ CURLFORM_CONTENTTYPE,
+ "application/octet-stream",
+ CURLFORM_END);
+ if (code != CURL_FORMADD_OK)
+ LOG(("curl_formadd: %d (%s)",
+ code, control->name));
+ } else {
+ char *mimetype = fetch_mimetype(control->value);
+ code = curl_formadd(&post, &last,
+ CURLFORM_COPYNAME, control->name,
+ CURLFORM_FILE, control->value,
+ CURLFORM_FILENAME, leafname,
+ CURLFORM_CONTENTTYPE,
+ (mimetype != 0 ? mimetype : "text/plain"),
+ CURLFORM_END);
+ if (code != CURL_FORMADD_OK)
+ LOG(("curl_formadd: %d (%s=%s)",
+ code, control->name,
+ control->value));
+ free(mimetype);
+ }
+#ifdef riscos
+ free(leafname);
+#endif
+ }
+ else {
+ code = curl_formadd(&post, &last,
+ CURLFORM_COPYNAME, control->name,
+ CURLFORM_COPYCONTENTS, control->value,
+ CURLFORM_END);
+ if (code != CURL_FORMADD_OK)
+ LOG(("curl_formadd: %d (%s=%s)", code,
+ control->name,
+ control->value));
+ }
+ }
+
+ return post;
+}
+
+
+#ifdef WITH_SSL
+/**
+ * OpenSSL Certificate verification callback
+ * Stores certificate details in fetch struct.
+ */
+
+int fetch_curl_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx)
+{
+ X509 *cert = X509_STORE_CTX_get_current_cert(x509_ctx);
+ int depth = X509_STORE_CTX_get_error_depth(x509_ctx);
+ int err = X509_STORE_CTX_get_error(x509_ctx);
+ struct curl_fetch_info *f = X509_STORE_CTX_get_app_data(x509_ctx);
+
+ /* save the certificate by incrementing the reference count and
+ * keeping a pointer */
+ if (depth < MAX_CERTS && !f->cert_data[depth].cert) {
+ f->cert_data[depth].cert = cert;
+ f->cert_data[depth].err = err;
+ cert->references++;
+ }
+
+ return preverify_ok;
+}
+
+
+/**
+ * OpenSSL certificate chain verification callback
+ * Verifies certificate chain, setting up context for fetch_curl_verify_callback
+ */
+
+int fetch_curl_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm)
+{
+ int ok;
+
+ /* Store fetch struct in context for verify callback */
+ ok = X509_STORE_CTX_set_app_data(x509_ctx, parm);
+
+ /* and verify the certificate chain */
+ if (ok)
+ ok = X509_verify_cert(x509_ctx);
+
+ return ok;
+}
+#endif
+
+static int curl_fetchers_registered = 0;
+/** Initialise a cURL fetcher */
+static bool
+fetch_curl_initialise(const char *scheme)
+{
+ LOG(("Initialise cURL fetcher for %s", scheme));
+ curl_fetchers_registered++;
+ return true; /* Always succeeds */
+}
+
+/** Finalise a cURL fetcher */
+static void
+fetch_curl_finalise(const char *scheme)
+{
+ curl_fetchers_registered--;
+ LOG(("Finalise cURL fetcher %s", scheme));
+ if (curl_fetchers_registered == 0) {
+ /* All the fetchers have been finalised. */
+ LOG(("All cURL fetchers finalised, closing down cURL"));
+ CURLMcode codem;
+
+ curl_easy_cleanup(fetch_blank_curl);
+
+ codem = curl_multi_cleanup(fetch_curl_multi);
+ if (codem != CURLM_OK)
+ LOG(("curl_multi_cleanup failed: ignoring"));
+
+ curl_global_cleanup();
+ }
+}
+
+/**
+ * Initialise the fetcher.
+ *
+ * Must be called once before any other function.
+ */
+
+void register_curl_fetchers(void)
+{
+ CURLcode code;
+ curl_version_info_data *data;
+ int i;
+
+ LOG(("curl_version %s", curl_version()));
+
+ code = curl_global_init(CURL_GLOBAL_ALL);
+ if (code != CURLE_OK)
+ die("Failed to initialise the fetch module "
+ "(curl_global_init failed).");
+
+ fetch_curl_multi = curl_multi_init();
+ if (!fetch_curl_multi)
+ die("Failed to initialise the fetch module "
+ "(curl_multi_init failed).");
+
+ /* Create a curl easy handle with the options that are common to all
+ fetches. */
+ fetch_blank_curl = curl_easy_init();
+ if (!fetch_blank_curl)
+ die("Failed to initialise the fetch module "
+ "(curl_easy_init failed).");
+
+#undef SETOPT
+#define SETOPT(option, value) \
+ code = curl_easy_setopt(fetch_blank_curl, option, value); \
+ if (code != CURLE_OK) \
+ goto curl_easy_setopt_failed;
+
+ if (verbose_log) {
+ SETOPT(CURLOPT_VERBOSE, 1);
+ } else {
+ SETOPT(CURLOPT_VERBOSE, 0);
+ }
+ SETOPT(CURLOPT_ERRORBUFFER, fetch_error_buffer);
+ SETOPT(CURLOPT_WRITEFUNCTION, fetch_curl_data);
+ SETOPT(CURLOPT_HEADERFUNCTION, fetch_curl_header);
+ SETOPT(CURLOPT_PROGRESSFUNCTION, fetch_curl_progress);
+ SETOPT(CURLOPT_NOPROGRESS, 0);
+ SETOPT(CURLOPT_USERAGENT, user_agent_string());
+ SETOPT(CURLOPT_ENCODING, "gzip");
+ SETOPT(CURLOPT_LOW_SPEED_LIMIT, 1L);
+ SETOPT(CURLOPT_LOW_SPEED_TIME, 180L);
+ SETOPT(CURLOPT_NOSIGNAL, 1L);
+ SETOPT(CURLOPT_CONNECTTIMEOUT, 30L);
+
+ if (option_ca_bundle)
+ SETOPT(CURLOPT_CAINFO, option_ca_bundle);
+
+ /* cURL initialised okay, register the fetchers */
+
+ data = curl_version_info(CURLVERSION_NOW);
+
+ for (i = 0; data->protocols[i]; i++)
+ if (!fetch_add_fetcher(data->protocols[i],
+ fetch_curl_initialise,
+ fetch_curl_setup,
+ fetch_curl_start,
+ fetch_curl_abort,
+ fetch_curl_free,
+ fetch_curl_poll,
+ fetch_curl_finalise)) {
+ LOG(("Unable to register cURL fetcher for %s", data->protocols[i]));
+ }
+ return;
+
+curl_easy_setopt_failed:
+ die("Failed to initialise the fetch module "
+ "(curl_easy_setopt failed).");
+}