Author: vince
Date: Mon Aug 30 15:50:49 2010
New Revision: 10727
URL:
http://source.netsurf-browser.org?rev=10727&view=rev
Log:
simple first pass at file fetcher implementation
Added:
branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.c
branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.h
Modified:
branches/vince/netsurf-file-fetcher/Makefile.sources
branches/vince/netsurf-file-fetcher/content/fetch.c
branches/vince/netsurf-file-fetcher/content/fetchers/fetch_curl.c
Modified: branches/vince/netsurf-file-fetcher/Makefile.sources
URL:
http://source.netsurf-browser.org/branches/vince/netsurf-file-fetcher/Mak...
==============================================================================
--- branches/vince/netsurf-file-fetcher/Makefile.sources (original)
+++ branches/vince/netsurf-file-fetcher/Makefile.sources Mon Aug 30 15:50:49 2010
@@ -6,7 +6,7 @@
#
S_CONTENT := content.c fetch.c hlcache.c llcache.c urldb.c \
- fetchers/fetch_curl.c fetchers/fetch_data.c
+ fetchers/fetch_curl.c fetchers/fetch_data.c fetchers/fetch_file.c
S_CSS := css.c dump.c internal.c select.c utils.c
S_RENDER := box.c box_construct.c box_normalise.c directory.c favicon.c \
font.c form.c html.c html_interaction.c html_redraw.c \
Modified: branches/vince/netsurf-file-fetcher/content/fetch.c
URL:
http://source.netsurf-browser.org/branches/vince/netsurf-file-fetcher/con...
==============================================================================
--- branches/vince/netsurf-file-fetcher/content/fetch.c (original)
+++ branches/vince/netsurf-file-fetcher/content/fetch.c Mon Aug 30 15:50:49 2010
@@ -38,6 +38,7 @@
#include "content/fetch.h"
#include "content/fetchers/fetch_curl.h"
#include "content/fetchers/fetch_data.h"
+#include "content/fetchers/fetch_file.h"
#include "content/urldb.h"
#include "desktop/netsurf.h"
#include "desktop/options.h"
@@ -106,6 +107,7 @@
{
fetch_curl_register();
fetch_data_register();
+ fetch_file_register();
fetch_active = false;
}
Modified: branches/vince/netsurf-file-fetcher/content/fetchers/fetch_curl.c
URL:
http://source.netsurf-browser.org/branches/vince/netsurf-file-fetcher/con...
==============================================================================
--- branches/vince/netsurf-file-fetcher/content/fetchers/fetch_curl.c (original)
+++ branches/vince/netsurf-file-fetcher/content/fetchers/fetch_curl.c Mon Aug 30 15:50:49
2010
@@ -217,7 +217,10 @@
data = curl_version_info(CURLVERSION_NOW);
- for (i = 0; data->protocols[i]; i++)
+ for (i = 0; data->protocols[i]; i++) {
+ if (strcmp(data->protocols[i], "file") == 0)
+ continue; /* do not use curl for file: */
+
if (!fetch_add_fetcher(data->protocols[i],
fetch_curl_initialise,
fetch_curl_setup,
@@ -229,6 +232,7 @@
LOG(("Unable to register cURL fetcher for %s",
data->protocols[i]));
}
+ }
return;
curl_easy_setopt_failed:
@@ -1153,64 +1157,6 @@
messages_get("Not2xx"), 0,
FETCH_ERROR_HTTP_NOT2);
return true;
- }
-
- /* find MIME type from filetype for local files */
- if (strncmp(f->url, FILE_SCHEME_PREFIX, FILE_SCHEME_PREFIX_LEN) == 0) {
- struct stat s;
- char *url_path = url_to_path(f->url);
-
- LOG(("Obtaining mime type for file %s", url_path));
-
- if (url_path != NULL && stat(url_path, &s) == 0) {
- /* file: URL and file exists */
- char header[64];
- const char *type;
-
- /* create etag */
- snprintf(header, sizeof header,
- "ETag: \"%10" PRId64 "\"",
- (int64_t) s.st_mtime);
- /* And send it to the header handler */
- fetch_send_callback(FETCH_HEADER, f->fetch_handle,
- header, strlen(header),
- FETCH_ERROR_NO_ERROR);
-
- /* create Content-Type */
- type = fetch_filetype(url_path);
- snprintf(header, sizeof header,
- "Content-Type: %s", type);
- /* Send it to the header handler */
- fetch_send_callback(FETCH_HEADER, f->fetch_handle,
- header, strlen(header),
- FETCH_ERROR_NO_ERROR);
-
- /* create Content-Length */
- snprintf(header, sizeof header,
- "Content-Length: %" PRId64,
- (int64_t) s.st_size);
- /* Send it to the header handler */
- fetch_send_callback(FETCH_HEADER, f->fetch_handle,
- header, strlen(header),
- FETCH_ERROR_NO_ERROR);
-
- /* don't set last modified time so as to ensure that
- * local files are revalidated at all times. */
-
- /* Report not modified, if appropriate */
- if (f->last_modified && f->file_etag &&
- f->last_modified > s.st_mtime &&
- f->file_etag == s.st_mtime) {
- fetch_send_callback(FETCH_NOTMODIFIED,
- f->fetch_handle, 0, 0,
- FETCH_ERROR_NO_ERROR);
- free(url_path);
- return true;
- }
- }
-
- if (url_path != NULL)
- free(url_path);
}
if (f->abort)
Added: branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.c
URL:
http://source.netsurf-browser.org/branches/vince/netsurf-file-fetcher/con...
==============================================================================
--- branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.c (added)
+++ branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.c Mon Aug 30 15:50:49
2010
@@ -1,0 +1,510 @@
+/*
+ * Copyright 2010 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of NetSurf.
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <
http://www.gnu.org/licenses/>.
+ */
+
+/* file: URL handling. Based on the data fetcher by Rob Kendrick */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <stdio.h>
+#include <dirent.h>
+
+#include "utils/config.h"
+#include "content/fetch.h"
+#include "content/fetchers/fetch_file.h"
+#include "content/urldb.h"
+#include "desktop/netsurf.h"
+#include "desktop/options.h"
+#include "utils/log.h"
+#include "utils/messages.h"
+#include "utils/url.h"
+#include "utils/utils.h"
+#include "utils/ring.h"
+#include "utils/base64.h"
+
+/** Context for a fetch
+ *
+ * One context exists per file: fetch. It is created by fetch_file_setup(),
+ * linked into the ring below, and released by fetch_file_free().
+ */
+struct fetch_file_context {
+ struct fetch_file_context *r_next, *r_prev; /**< Neighbouring contexts in the ring */
+
+ struct fetch *parent_fetch; /**< Fetch handle handed to the fetch core's callbacks */
+
+ bool aborted; /**< Flag indicating fetch has been aborted */
+ bool locked; /**< Set while a callback for this context is in progress; fetch_file_poll() skips locked entries */
+
+ char *path; /**< Path filled in by url_path() from the fetch URL */
+ char *mimetype; /**< Copy of the fetch_filetype() result for path */
+ char *data; /**< Buffer the file contents are read into */
+};
+
+static struct fetch_file_context *ring = NULL; /**< Ring of file fetch contexts; NULL when empty */
+
+/**
+ * Callback to initialise the file fetcher.
+ *
+ * Nothing needs setting up; the call is only logged.
+ *
+ * \param scheme  scheme name, used only in the log message
+ * \return always true
+ */
+static bool fetch_file_initialise(const char *scheme)
+{
+ LOG(("fetch_file_initialise called for %s", scheme));
+ return true;
+}
+
+/**
+ * Callback to finalise the file fetcher.
+ *
+ * Nothing needs tearing down; the call is only logged.
+ *
+ * \param scheme  scheme name, used only in the log message
+ */
+static void fetch_file_finalise(const char *scheme)
+{
+ LOG(("fetch_file_finalise called for %s", scheme));
+}
+
+/**
+ * Callback to set up a file fetch context.
+ *
+ * Allocates a zeroed context, asks url_path() for the path component of
+ * the URL and links the new context into the ring.
+ *
+ * \param parent_fetch    fetch handle stored in the context for callbacks
+ * \param url             URL being fetched; passed to url_path()
+ * \param only_2xx        only logged
+ * \param post_urlenc     only logged; may be NULL
+ * \param post_multipart  only logged
+ * \param headers         only logged
+ * \return the new context, or NULL if the allocation failed or no path
+ *         was produced
+ */
+static void *
+fetch_file_setup(struct fetch *parent_fetch,
+		const char *url,
+		bool only_2xx,
+		const char *post_urlenc,
+		const struct fetch_multipart_data *post_multipart,
+		const char **headers)
+{
+	struct fetch_file_context *ctx = calloc(1, sizeof(*ctx));
+
+	if (ctx == NULL)
+		return NULL;
+
+	ctx->parent_fetch = parent_fetch;
+
+	/* ctx->path starts out NULL (calloc), so a NULL path afterwards
+	 * means url_path() did not supply one */
+	url_path(url, &ctx->path);
+
+	if (ctx->path == NULL) {
+		free(ctx);
+		return NULL;
+	}
+
+	RING_INSERT(ring, ctx);
+
+	/* Passing NULL for a %s conversion is undefined, so substitute a
+	 * placeholder when there is no urlencoded POST data */
+	LOG(("created context %p from parent fetch %p, url %s, only_2xx %d, "
+			"post_urlenc %s, post_multipart %p, headers %p",
+			ctx, parent_fetch, url, only_2xx,
+			post_urlenc != NULL ? post_urlenc : "(null)",
+			post_multipart, headers));
+
+	return ctx;
+}
+
+/**
+ * Callback to free a file fetch.
+ *
+ * Unlinks the context from the ring, then releases the buffers it owns
+ * and the context itself.
+ *
+ * \param ctx  file fetch context (a struct fetch_file_context)
+ */
+static void fetch_file_free(void *ctx)
+{
+	struct fetch_file_context *fctx = ctx;
+
+	LOG(("context %p",ctx));
+
+	RING_REMOVE(ring, fctx);
+
+	free(fctx->mimetype);
+	free(fctx->data);
+	free(fctx->path);
+	free(fctx);
+}
+
+/**
+ * Callback to start a file fetch.
+ *
+ * No work is done here beyond logging; the fetch itself is carried out
+ * when fetch_file_poll() calls fetch_file_process().
+ *
+ * \param ctx  file fetch context, used only in the log message
+ * \return always true
+ */
+static bool fetch_file_start(void *ctx)
+{
+ LOG(("context %p",ctx));
+ return true;
+}
+
+/**
+ * Callback to abort a file fetch.
+ *
+ * \param ctx  file fetch context (a struct fetch_file_context)
+ */
+static void fetch_file_abort(void *ctx)
+{
+	struct fetch_file_context *fctx = ctx;
+
+	LOG(("context %p",ctx));
+
+	/* Only record the request. Tearing the context down here could pull
+	 * it out from under the poll loop, so the poll loop is left to do
+	 * the cleanup once it sees the flag.
+	 */
+	fctx->aborted = true;
+}
+/**
+ * Deliver a fetch message for a context to the fetch core.
+ *
+ * The context is marked locked for the duration of the callback so that a
+ * re-entrant fetch_file_poll() skips it rather than processing it again.
+ *
+ * \param msg        message type to deliver
+ * \param c          context the message belongs to
+ * \param data       message payload, passed through unchanged
+ * \param size       payload size, passed through unchanged
+ * \param errorcode  error code, passed through unchanged
+ */
+static void fetch_file_send_callback(fetch_msg msg,
+ struct fetch_file_context *c, const void *data,
+ unsigned long size, fetch_error_code errorcode)
+{
+ c->locked = true;
+ fetch_send_callback(msg, c->parent_fetch, data, size, errorcode);
+ c->locked = false;
+}
+
+/**
+ * Process a file fetch in one go.
+ *
+ * Opens ctx->path and then either emits a minimal HTML listing (for a
+ * directory) or reads the whole of a regular file into ctx->data and
+ * delivers it preceded by Content-Type, Content-Length and ETag headers.
+ *
+ * Any callback can result in the fetch being aborted. Therefore
+ * ctx->aborted is checked after every call to fetch_file_send_callback()
+ * before anything further is sent.
+ *
+ * \param ctx  context to process; ctx->data and ctx->mimetype are filled
+ *             in here and remain owned by the context
+ */
+static void fetch_file_process(struct fetch_file_context *ctx)
+{
+	int filefd;
+	struct stat filestat;
+	char header[64];
+	ssize_t res;
+
+	filefd = open(ctx->path, O_RDONLY);
+	if (filefd < 0) {
+		/* TODO: find error and set headers. NOTE(review): no
+		 * callback is made on this path. */
+		return;
+	}
+
+	if (fstat(filefd, &filestat) != 0) {
+		/* TODO: find error and set headers */
+		close(filefd);
+		return;
+	}
+
+	if (S_ISDIR(filestat.st_mode)) {
+		/* directory listing */
+		DIR *dir;
+		struct dirent *ent;
+
+		/* Obtain the stream before announcing success. Once
+		 * fdopendir() succeeds the stream owns filefd and
+		 * closedir() closes it. */
+		dir = fdopendir(filefd);
+		if (dir == NULL) {
+			close(filefd);
+			fetch_file_send_callback(FETCH_ERROR, ctx,
+					"Unable to open directory",
+					0, FETCH_ERROR_MEMORY);
+			return;
+		}
+
+		/* fetch is going to be successful */
+		fetch_set_http_code(ctx->parent_fetch, 200);
+
+		/* content type */
+		snprintf(header, sizeof header, "Content-Type: text/html");
+		fetch_file_send_callback(FETCH_HEADER, ctx, header,
+				strlen(header), FETCH_ERROR_NO_ERROR);
+
+		while (!ctx->aborted && (ent = readdir(dir)) != NULL) {
+			/* NOTE(review): names too long for header[] are
+			 * truncated, and names are not HTML escaped */
+			snprintf(header, sizeof header, "<p>%s</p>",
+					ent->d_name);
+
+			fetch_file_send_callback(FETCH_DATA,
+					ctx, header, strlen(header),
+					FETCH_ERROR_NO_ERROR);
+		}
+
+		closedir(dir);
+
+		if (!ctx->aborted) {
+			fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0,
+					FETCH_ERROR_NO_ERROR);
+		}
+
+		return;
+	}
+
+	if (!S_ISREG(filestat.st_mode)) {
+		/* not a file response */
+		close(filefd);
+		return;
+	}
+
+	/* just a plain old file */
+
+	/* malloc(0) is allowed to return NULL, which would be mistaken for
+	 * an allocation failure on an empty file; always ask for a byte */
+	ctx->data = malloc(filestat.st_size > 0 ?
+			(size_t) filestat.st_size : 1);
+	if (ctx->data == NULL) {
+		fetch_file_send_callback(FETCH_ERROR, ctx,
+				"Unable to allocate memory for file data",
+				0, FETCH_ERROR_MEMORY);
+		goto fetch_file_process_done;
+	}
+
+	res = read(filefd, ctx->data, filestat.st_size);
+	if (res != filestat.st_size) {
+		/* NOTE(review): error code kept from the first pass; a
+		 * dedicated I/O error code may be more appropriate */
+		fetch_file_send_callback(FETCH_ERROR, ctx,
+				"Unable to read file",
+				0, FETCH_ERROR_MEMORY);
+		goto fetch_file_process_done;
+	}
+
+	ctx->mimetype = strdup(fetch_filetype(ctx->path));
+	if (ctx->mimetype == NULL) {
+		fetch_file_send_callback(FETCH_ERROR, ctx,
+				"Unable to allocate memory for mimetype",
+				0, FETCH_ERROR_MEMORY);
+		goto fetch_file_process_done;
+	}
+
+	/* st_size is an off_t, so print it through int64_t */
+	LOG(("setting MIME type to %s, length to %" PRId64,
+			ctx->mimetype, (int64_t) filestat.st_size));
+
+	/* fetch is going to be successful */
+	fetch_set_http_code(ctx->parent_fetch, 200);
+
+	/* content type */
+	snprintf(header, sizeof header, "Content-Type: %s", ctx->mimetype);
+	fetch_file_send_callback(FETCH_HEADER, ctx, header, strlen(header),
+			FETCH_ERROR_NO_ERROR);
+
+	if (ctx->aborted)
+		goto fetch_file_process_done;
+
+	/* content length */
+	snprintf(header, sizeof header, "Content-Length: %" PRId64,
+			(int64_t) filestat.st_size);
+	fetch_file_send_callback(FETCH_HEADER, ctx, header, strlen(header),
+			FETCH_ERROR_NO_ERROR);
+
+	if (ctx->aborted)
+		goto fetch_file_process_done;
+
+	/* create etag */
+	snprintf(header, sizeof header, "ETag: \"%10" PRId64 "\"",
+			(int64_t) filestat.st_mtime);
+	fetch_file_send_callback(FETCH_HEADER, ctx, header, strlen(header),
+			FETCH_ERROR_NO_ERROR);
+
+	if (ctx->aborted)
+		goto fetch_file_process_done;
+
+	fetch_file_send_callback(FETCH_DATA,
+			ctx, ctx->data, filestat.st_size,
+			FETCH_ERROR_NO_ERROR);
+
+	if (ctx->aborted)
+		goto fetch_file_process_done;
+
+	fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0,
+			FETCH_ERROR_NO_ERROR);
+
+fetch_file_process_done:
+	/* single exit for the regular-file path so the descriptor is
+	 * closed on success, abort and error alike */
+	close(filefd);
+}
+
+/**
+ * Callback to poll for additional file fetch contents.
+ *
+ * Walks the ring once. Every unlocked context is processed in full by
+ * fetch_file_process() (unless it has been flagged as aborted) and its
+ * parent fetch is then removed from the queues and freed.
+ *
+ * \param scheme  scheme being polled; not used here
+ */
+static void fetch_file_poll(const char *scheme)
+{
+ struct fetch_file_context *c, *next;
+
+ if (ring == NULL) return;
+
+ /* Iterate over ring, processing each pending fetch */
+ c = ring;
+ do {
+ /* Take a copy of the next pointer as we may destroy
+ * the ring item we're currently processing */
+ next = c->r_next;
+
+ /* Ignore fetches that have been flagged as locked.
+ * This allows safe re-entrant calls to this function.
+ * Re-entrancy can occur if, as a result of a callback,
+ * the interested party causes fetch_poll() to be called
+ * again.
+ *
+ * The continue jumps to the loop condition below, so the
+ * walk still advances to the saved next pointer.
+ */
+ if (c->locked == true) {
+ continue;
+ }
+
+ LOG(("polling unlocked context %p",c));
+
+ /* Only process non-aborted fetches */
+ if (!c->aborted) {
+ /* file fetches can be processed in one go */
+ fetch_file_process(c);
+ }
+
+ /* Processed or aborted, this fetch is finished with: hand the
+ * parent fetch back to the fetch core. NOTE(review): fetch_free()
+ * presumably ends up in fetch_file_free(), which removes c from
+ * the ring -- confirm against content/fetch.c. */
+ fetch_remove_from_queues(c->parent_fetch);
+ fetch_free(c->parent_fetch);
+
+ /* Advance to next ring entry, exiting if we've reached
+ * the start of the ring or the ring has become empty
+ */
+ } while ( (c = next) != ring && ring != NULL);
+}
+/**
+ * Register the file fetcher with the fetch core.
+ *
+ * Adds a fetcher for the "file" scheme, supplying the initialise, setup,
+ * start, abort, free, poll and finalise callbacks defined in this file.
+ * NOTE(review): the result of fetch_add_fetcher() is not examined here.
+ */
+void fetch_file_register(void)
+{
+ fetch_add_fetcher("file",
+ fetch_file_initialise,
+ fetch_file_setup,
+ fetch_file_start,
+ fetch_file_abort,
+ fetch_file_free,
+ fetch_file_poll,
+ fetch_file_finalise);
+}
Added: branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.h
URL:
http://source.netsurf-browser.org/branches/vince/netsurf-file-fetcher/con...
==============================================================================
--- branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.h (added)
+++ branches/vince/netsurf-file-fetcher/content/fetchers/fetch_file.h Mon Aug 30 15:50:49
2010
@@ -1,0 +1,28 @@
+/*
+ * Copyright 2010 Vincent Sanders <vince(a)netsurf-browser.org>
+ *
+ * This file is part of NetSurf.
+ *
+ * NetSurf is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * NetSurf is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <
http://www.gnu.org/licenses/>.
+ */
+
+/** \file
+ * file: URL method handler
+ */
+
+#ifndef NETSURF_CONTENT_FETCHERS_FETCH_FILE_H
+#define NETSURF_CONTENT_FETCHERS_FETCH_FILE_H
+/**
+ * Register the file: URL fetcher with the fetch core.
+ */
+void fetch_file_register(void);
+
+#endif