Review: branches/vince/netsurf-file-fetcher

Vincent Sanders vince at kyllikki.org
Thu Sep 9 22:08:28 BST 2010


> > +
> > +/* file: URL handling. Based on the data fetcher by Rob Kendrik */
> 
> Kendrick.

Bugger, fixed, sorry Rob

> > +
> > +	ctx->locked = true;
> > +	fetch_send_callback(FETCH_HEADER, ctx->fetchh, header, strlen(header), FETCH_ERROR_NO_ERROR);
> > +	ctx->locked = false;
> 
> Call fetch_file_send_callback, instead?

hmm, ok, another function call but yeah its cleaner, changed

> > +
> > +	LOG(("%s", header));
> 
> Presumably redundant now?

removed

> 
> > +	ctx->locked = true;
> > +	fetch_send_callback(FETCH_HEADER, ctx->fetchh, header, strlen(header), FETCH_ERROR_NO_ERROR);
> > +	ctx->locked = false;
> 
> fetch_file_send_callback?
> 

changed

> > +	return ctx->aborted;
> > +}
> > +
> > +/** callback to initialise the file fetcher. */
> > +static bool fetch_file_initialise(const char *scheme)
> > +{
> > +	LOG(("fetch_file_initialise called for %s", scheme));
> > +	return true;
> > +}
> > +
> > +/** callback to initialise the file fetcher. */
> > +static void fetch_file_finalise(const char *scheme)
> > +{
> > +	LOG(("fetch_file_finalise called for %s", scheme));
> > +}
> 
> This logging is probably unnecessary now.
> 

removed

> > +/** callback to set up a file fetch context. */
> > +static void *
> > +fetch_file_setup(struct fetch *fetchh, 
> > +		 const char *url,
> > +		 bool only_2xx, 
> > +		 const char *post_urlenc,
> > +		 const struct fetch_multipart_data *post_multipart,
> > +		 const char **headers)
> > +{
> > +	struct fetch_file_context *ctx = calloc(1, sizeof(*ctx));
> > +	char *path;
> > +	
> > +	if (ctx == NULL)
> > +		return NULL;
> > +		
> > +	ctx->fetchh = fetchh;
> > +
> > +	ctx->url = strdup(url);
> > +
> > +	url_path(url, &path);
> > +	if (path == NULL) {
> 
> Ensure URL_FUNC_OK here
> 

	res = url_path(url, &path);
	if (res != URL_FUNC_OK) {
		free(ctx);
		return NULL;
	}



> free(ctx->url);

I moved the allocation after these checks

> 
> > +		free(ctx);
> > +		return NULL;
> > +	}
> > +
> > +	url_unescape(path, &ctx->path);
> > +	if (ctx->path == NULL) {
> 
> Ensure URL_FUNC_OK here

	res = url_unescape(path, &ctx->path);
	free(path);
	if (res != URL_FUNC_OK) {
		free(ctx);
		return NULL;
	}

> 
> free(ctx->url);
> 

moved allocation after

> > +		free(ctx);
> > +		return NULL;
> > +	}
> > +
> > +	free(path);
> > +
> > +	RING_INSERT(ring, ctx);
> > +
> > +	LOG(("new context %p for fetch handle %p, url %s, path %s",
> > +	     ctx, fetchh, ctx->url, ctx->path));
> > +	LOG(("only_2xx %d, post_urlenc %s, post_multipart %p, headers %p",  
> > +	     only_2xx, post_urlenc, post_multipart, headers));
> 
> Lose this logging?

gone ;-)

> 	
> > +	return ctx;
> > +}
> > +
> > +/** callback to free a file fetch */
> > +static void fetch_file_free(void *ctx)
> > +{
> > +	struct fetch_file_context *c = ctx;
> > +	LOG(("context %p",ctx));
> 
> Ditto

gonned

> 
> fd?

I took your suggestion and removed fd and fstat from the structure, its now passed

> 
> > +	free(c->url);
> > +	free(c->path);
> > +	RING_REMOVE(ring, c);
> > +	free(ctx);
> > +}
> > +
> > +/** callback to start a file fetch */
> > +static bool fetch_file_start(void *ctx)
> > +{
> > +	LOG(("context %p",ctx));
> 
> Lose logging
> 

removed

> > +	return true;
> > +}
> > +
> > +/** callback to abort a file fetch */
> > +static void fetch_file_abort(void *ctx)
> > +{
> > +	struct fetch_file_context *c = ctx;
> > +	LOG(("context %p",ctx));
> 
> Ditto
> 

goned



> > +
> > +/** Process object as a regular file */
> > +static void fetch_file_process_plain(struct fetch_file_context *ctx)
> > +{
> > +	char *buf;
> > +	size_t buf_size;
> > +
> > +	size_t tot_read = 0;	
> > +	ssize_t res;
> > +
> > +	size_t size;
> > +
> > +	size = ctx->fstat.st_size;
> > +
> > +	/* set buffer size */
> > +	buf_size = size;
> > +	if (buf_size > FETCH_FILE_MAX_BUF_SIZE)
> > +		buf_size = FETCH_FILE_MAX_BUF_SIZE;
> > +
> > +	/* allocate the buffer storage */
> > +	buf = malloc(buf_size);
> > +	if (buf == NULL) {
> > +		fetch_file_send_callback(FETCH_ERROR, ctx,
> > +			"Unable to allocate memory for file data buffer",
> > +			0, FETCH_ERROR_MEMORY);
> > +		return;
> > +	}
> > +	
> > +	/* fetch is going to be successful */
> > +	fetch_set_http_code(ctx->fetchh, 200);
> > +
> > +	/* Any callback can result in the fetch being aborted.
> > +	 * Therefore, we _must_ check for this after _every_ call to
> > +	 * fetch_file_send_callback().
> > +	 */
> > +
> > +	/* content type */
> > +	if (fetch_file_send_header(ctx, "Content-Type: %s", fetch_filetype(ctx->path)))
> > +		goto fetch_file_process_aborted;
> > +
> > +	/* content length */
> > +	if (fetch_file_send_header(ctx, "Content-Length: %zd", size))
> > +		goto fetch_file_process_aborted;
> > +
> > +	/* Set Last modified header */
> > +	if (fetch_file_send_time(ctx, "Last-Modified: %a, %d %b %Y %H:%M:%S GMT", &ctx->fstat.st_mtime))
> > +		goto fetch_file_process_aborted;
> > +
> > +	/* create etag */
> > +	if (fetch_file_send_header(ctx, "ETag: \"%10" PRId64 "\"", (int64_t) ctx->fstat.st_mtime))
> > +		goto fetch_file_process_aborted;
> > +
> > +	/* main data loop */
> > +	do {
> > +		res = read(ctx->fd, buf, buf_size);
> > +		if (res == -1) {
> > +			fetch_file_send_callback(FETCH_ERROR, ctx, "Error reading file", 0, FETCH_ERROR_PARTIAL_FILE);		
> 
> free(buf);

actually these were not closing the fd either! so replaced both
returns with the "goto fetch_file_process_aborted;" method.

> 
> > +			return;
> > +		}
> > +
> > +
> > +		if (res == 0) {
> > +			fetch_file_send_callback(FETCH_ERROR, ctx, "Unexpected EOF reading file", 0, FETCH_ERROR_PARTIAL_FILE);	
> 
> free(buf);

as previous
> 
> > +			return;
> > +		}
> > +
> > +		tot_read += res;
> > +
> > +		if (fetch_file_send_callback(FETCH_DATA, ctx, buf, res, FETCH_ERROR_NO_ERROR))
> > +			break;
> > +
> > +	} while (tot_read < size);
> > +
> > +	if (!ctx->aborted) 
> > +		fetch_file_send_callback(FETCH_FINISHED, ctx, 0, 0, FETCH_ERROR_NO_ERROR);
> > +
> > +fetch_file_process_aborted:
> > +
> > +	close(ctx->fd);
> 
> Where is this opened? Does it also need closing in the error cases,
> above?

its now passed in, opened by the parent and I have altered the error cases

> 
> > +	free(buf);
> > +	return;
> > +}
> > +

this entire function was lifted wholesale from the old directory.c and
I failed to examine it properly, sorry.

> > +static char *gen_nice_title(char *path)
> > +{
> > +	char *nice_path, *cnv, *tmp;
> > +	char *title;
> > +	int title_length;
> > +
> > +	/* Convert path for display */
> > +	nice_path = malloc(strlen(path) * 4 + 1);
> > +	if (!nice_path) {
> 
> nice_path == NULL is preferable

changed

> 
> > +		return NULL;
> > +	}
> > +
> > +	/* Escape special HTML characters */
> > +	for (cnv = nice_path, tmp = path; *tmp != '\0'; tmp++) {
> > +		if (*tmp == '<') {
> > +			*cnv++ = '&';
> > +			*cnv++ = 'l';
> > +			*cnv++ = 't';
> > +			*cnv++ = ';';
> > +		} else if (*tmp == '>') {
> > +			*cnv++ = '&';
> > +			*cnv++ = 'g';
> > +			*cnv++ = 't';
> > +			*cnv++ = ';';
> > +		} else {
> > +			*cnv++ = *tmp;
> > +		}
> 
> Also, '&' -> "&amp;", to be safe.

done

> 
> > +	}
> > +	*cnv = '\0';
> > +
> > +	/* Construct a localised title string */
> > +	title_length = strlen(nice_path) + strlen(messages_get("FileIndex"));
> 
> strlen(nice_path) == cnv - nice_path

done

> 
> > +	title = malloc(title_length);
> 
> Add 1 for the trailing NUL?

done

> 
> > +	if (title == NULL) {
> 
> free(nice_path);

yup


> > +
> > +static void fetch_file_process_error(struct fetch_file_context *ctx, int code)
> > +{
> > +	char buffer[1024];
> > +	const char *title;
> > +
> > +	/* content is going to return error code */
> > +	fetch_set_http_code(ctx->fetchh, code);
> > +
> > +	/* content type */
> > +	if (fetch_file_send_header(ctx, "Content-Type: text/html"))
> > +		goto fetch_file_process_error_aborted;
> > +
> > +	switch (code) {
> > +	case 401:
> > +		title = "Permission Denied";
> > +		break;
> > +
> > +	case 404:
> > +		title = "File not found";
> > +		break;
> > +
> > +	default:
> > +		title = "Internal Error";
> > +	}
> 
> You can simplify this by doing this:
> 
> char key[8];
> 
> snprintf(key, sizeof key, "HTTP%03d", code);
> 
> title = messages_get(key);

sweet! done, thanks

> > +
> > +	/* directory list headings */
> > +	dirlist_generate_headings(buffer, sizeof buffer);
> > +	if (fetch_file_send_callback(FETCH_DATA, ctx, buffer, strlen(buffer), FETCH_ERROR_NO_ERROR))
> > +			goto fetch_file_process_dir_aborted;
> 
> Lose an indent level here.

ta, done

> > +/* process a file fetch */
> > +static void fetch_file_process(struct fetch_file_context *ctx)
> > +{
> > +	ctx->fd = open(ctx->path, O_RDONLY);
> > +	if (ctx->fd < 0) {
> > +		/* process errors as appropriate */
> > +		switch (errno) {
> > +		case EACCES:
> > +			fetch_file_process_error(ctx, 401);
> > +			break;
> 
> 403 is a better response code for this case

done

> 
> > +		case ENOENT:
> > +			fetch_file_process_error(ctx, 404);
> > +			break;
> > +
> > +		default:
> > +			fetch_file_process_error(ctx, 500);
> > +			break;
> > +		}
> > +		return;
> > +	} 
> > +
> > +	if (fstat(ctx->fd, &ctx->fstat) != 0) {
> > +		/* process errors as appropriate */
> > +		close(ctx->fd);
> > +		fetch_file_process_error(ctx, 500);
> > +		return;
> > +	} 
> > +
> > +#ifdef fetch_curl_modified_check
> > +	/* f  was fetch_curl context and I cannot determine where f->last_modified was set */
> > +
> > +	/* Report not modified, if appropriate */
> > +	if (f->last_modified && f->file_etag &&
> > +	    f->last_modified > s.st_mtime &&
> > +	    f->file_etag == s.st_mtime) {
> > +		fetch_send_callback(FETCH_NOTMODIFIED, 
> > +				    f->fetch_handle, 0, 0,
> > +				    FETCH_ERROR_NO_ERROR);
> > +		return true;
> > +	}
> > +#endif	
> 
> Lose this?

gone

> 
> > +	if (S_ISDIR(ctx->fstat.st_mode)) {
> > +		/* directory listing */
> > +		fetch_file_process_dir(ctx);
> > +		return;
> > +	}
> > +
> > +	if (S_ISREG(ctx->fstat.st_mode)) {
> > +		/* not a file respose */
> 
> What?
> 

	if (S_ISREG(fdstat.st_mode)) {
		/* regular file */
		fetch_file_process_plain(ctx, fd, &fdstat);
		return;
	}

	/* unhandled type of file */
	close(fd);
	fetch_file_process_error(ctx, 501);
	return;
}


> > +		fetch_file_process_plain(ctx);
> > +		return;
> > +	}
> > +
> > +	close(ctx->fd);
> > +
> > +	return;
> > +}
> > +
> > +/** callback to poll for additional file fetch contents */
> > +static void fetch_file_poll(const char *scheme)
> > +{
> > +	struct fetch_file_context *c, *next;
> > +	
> > +	if (ring == NULL) return;
> > +	
> > +	/* Iterate over ring, processing each pending fetch */
> > +	c = ring;
> > +	do {
> > +		/* Take a copy of the next pointer as we may destroy
> > +		 * the ring item we're currently processing */
> > +		next = c->r_next;
> > +
> > +		/* Ignore fetches that have been flagged as locked.
> > +		 * This allows safe re-entrant calls to this function.
> > +		 * Re-entrancy can occur if, as a result of a callback,
> > +		 * the interested party causes fetch_poll() to be called 
> > +		 * again.
> > +		 */
> > +		if (c->locked == true) {
> > +			continue;
> > +		}
> > +
> > +		LOG(("polling unlocked context %p",c));
> 
> Lose this logging

gone

> 
> > +		/* Only process non-aborted fetches */
> > +		if (!c->aborted) {
> > +			/* file fetchs can be processed in one go */
> 
> fetches

changed

> >  
> > +/* fdopendir is actually present on most unix systems but unless
> > + * _POSIX_C_SOURCE is set to 2008 it is not declared in the system
> > + * headers. It is unavailable on RISC OS which requires fallback code
> > + */
> > +#if (_POSIX_C_SOURCE - 0) >= 200809L
> > +#define HAVE_FDOPENDIR
> > +#else
> > +#if defined(riscos)
> > +#undef HAVE_FDOPENDIR
> > +#else
> > +#define HAVE_FDOPENDIR
> > +DIR *fdopendir(int fd);
> > +#endif
> 
> We can't provide this ourselves. Therefore, should this non-POSIX 2008
> case simply be #undef HAVE_FDOPENDIR?

difficult to call, glibc has actually had this since forever, our
build system explicitly sets posix 2001 and most modern headers
promptly remove the declaration. If this really is becoming such a
pita I can simply remove its use and we can put up with the race, tbh
I am completely beyond caring now

> 
> > +#endif
> > +
> >  /* For some reason, UnixLib defines this unconditionally. 
> >   * Assume we're using UnixLib if building for RISC OS. */
> >  #if (defined(_GNU_SOURCE) || defined(riscos))
> > Index: utils/url.c
> > ===================================================================
> > --- utils/url.c	(revision 10746)
> > +++ utils/url.c	(working copy)
> > @@ -902,7 +902,28 @@
> >  	return URL_FUNC_FAILED;
> >  }
> >  
> > +/**
> > + * Convert an escaped string to plain.
> > + * \param result unescaped string owned by caller must be freed with free()
> > + * \return  URL_FUNC_OK on success
> > + */
> > +url_func_result url_unescape(const char *str, char **result)
> > +{
> > +	char *curlstr;
> > +	char *retstr;
> >  
> > +	curlstr = curl_unescape(str, 0);
> 
> if (curlstr == NULL)
> 	return URL_FUNC_NOMEM;
> 

done

> > +	retstr = strdup(curlstr);
> > +	curl_free(curlstr);
> > +
> > +	if (retstr == NULL) {
> > +		return URL_FUNC_FAILED;
> 
> URL_FUNC_NOMEM
> 

done

> > +	}
> > +
> > +	*result = retstr;
> > +	return URL_FUNC_OK;
> > +}
> > +
> >  /**
> >   * Escape a string suitable for inclusion in an URL.
> >   *



More information about the netsurf-dev mailing list