http authentication improvement

John Tytgat joty at netsurf-browser.org
Tue Aug 4 03:16:47 BST 2009


In message <1249265808.20182.448.camel at duiker>
          John-Mark Bell <jmb at netsurf-browser.org> wrote:

> On Mon, 2009-08-03 at 00:32 +0100, John Tytgat wrote:
> > Now the questions:
> > 
> > 1. Is this hack acceptable for the time being ?
> > 2. Or should we shoot for, what I think is, the right approach, i.e.
> >    when no prot_space_data ptr can be found in path_date structs, await
> >    a 401 and with the realm given at that point take the right
> >    prot_space_data or offer the uses the possibility to enter a new
> >    username/password for that realm.  But that needs to work for all
> >    content fetched, not only html (what I think is happening).
> >    TBH, this is probably above my head to implement for the time I can
> >    spend on.
> 
> I'd prefer (2), which is why there's been a couple of related
> authentication bugs sat in the tracker since February.

With jmb's help I managed get rid of the hack (so we never speculatively
give authentication data and when 401 arrives with a realm of which we
know the auth data, we do a refetch).

It solves <URL:http://sourceforge.net/tracker/?func=detail&aid=2830829&group_id=51719&atid=464312>.
Following patch should be ready for checkin. Final comments please.

- content/urldb.c(auth_data): Removed;
  (prot_space_data): Added, it lives linked in the leaf host_part
  struct and together with its scheme and port (which defins canonical root
  url) and realm this defines a protection space.
  (path_data): Removed auth_data field and replaced by a prot_space_data
  pointer.
  (host_part::prot_space): Added linked list of protection space data
  structs.
  (urldb_get_auth_details): Given an URL fetch fetches its auth.
  (urldb_set_auth_details): Creates or updates the contents of a
  protection space to which given URL belongs.
  (urldb_destroy_host_tree): Delete protection data space structures
  using urldb_destroy_prot_space.
  (urldb_destroy_prot_space): Added.
- content/urldb.h(urldb_get_auth_details): Added realm parameter.
- content/fetchers/fetch_curl.c(fetch_curl_set_options): Update
  urldb_get_auth_details call (we don't know realm at this point).
- content/fetchcache.c(fetchcache_callback, fetchcache_auth): At FETCH_AUTH,
  use realm to determine if we really don't know auth data and if so,
  refetch content.
- content/content.h(struct content): Add content::tried_with_auth.
- content/content.c(content_create): Initialize content::tried_with_auth.
- riscos/401login.c(ro_gui_401login_open): Show known authentication
  data in dialogue so user can see what was wrong with it and correct it.

-8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<-
Index: content/urldb.c
===================================================================
--- content/urldb.c	(revision 8988)
+++ content/urldb.c	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2006 John M Bell <jmb202 at ecs.soton.ac.uk>
+ * Copyright 2009 John Tytgat <joty at netsurf-browser.org>
  *
  * This file is part of NetSurf, http://www.netsurf-browser.org/
  *
@@ -132,10 +133,22 @@
 	struct cookie_internal_data *next;	/**< Next in list */
 };
 
-struct auth_data {
+/* A protection space is defined as a tuple canonical_root_url and realm.
+ * This structure lives as linked list element in a leaf host_part struct
+ * so we need additional scheme and port to have a canonical_root_url.  */
+struct prot_space_data {
+	char *scheme;		/**< URL scheme of canonical hostname of this
+				 * protection space. */
+	unsigned int port;	/**< Port number of canonical hostname of this
+				 * protection space. When 0, it means the
+				 * default port for given scheme, i.e. 80
+				 * (http), 443 (https). */
 	char *realm;		/**< Protection realm */
-	char *auth;		/**< Authentication details in form
+
+	char *auth;		/**< Authentication details for this
+				 * protection space in form
 				 * username:password */
+	struct prot_space_data *next;	/**< Next sibling */
 };
 
 struct cache_internal_data {
@@ -152,7 +165,9 @@
 struct path_data {
 	char *url;		/**< Full URL */
 	char *scheme;		/**< URL scheme for data */
-	unsigned int port;	/**< Port number for data */
+	unsigned int port;	/**< Port number for data. When 0, it means
+				 * the default port for given scheme, i.e.
+				 * 80 (http), 443 (https). */
 	char *segment;		/**< Path segment for this node */
 	unsigned int frag_cnt;	/**< Number of entries in ::fragment */
 	char **fragment;	/**< Array of fragments */
@@ -161,7 +176,11 @@
 	struct bitmap *thumb;	/**< Thumbnail image of resource */
 	struct url_internal_data urld;	/**< URL data for resource */
 	struct cache_internal_data cache;	/**< Cache data for resource */
-	struct auth_data auth;	/**< Authentication data for resource */
+	const struct prot_space_data *prot_space;	/**< Protection space
+				 * to which this resource belongs too. Can be
+				 * NULL when it does not belong to a protection
+				 * space or when it is not known. No
+				 * ownership (is with struct host_part::prot_space). */
 	struct cookie_internal_data *cookies;	/**< Cookies associated with resource */
 	struct cookie_internal_data *cookies_end;	/**< Last cookie in list */
 
@@ -183,6 +202,10 @@
 
 	char *part;		/**< Part of host string */
 
+	struct prot_space_data *prot_space;	/**< Linked list of all known
+				 * proctection spaces known for his host and
+				 * all its schems and ports. */
+
 	struct host_part *next;	/**< Next sibling */
 	struct host_part *prev;	/**< Previous sibling */
 	struct host_part *parent;	/**< Parent host part */
@@ -203,6 +226,7 @@
 static void urldb_destroy_path_tree(struct path_data *root);
 static void urldb_destroy_path_node_content(struct path_data *node);
 static void urldb_destroy_cookie(struct cookie_internal_data *c);
+static void urldb_destroy_prot_space(struct prot_space_data *space);
 static void urldb_destroy_search_tree(struct search_node *root);
 
 /* Saving */
@@ -925,11 +949,13 @@
  * Look up authentication details in database
  *
  * \param url Absolute URL to search for
+ * \param realm When non-NULL, it is realm which can be used to determine
+ * the protection space when that's not been done before for given URL.
  * \return Pointer to authentication details, or NULL if not found
  */
-const char *urldb_get_auth_details(const char *url)
+const char *urldb_get_auth_details(const char *url, const char *realm)
 {
-	struct path_data *p, *q = NULL;
+	struct path_data *p, *p_cur, *p_top;
 
 	assert(url);
 
@@ -940,29 +966,33 @@
 	if (!p)
 		return NULL;
 
-	/* Check for any auth details attached to this node */
-	if (p && p->auth.realm && p->auth.auth)
-		return p->auth.auth;
+	/* Check for any auth details attached to the path_data node or any of
+	 * its parents. */
+	for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
+		if (p_cur->prot_space) {
+			return p_cur->prot_space->auth;
+		}
+	}
 
-	/* Now consider ancestors */
-	for (; p; p = p->parent) {
-		/* The parent path entry is stored hung off the
-		 * parent entry with an empty (not NULL) segment string.
-		 * We look for this here.
-		 */
-		for (q = p->children; q; q = q->next) {
-			if (q->segment[0] == '\0')
-				break;
+	/* Only when we have a realm (and canonical root of given URL), we can
+	 * uniquely locate the protection space. */
+	if (realm != NULL) {
+		const struct host_part *h = (const struct host_part *)p_top;
+		const struct prot_space_data *space;
+
+		/* Search for a possible matching protection space. */
+		for (space = h->prot_space; space != NULL;
+				space = space->next) {
+			if (!strcmp(space->realm, realm)
+					&& !strcmp(space->scheme, p->scheme)
+					&& space->port == p->port) {
+				p->prot_space = space;
+				return p->prot_space->auth;
+			}
 		}
-
-		if (q && q->auth.realm && q->auth.auth)
-			break;
 	}
 
-	if (!q)
-		return NULL;
-
-	return q->auth.auth;
+	return NULL;
 }
 
 /**
@@ -975,7 +1005,7 @@
 bool urldb_get_cert_permissions(const char *url)
 {
 	struct path_data *p;
-	struct host_part *h;
+	const struct host_part *h;
 
 	assert(url);
 
@@ -985,8 +1015,9 @@
 
 	for (; p && p->parent; p = p->parent)
 		/* do nothing */;
+	assert(p);
 
-	h = (struct host_part *)p;
+	h = (const struct host_part *)p;
 
 	return h->permit_invalid_certs;
 }
@@ -1001,48 +1032,63 @@
 void urldb_set_auth_details(const char *url, const char *realm,
 		const char *auth)
 {
-	struct path_data *p;
-	char *urlt, *t1, *t2;
+	struct path_data *p, *pi;
+	struct host_part *h;
+	struct prot_space_data *space, *space_alloc;
+	char *realm_alloc, *auth_alloc, *scheme_alloc;
 
 	assert(url && realm && auth);
 
-	urlt = strdup(url);
-	if (!urlt)
-		return;
-
-	/* strip leafname from URL */
-	t1 = strrchr(urlt, '/');
-	if (t1) {
-		*(t1 + 1) = '\0';
-	}
-
 	/* add url, in case it's missing */
-	urldb_add_url(urlt);
+	urldb_add_url(url);
 
-	p = urldb_find_url(urlt);
+	p = urldb_find_url(url);
 
-	free(urlt);
-
 	if (!p)
 		return;
 
-	/** \todo search subtree for same realm/auth details
-	 * and remove them (as the lookup routine searches up the tree) */
+	/* Search for host_part */
+	for (pi = p; pi->parent != NULL; pi = pi->parent)
+		;
+	h = (struct host_part *)pi;
 
-	t1 = strdup(realm);
-	t2 = strdup(auth);
+	/* Search if given URL belongs to a protection space we already know of. */
+	for (space = h->prot_space; space; space = space->next) {
+		if (!strcmp(space->realm, realm)
+				&& !strcmp(space->scheme, p->scheme)
+				&& space->port == p->port)
+			break;
+	}
 
-	if (!t1 || !t2) {
-		free(t1);
-		free(t2);
-		return;
+	if (space != NULL) {
+		/* Overrule existing auth. */
+		free(space->auth);
+		space->auth = strdup(auth);
+	} else {
+		/* Create a new protection space. */
+		space = space_alloc = malloc(sizeof(struct prot_space_data));
+		scheme_alloc = strdup(p->scheme);
+		realm_alloc = strdup(realm);
+		auth_alloc = strdup(auth);
+
+		if (!space_alloc || !scheme_alloc
+				|| !realm_alloc || !auth_alloc) {
+			free(space_alloc);
+			free(scheme_alloc);
+			free(realm_alloc);
+			free(auth_alloc);
+			return;
+		}
+
+		space->scheme = scheme_alloc;
+		space->port = p->port;
+		space->realm = realm_alloc;
+		space->auth = auth_alloc;
+		space->next = h->prot_space;
+		h->prot_space = space;
 	}
 
-	free(p->auth.realm);
-	free(p->auth.auth);
-
-	p->auth.realm = t1;
-	p->auth.auth = t2;
+	p->prot_space = space;
 }
 
 /**
@@ -1067,6 +1113,7 @@
 
 	for (; p && p->parent; p = p->parent)
 		/* do nothing */;
+	assert(p);
 
 	h = (struct host_part *)p;
 
@@ -3878,6 +3925,7 @@
 {
 	struct host_part *a, *b;
 	struct path_data *p, *q;
+	struct prot_space_data *s, *t;
 
 	/* Destroy children */
 	for (a = root->children; a; a = b) {
@@ -3894,6 +3942,12 @@
 	/* Root path */
 	urldb_destroy_path_node_content(&root->paths);
 
+	/* Proctection space data */
+	for (s = root->prot_space; s; s = t) {
+		t = s->next;
+		urldb_destroy_prot_space(s);
+	}
+
 	/* And ourselves */
 	free(root->part);
 	free(root);
@@ -3955,8 +4009,6 @@
 		bitmap_destroy(node->thumb);
 
 	free(node->urld.title);
-	free(node->auth.realm);
-	free(node->auth.auth);
 
 	for (a = node->cookies; a; a = b) {
 		b = a->next;
@@ -3981,6 +4033,21 @@
 }
 
 /**
+ * Destroy protection space data
+ *
+ * \param space Protection space to destroy
+ */
+void urldb_destroy_prot_space(struct prot_space_data *space)
+{
+	free(space->scheme);
+	free(space->realm);
+	free(space->auth);
+
+	free(space);
+}
+
+
+/**
  * Destroy a search tree
  *
  * \param root Root node of tree to destroy
Index: content/urldb.h
===================================================================
--- content/urldb.h	(revision 8988)
+++ content/urldb.h	(working copy)
@@ -85,7 +85,7 @@
 /* Authentication modification / lookup */
 void urldb_set_auth_details(const char *url, const char *realm,
 		const char *auth);
-const char *urldb_get_auth_details(const char *url);
+const char *urldb_get_auth_details(const char *url, const char *realm);
 
 /* SSL certificate permissions */
 void urldb_set_cert_permissions(const char *url, bool permit);
Index: content/fetchers/fetch_curl.c
===================================================================
--- content/fetchers/fetch_curl.c	(revision 8988)
+++ content/fetchers/fetch_curl.c	(working copy)
@@ -560,7 +560,7 @@
 		SETOPT(CURLOPT_COOKIE, NULL);
 	}
 
-	if ((auth = urldb_get_auth_details(f->url)) != NULL) {
+	if ((auth = urldb_get_auth_details(f->url, NULL)) != NULL) {
 		SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY);
 		SETOPT(CURLOPT_USERPWD, auth);
 	} else {
Index: content/fetchcache.c
===================================================================
--- content/fetchcache.c	(revision 8988)
+++ content/fetchcache.c	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2005 James Bursa <bursa at users.sourceforge.net>
+ * Copyright 2009 John-Mark Bell <jmb at netsurf-browser.org>
  *
  * This file is part of NetSurf, http://www.netsurf-browser.org/
  *
@@ -37,6 +38,7 @@
 #include "content/content.h"
 #include "content/fetchcache.h"
 #include "content/fetch.h"
+#include "content/urldb.h"
 #include "utils/log.h"
 #include "utils/messages.h"
 #include "utils/talloc.h"
@@ -58,6 +60,7 @@
 static void fetchcache_notmodified(struct content *c, const void *data);
 static void fetchcache_redirect(struct content *c, const void *data,
 		unsigned long size);
+static void fetchcache_auth(struct content *c, const char *realm);
 
 
 /**
@@ -516,14 +519,7 @@
 			break;
 
 		case FETCH_AUTH:
-			/* data -> string containing the Realm */
-			LOG(("FETCH_AUTH, '%s'", (const char *)data));
-			c->fetch = 0;
-			msg_data.auth_realm = data;
-			content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
-			/* set the status to ERROR so that the content is
-			 * destroyed in content_clean() */
-			c->status = CONTENT_STATUS_ERROR;
+			fetchcache_auth(c, data);
 			break;
 
 		case FETCH_CERT_ERR:
@@ -1136,6 +1132,92 @@
 	free(referer);
 }
 
+/**
+ * Authentication callback handler
+ */
+
+void fetchcache_auth(struct content *c, const char *realm)
+{
+	char *referer;
+	const char *ref;
+	const char *auth;
+	struct content *parent;
+	bool parent_was_verifiable;
+	union content_msg_data msg_data;
+	char *headers = NULL;
+
+	/* Preconditions */
+	assert(c && realm);
+	assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
+
+	/* Extract fetch details */
+	ref = fetch_get_referer(c->fetch);
+	parent = fetch_get_parent(c->fetch);
+	parent_was_verifiable = fetch_get_verifiable(c->fetch);
+
+	/* Clone referer -- original is destroyed in fetch_abort() */
+	referer = ref ? strdup(ref) : NULL;
+
+	fetch_abort(c->fetch);
+	c->fetch = NULL;
+
+	/* Ensure that referer cloning succeeded 
+	 * _must_ be after content invalidation */
+	if (ref && !referer) {
+		LOG(("Failed cloning referer"));
+
+		c->status = CONTENT_STATUS_ERROR;
+		msg_data.error = messages_get("BadRedirect");
+		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+		return;
+	}
+
+	/* Now, see if we've got some auth details */
+	auth = urldb_get_auth_details(c->url, realm);
+
+	if (auth == NULL || c->tried_with_auth) {
+		/* No authentication details or we tried what we had, so ask
+		 * our client for them. */
+		c->tried_with_auth = false; /* Allow rety. */
+
+		c->status = CONTENT_STATUS_ERROR;
+		msg_data.auth_realm = realm;
+		content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
+
+		return;
+	}
+	/* Flag we're retry fetching with auth data. Will be used to detect
+	 * wrong auth data so that we can ask our client for better auth. */
+	c->tried_with_auth = true;
+
+	/* We have authentication details. Fetch with them. */
+	/** \todo all the useful things like headers, POST. */
+	c->fetch = fetch_start(c->url, referer,
+			fetchcache_callback, c,
+			c->no_error_pages,
+			NULL, NULL, parent_was_verifiable,
+			parent, &headers);
+	if (c->fetch == NULL) {
+		char error_message[500];
+
+		LOG(("warning: fetch_start failed"));
+		snprintf(error_message, sizeof error_message,
+				messages_get("InvalidURL"),
+				c->url);
+		if (c->no_error_pages) {
+			c->status = CONTENT_STATUS_ERROR;
+			msg_data.error = error_message;
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+		} else {
+			fetchcache_error_page(c, error_message);
+		}
+	}
+
+	/* Clean up */
+	free(referer);
+}
+
 #ifdef TEST
 
 #include <unistd.h>
Index: content/content.h
===================================================================
--- content/content.h	(revision 8988)
+++ content/content.h	(working copy)
@@ -259,6 +259,7 @@
 
 	bool no_error_pages;		/**< Used by fetchcache(). */
 	bool download;			/**< Used by fetchcache(). */
+	bool tried_with_auth;		/**< Used by fetchcache(). */
 	unsigned int redirect_count;	/**< Used by fetchcache(). */
 
 	/** Array of first n rendering errors or warnings. */
Index: content/content.c
===================================================================
--- content/content.c	(revision 8988)
+++ content/content.c	(working copy)
@@ -443,6 +443,7 @@
 	c->http_code = 0;
 	c->no_error_pages = false;
 	c->download = false;
+	c->tried_with_auth = false;
 	c->redirect_count = 0;
 	c->error_count = 0;
 	c->cache_data.req_time = 0;
Index: riscos/401login.c
===================================================================
--- riscos/401login.c	(revision 8988)
+++ riscos/401login.c	(working copy)
@@ -98,6 +98,7 @@
 {
 	struct session_401 *session;
 	wimp_w w;
+	const char *auth;
 
 	session = calloc(1, sizeof(struct session_401));
 	if (!session) {
@@ -111,10 +112,28 @@
 		warn_user("NoMemory", 0);
 		return;
 	}
-	session->uname[0] = '\0';
-	session->pwd[0] = '\0';
+	if (realm == NULL)
+		realm = "Secure Area";
+        auth = urldb_get_auth_details(session->url, realm);
+	if (auth == NULL) {
+		session->uname[0] = '\0';
+		session->pwd[0] = '\0';
+	} else {
+		const char *pwd;
+		size_t pwd_len;
+
+		pwd = strchr(auth, ':');
+		assert(pwd && pwd < auth + sizeof(session->uname));
+		memcpy(session->uname, auth, pwd - auth);
+		session->uname[pwd - auth] = '\0';
+		++pwd;
+		pwd_len = strlen(pwd);
+		assert(pwd_len < sizeof(session->pwd));
+		memcpy(session->pwd, pwd, pwd_len);
+		session->pwd[pwd_len] = '\0';
+	}
 	session->host = strdup(host);
-	session->realm = strdup(realm ? realm : "Secure Area");
+	session->realm = strdup(realm);
 	session->bwin = bw;
 	if ((!session->host) || (!session->realm)) {
 		free(session->host);
-8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<-

John.
-- 
John Tytgat
joty at netsurf-browser.org



More information about the netsurf-dev mailing list