netsurf: branch rjek/bloom updated. release/3.0-77-g60e8f11

NetSurf Browser Project (Commit Mailer) no-reply at netsurf-browser.org
Fri May 17 11:40:48 BST 2013


Gitweb links:

...log http://git.netsurf-browser.org/netsurf.git/shortlog/60e8f11850f338aef0742578f1e97f0db9c0548a
...commit http://git.netsurf-browser.org/netsurf.git/commit/60e8f11850f338aef0742578f1e97f0db9c0548a
...tree http://git.netsurf-browser.org/netsurf.git/tree/60e8f11850f338aef0742578f1e97f0db9c0548a

The branch, rjek/bloom has been updated
       via  60e8f11850f338aef0742578f1e97f0db9c0548a (commit)
       via  9f00abec0388ccbce07b67d5e2490e3e6c383456 (commit)
       via  abebc6ae2b3fb5e40b581d9afdc32d954bcb51b9 (commit)
      from  0c45ed935d81d7df958604ae4df34fda7181fca8 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=60e8f11850f338aef0742578f1e97f0db9c0548a
commit 60e8f11850f338aef0742578f1e97f0db9c0548a
Author: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>
Commit: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>

    urldb maintains a bloom filter of URLs contained within and consults it when searching

diff --git a/content/urldb.c b/content/urldb.c
index e3cc1d7..92065d0 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -107,6 +107,7 @@
 #include "utils/filename.h"
 #include "utils/url.h"
 #include "utils/utils.h"
+#include "utils/bloom.h"
 
 struct cookie_internal_data {
 	char *name;		/**< Cookie name */
@@ -327,6 +328,8 @@ static int loaded_cookie_file_version;
 #define MIN_URL_FILE_VERSION 106
 #define URL_FILE_VERSION 106
 
+static struct bloom_filter *url_bloom;
+
 /**
  * Import an URL database from file, replacing any existing database
  *
@@ -346,7 +349,10 @@ void urldb_load(const char *filename)
 
 	assert(filename);
 
-	LOG(("Loading URL file"));
+	LOG(("Loading URL file %s", filename));
+
+        if (url_bloom == NULL)
+                url_bloom = bloom_create(16384);
 
 	fp = fopen(filename, "r");
 	if (!fp) {
@@ -446,6 +452,11 @@ void urldb_load(const char *filename)
 					(port ? ports : ""),
 					s);
 
+                        if (url_bloom != NULL)
+                                bloom_insert_str(url_bloom,
+                                                 url,
+                                                 strlen(url));
+
 			/* TODO: store URLs in pre-parsed state, and make
 			 *       a nsurl_load to generate the nsurl more
 			 *       swiftly.
@@ -782,6 +793,13 @@ bool urldb_add_url(nsurl *url)
 	unsigned int port_int;
 
 	assert(url);
+        
+        if (url_bloom == NULL)
+                url_bloom = bloom_create(16384);
+        
+        if (url_bloom != NULL)
+                bloom_insert_str(url_bloom, nsurl_access(url),
+                                nsurl_length(url));
 
 	/* Copy and merge path/query strings */
 	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
@@ -1857,6 +1875,14 @@ struct path_data *urldb_find_url(nsurl *url)
 	bool match;
 
 	assert(url);
+        
+        if (url_bloom != NULL) {
+                if (bloom_search_str(url_bloom,
+                                     nsurl_access(url),
+                                     nsurl_length(url)) == false) {
+                                        return NULL;
+                }
+        }
 
 	scheme = nsurl_get_component(url, NSURL_SCHEME);
 	if (scheme == NULL)
@@ -3951,6 +3977,10 @@ void urldb_destroy(void)
 		b = a->next;
 		urldb_destroy_host_tree(a);
 	}
+        
+        /* And the bloom filter */
+        if (url_bloom != NULL)
+                bloom_destroy(url_bloom);
 }
 
 /**
diff --git a/utils/Makefile b/utils/Makefile
index ed34e95..071e4fe 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -2,6 +2,6 @@
 
 S_UTILS := base64.c corestrings.c filename.c filepath.c hashtable.c	\
 	libdom.c locale.c log.c messages.c nsurl.c talloc.c url.c	\
-	utf8.c utils.c useragent.c
+	utf8.c utils.c useragent.c bloom.c
 
-S_UTILS := $(addprefix utils/,$(S_UTILS))
\ No newline at end of file
+S_UTILS := $(addprefix utils/,$(S_UTILS))


commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=9f00abec0388ccbce07b67d5e2490e3e6c383456
commit 9f00abec0388ccbce07b67d5e2490e3e6c383456
Merge: 0c45ed9 abebc6a
Author: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>
Commit: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>

    Merge branch 'master' into rjek/bloom



-----------------------------------------------------------------------

Summary of changes:
 content/urldb.c |   32 ++++++++++++++++++++++++++-
 css/select.c    |   65 ++++++++++++++++++++++++++++++++++++------------------
 utils/Makefile  |    4 +-
 3 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/content/urldb.c b/content/urldb.c
index e3cc1d7..92065d0 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -107,6 +107,7 @@
 #include "utils/filename.h"
 #include "utils/url.h"
 #include "utils/utils.h"
+#include "utils/bloom.h"
 
 struct cookie_internal_data {
 	char *name;		/**< Cookie name */
@@ -327,6 +328,8 @@ static int loaded_cookie_file_version;
 #define MIN_URL_FILE_VERSION 106
 #define URL_FILE_VERSION 106
 
+static struct bloom_filter *url_bloom;
+
 /**
  * Import an URL database from file, replacing any existing database
  *
@@ -346,7 +349,10 @@ void urldb_load(const char *filename)
 
 	assert(filename);
 
-	LOG(("Loading URL file"));
+	LOG(("Loading URL file %s", filename));
+
+        if (url_bloom == NULL)
+                url_bloom = bloom_create(16384);
 
 	fp = fopen(filename, "r");
 	if (!fp) {
@@ -446,6 +452,11 @@ void urldb_load(const char *filename)
 					(port ? ports : ""),
 					s);
 
+                        if (url_bloom != NULL)
+                                bloom_insert_str(url_bloom,
+                                                 url,
+                                                 strlen(url));
+
 			/* TODO: store URLs in pre-parsed state, and make
 			 *       a nsurl_load to generate the nsurl more
 			 *       swiftly.
@@ -782,6 +793,13 @@ bool urldb_add_url(nsurl *url)
 	unsigned int port_int;
 
 	assert(url);
+        
+        if (url_bloom == NULL)
+                url_bloom = bloom_create(16384);
+        
+        if (url_bloom != NULL)
+                bloom_insert_str(url_bloom, nsurl_access(url),
+                                nsurl_length(url));
 
 	/* Copy and merge path/query strings */
 	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
@@ -1857,6 +1875,14 @@ struct path_data *urldb_find_url(nsurl *url)
 	bool match;
 
 	assert(url);
+        
+        if (url_bloom != NULL) {
+                if (bloom_search_str(url_bloom,
+                                     nsurl_access(url),
+                                     nsurl_length(url)) == false) {
+                                        return NULL;
+                }
+        }
 
 	scheme = nsurl_get_component(url, NSURL_SCHEME);
 	if (scheme == NULL)
@@ -3951,6 +3977,10 @@ void urldb_destroy(void)
 		b = a->next;
 		urldb_destroy_host_tree(a);
 	}
+        
+        /* And the bloom filter */
+        if (url_bloom != NULL)
+                bloom_destroy(url_bloom);
 }
 
 /**
diff --git a/css/select.c b/css/select.c
index a98ab06..0d3be38 100644
--- a/css/select.c
+++ b/css/select.c
@@ -1552,38 +1552,59 @@ css_error node_is_visited(void *pw, void *node, bool *match)
 	*match = false;
 
 	/** \todo Implement visted check in a more performant way */
-
 #ifdef SUPPORT_VISITED
+
 	nscss_select_ctx *ctx = pw;
-	xmlNode *n = node;
+	nsurl *url;
+	nserror error;
+	const struct url_data *data;
 
-	if (strcasecmp((const char *) n->name, "a") == 0) {
-		nsurl *url;
-		nserror error;
-		const struct url_data *data;
-		xmlChar *href = xmlGetProp(n, (const xmlChar *) "href");
+	dom_exception exc;
+	dom_node *n = node;
+	dom_string *s = NULL;
 
-		if (href == NULL)
-			return CSS_OK;
+	exc = dom_node_get_node_name(n, &s);
+	if ((exc != DOM_NO_ERR) || (s == NULL)) {
+		return CSS_NOMEM;
+	}
 
-		/* Make href absolute */
-		/* TODO: this duplicates what we do for box->href */
-		error = nsurl_join(ctx->base_url, (const char *)href, &url);
+	if (!dom_string_caseless_lwc_isequal(s, corestring_lwc_a)) {
+		/* Can't be visited; not anchor element */
+		dom_string_unref(s);
+		return CSS_OK;
+	}
 
-		xmlFree(href);
-		if (error != NSERROR_OK) {
-			return CSS_NOMEM;
-		}
+	/* Finished with node name string */
+	dom_string_unref(s);
+	s = NULL;
+
+	exc = dom_element_get_attribute(n, corestring_dom_href, &s);
+	if ((exc != DOM_NO_ERR) || (s == NULL)) {
+		/* Can't be visited; not got a URL */
+		return CSS_OK;
+	}
 
-		data = urldb_get_url_data(nsurl_access(url));
+	/* Make href absolute */
+	/* TODO: this duplicates what we do for box->href
+	 *       should we put the absolute URL on the dom node? */
+	error = nsurl_join(ctx->base_url, dom_string_data(s), &url);
 
-		/* Visited if in the db and has
-		 * non-zero visit count */
-		if (data != NULL && data->visits > 0)
-			*match = true;
+	/* Finished with href string */
+	dom_string_unref(s);
 
-		nsurl_unref(url);
+	if (error != NSERROR_OK) {
+		/* Couldn't make nsurl object */
+		return CSS_NOMEM;
 	}
+
+	data = urldb_get_url_data(url);
+
+	/* Visited if in the db and has
+	 * non-zero visit count */
+	if (data != NULL && data->visits > 0)
+		*match = true;
+
+	nsurl_unref(url);
 #endif
 
 	return CSS_OK;
diff --git a/utils/Makefile b/utils/Makefile
index ed34e95..071e4fe 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -2,6 +2,6 @@
 
 S_UTILS := base64.c corestrings.c filename.c filepath.c hashtable.c	\
 	libdom.c locale.c log.c messages.c nsurl.c talloc.c url.c	\
-	utf8.c utils.c useragent.c
+	utf8.c utils.c useragent.c bloom.c
 
-S_UTILS := $(addprefix utils/,$(S_UTILS))
\ No newline at end of file
+S_UTILS := $(addprefix utils/,$(S_UTILS))


-- 
NetSurf Browser



More information about the netsurf-commits mailing list