netsurf: branch rjek/bloom updated. release/3.0-78-gc204e5f

NetSurf Browser Project (Commit Mailer) no-reply at netsurf-browser.org
Fri May 17 12:12:55 BST 2013


Gitweb links:

...log http://git.netsurf-browser.org/netsurf.git/shortlog/c204e5ff0cbdb71eb8fe05c31252dc1d0674d300
...commit http://git.netsurf-browser.org/netsurf.git/commit/c204e5ff0cbdb71eb8fe05c31252dc1d0674d300
...tree http://git.netsurf-browser.org/netsurf.git/tree/c204e5ff0cbdb71eb8fe05c31252dc1d0674d300

The branch, rjek/bloom has been updated
       via  c204e5ff0cbdb71eb8fe05c31252dc1d0674d300 (commit)
      from  60e8f11850f338aef0742578f1e97f0db9c0548a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=c204e5ff0cbdb71eb8fe05c31252dc1d0674d300
commit c204e5ff0cbdb71eb8fe05c31252dc1d0674d300
Author: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>
Commit: Rob Kendrick (humdrum) <rob.kendrick at codethink.co.uk>

    Add explanatory comment and make bloom size be a #define

diff --git a/content/urldb.c b/content/urldb.c
index 92065d0..c52dc5b 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -328,7 +328,16 @@ static int loaded_cookie_file_version;
 #define MIN_URL_FILE_VERSION 106
 #define URL_FILE_VERSION 106
 
+/* Bloom filter used for short-circuiting the false case of "is this
+ * URL in the database?".  BLOOM_SIZE controls how large the filter is
+ * in bytes.  Primitive experimentation shows that for a filter of X
+ * bytes filled with X items, searching for X items not in the filter
+ * has a 5% false-positive rate.  We set it to 32kB, which should be
+ * enough for all but the largest databases, while not being shockingly
+ * wasteful on memory.
+ */
 static struct bloom_filter *url_bloom;
+#define BLOOM_SIZE (1024 * 32)
 
 /**
  * Import an URL database from file, replacing any existing database
@@ -352,7 +361,7 @@ void urldb_load(const char *filename)
 	LOG(("Loading URL file %s", filename));
 
         if (url_bloom == NULL)
-                url_bloom = bloom_create(16384);
+                url_bloom = bloom_create(BLOOM_SIZE);
 
 	fp = fopen(filename, "r");
 	if (!fp) {
@@ -795,7 +804,7 @@ bool urldb_add_url(nsurl *url)
 	assert(url);
         
         if (url_bloom == NULL)
-                url_bloom = bloom_create(16384);
+                url_bloom = bloom_create(BLOOM_SIZE);
         
         if (url_bloom != NULL)
                 bloom_insert_str(url_bloom, nsurl_access(url),


-----------------------------------------------------------------------

Summary of changes:
 content/urldb.c |   13 +++++++++++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/content/urldb.c b/content/urldb.c
index 92065d0..c52dc5b 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -328,7 +328,16 @@ static int loaded_cookie_file_version;
 #define MIN_URL_FILE_VERSION 106
 #define URL_FILE_VERSION 106
 
+/* Bloom filter used for short-circuiting the false case of "is this
+ * URL in the database?".  BLOOM_SIZE controls how large the filter is
+ * in bytes.  Primitive experimentation shows that for a filter of X
+ * bytes filled with X items, searching for X items not in the filter
+ * has a 5% false-positive rate.  We set it to 32kB, which should be
+ * enough for all but the largest databases, while not being shockingly
+ * wasteful on memory.
+ */
 static struct bloom_filter *url_bloom;
+#define BLOOM_SIZE (1024 * 32)
 
 /**
  * Import an URL database from file, replacing any existing database
@@ -352,7 +361,7 @@ void urldb_load(const char *filename)
 	LOG(("Loading URL file %s", filename));
 
         if (url_bloom == NULL)
-                url_bloom = bloom_create(16384);
+                url_bloom = bloom_create(BLOOM_SIZE);
 
 	fp = fopen(filename, "r");
 	if (!fp) {
@@ -795,7 +804,7 @@ bool urldb_add_url(nsurl *url)
 	assert(url);
         
         if (url_bloom == NULL)
-                url_bloom = bloom_create(16384);
+                url_bloom = bloom_create(BLOOM_SIZE);
         
         if (url_bloom != NULL)
                 bloom_insert_str(url_bloom, nsurl_access(url),


-- 
NetSurf Browser



More information about the netsurf-commits mailing list