r5857 jmb - in /trunk/libcss/test: css21.c lex.c parse.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 16:48:26 2008
New Revision: 5857
URL: http://source.netsurf-browser.org?rev=5857&view=rev
Log:
More modifications to allow numerous iterations for profiling.
Overall time breakdown for allzengarden.css is approximately:
lex : 37%
core : 43%
lang : 20%
Modified:
trunk/libcss/test/css21.c
trunk/libcss/test/lex.c
trunk/libcss/test/parse.c
Modified: trunk/libcss/test/css21.c
URL: http://source.netsurf-browser.org/trunk/libcss/test/css21.c?rev=5857&r1=5...
==============================================================================
--- trunk/libcss/test/css21.c (original)
+++ trunk/libcss/test/css21.c Sun Nov 30 16:48:26 2008
@@ -6,9 +6,9 @@
#include "testutils.h"
-#define ITERATIONS (1)
+#define ITERATIONS (10)
#define DUMP_HASH (0)
-#define DUMP_CSS (1)
+#define DUMP_CSS (0)
extern void parserutils_hash_dump(parserutils_hash *hash);
Modified: trunk/libcss/test/lex.c
URL: http://source.netsurf-browser.org/trunk/libcss/test/lex.c?rev=5857&r1=585...
==============================================================================
--- trunk/libcss/test/lex.c (original)
+++ trunk/libcss/test/lex.c Sun Nov 30 16:48:26 2008
@@ -12,6 +12,10 @@
#include "lex/lex.h"
#include "testutils.h"
+
+#define ITERATIONS (10)
+#define DUMP_TOKENS (0)
+
static void *myrealloc(void *data, size_t len, void *pw)
{
@@ -22,7 +26,7 @@
static void printToken(const css_token *token)
{
-#if 0
+#if !DUMP_TOKENS
UNUSED(token);
#else
printf("[%d, %d] : ", token->line, token->col);
@@ -129,30 +133,55 @@
/* Initialise library */
assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK);
- assert(parserutils_inputstream_create("UTF-8", CSS_CHARSET_DICTATED,
- css_charset_extract,
+ for (int i = 0; i < ITERATIONS; i++) {
+ assert(parserutils_inputstream_create("UTF-8",
+ CSS_CHARSET_DICTATED,css_charset_extract,
(parserutils_alloc) myrealloc, NULL, &stream) ==
PARSERUTILS_OK);
- assert(css_lexer_create(stream, myrealloc, NULL, &lexer) == CSS_OK);
-
- fp = fopen(argv[2], "rb");
- if (fp == NULL) {
- printf("Failed opening %s\n", argv[2]);
- return 1;
- }
-
- fseek(fp, 0, SEEK_END);
- origlen = len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
-
- while (len >= CHUNK_SIZE) {
- fread(buf, 1, CHUNK_SIZE, fp);
-
- assert(parserutils_inputstream_append(stream,
- buf, CHUNK_SIZE) == PARSERUTILS_OK);
-
- len -= CHUNK_SIZE;
+ assert(css_lexer_create(stream, myrealloc, NULL, &lexer) ==
+ CSS_OK);
+
+ fp = fopen(argv[2], "rb");
+ if (fp == NULL) {
+ printf("Failed opening %s\n", argv[2]);
+ return 1;
+ }
+
+ fseek(fp, 0, SEEK_END);
+ origlen = len = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+
+ while (len >= CHUNK_SIZE) {
+ fread(buf, 1, CHUNK_SIZE, fp);
+
+ assert(parserutils_inputstream_append(stream,
+ buf, CHUNK_SIZE) == PARSERUTILS_OK);
+
+ len -= CHUNK_SIZE;
+
+ while ((error = css_lexer_get_token(lexer, &tok)) ==
+ CSS_OK) {
+ printToken(tok);
+
+ if (tok->type == CSS_TOKEN_EOF)
+ break;
+ }
+ }
+
+ if (len > 0) {
+ fread(buf, 1, len, fp);
+
+ assert(parserutils_inputstream_append(stream,
+ buf, len) == PARSERUTILS_OK);
+
+ len = 0;
+ }
+
+ fclose(fp);
+
+ assert(parserutils_inputstream_append(stream, NULL, 0) ==
+ PARSERUTILS_OK);
while ((error = css_lexer_get_token(lexer, &tok)) == CSS_OK) {
printToken(tok);
@@ -160,32 +189,11 @@
if (tok->type == CSS_TOKEN_EOF)
break;
}
+
+ css_lexer_destroy(lexer);
+
+ parserutils_inputstream_destroy(stream);
}
-
- if (len > 0) {
- fread(buf, 1, len, fp);
-
- assert(parserutils_inputstream_append(stream,
- buf, len) == PARSERUTILS_OK);
-
- len = 0;
- }
-
- fclose(fp);
-
- assert(parserutils_inputstream_append(stream, NULL, 0) ==
- PARSERUTILS_OK);
-
- while ((error = css_lexer_get_token(lexer, &tok)) == CSS_OK) {
- printToken(tok);
-
- if (tok->type == CSS_TOKEN_EOF)
- break;
- }
-
- css_lexer_destroy(lexer);
-
- parserutils_inputstream_destroy(stream);
assert(css_finalise(myrealloc, NULL) == CSS_OK);
Modified: trunk/libcss/test/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/test/parse.c?rev=5857&r1=5...
==============================================================================
--- trunk/libcss/test/parse.c (original)
+++ trunk/libcss/test/parse.c Sun Nov 30 16:48:26 2008
@@ -11,6 +11,10 @@
#include "testutils.h"
+#define ITERATIONS (10)
+#define DUMP_EVENTS (0)
+
+#if DUMP_EVENTS
static const char *event_names[] = {
"START_STYLESHEET",
"END_STYLESHEET",
@@ -23,6 +27,7 @@
"BLOCK_CONTENT",
"DECLARATION"
};
+#endif
static void *myrealloc(void *data, size_t len, void *pw)
{
@@ -34,7 +39,7 @@
static css_error event_handler(css_parser_event type,
const parserutils_vector *tokens, void *pw)
{
-#if 0
+#if !DUMP_EVENTS
UNUSED(type);
UNUSED(tokens);
UNUSED(pw);
@@ -87,52 +92,54 @@
/* Initialise library */
assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK);
- assert(parserutils_hash_create(myrealloc, NULL, &dict) ==
- PARSERUTILS_OK);
+ for (int i = 0; i < ITERATIONS; i++) {
+ assert(parserutils_hash_create(myrealloc, NULL, &dict) ==
+ PARSERUTILS_OK);
- assert(css_parser_create("UTF-8", CSS_CHARSET_DICTATED, dict,
- myrealloc, NULL, &parser) == CSS_OK);
+ assert(css_parser_create("UTF-8", CSS_CHARSET_DICTATED, dict,
+ myrealloc, NULL, &parser) == CSS_OK);
- params.event_handler.handler = event_handler;
- params.event_handler.pw = NULL;
- assert(css_parser_setopt(parser, CSS_PARSER_EVENT_HANDLER,
- ¶ms) == CSS_OK);
+ params.event_handler.handler = event_handler;
+ params.event_handler.pw = NULL;
+ assert(css_parser_setopt(parser, CSS_PARSER_EVENT_HANDLER,
+ ¶ms) == CSS_OK);
- fp = fopen(argv[2], "rb");
- if (fp == NULL) {
- printf("Failed opening %s\n", argv[2]);
- return 1;
+ fp = fopen(argv[2], "rb");
+ if (fp == NULL) {
+ printf("Failed opening %s\n", argv[2]);
+ return 1;
+ }
+
+ fseek(fp, 0, SEEK_END);
+ origlen = len = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+
+ while (len >= CHUNK_SIZE) {
+ fread(buf, 1, CHUNK_SIZE, fp);
+
+ error = css_parser_parse_chunk(parser, buf, CHUNK_SIZE);
+ assert(error == CSS_OK || error == CSS_NEEDDATA);
+
+ len -= CHUNK_SIZE;
+ }
+
+ if (len > 0) {
+ fread(buf, 1, len, fp);
+
+ error = css_parser_parse_chunk(parser, buf, len);
+ assert(error == CSS_OK || error == CSS_NEEDDATA);
+
+ len = 0;
+ }
+
+ fclose(fp);
+
+ assert(css_parser_completed(parser) == CSS_OK);
+
+ css_parser_destroy(parser);
+
+ parserutils_hash_destroy(dict);
}
-
- fseek(fp, 0, SEEK_END);
- origlen = len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
-
- while (len >= CHUNK_SIZE) {
- fread(buf, 1, CHUNK_SIZE, fp);
-
- error = css_parser_parse_chunk(parser, buf, CHUNK_SIZE);
- assert(error == CSS_OK || error == CSS_NEEDDATA);
-
- len -= CHUNK_SIZE;
- }
-
- if (len > 0) {
- fread(buf, 1, len, fp);
-
- error = css_parser_parse_chunk(parser, buf, len);
- assert(error == CSS_OK || error == CSS_NEEDDATA);
-
- len = 0;
- }
-
- fclose(fp);
-
- assert(css_parser_completed(parser) == CSS_OK);
-
- css_parser_destroy(parser);
-
- parserutils_hash_destroy(dict);
assert(css_finalise(myrealloc, NULL) == CSS_OK);
14 years, 9 months
r5856 jmb - /trunk/libcss/test/css21.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 16:19:08 2008
New Revision: 5856
URL: http://source.netsurf-browser.org?rev=5856&view=rev
Log:
Modify to allow a configurable number of iterations for profiling.
Modified:
trunk/libcss/test/css21.c
Modified: trunk/libcss/test/css21.c
URL: http://source.netsurf-browser.org/trunk/libcss/test/css21.c?rev=5856&r1=5...
==============================================================================
--- trunk/libcss/test/css21.c (original)
+++ trunk/libcss/test/css21.c Sun Nov 30 16:19:08 2008
@@ -5,6 +5,12 @@
#include "stylesheet.h"
#include "testutils.h"
+
+#define ITERATIONS (1)
+#define DUMP_HASH (0)
+#define DUMP_CSS (1)
+
+extern void parserutils_hash_dump(parserutils_hash *hash);
static void *myrealloc(void *ptr, size_t len, void *pw)
{
@@ -30,45 +36,55 @@
/* Initialise library */
assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK);
- assert(css_stylesheet_create(CSS_LEVEL_21, "UTF-8", argv[2], NULL,
- CSS_ORIGIN_AUTHOR, CSS_MEDIA_ALL, NULL, NULL,
- myrealloc, NULL, &sheet) == CSS_OK);
+ for (int count = 0; count < ITERATIONS; count++) {
- fp = fopen(argv[2], "rb");
- if (fp == NULL) {
- printf("Failed opening %s\n", argv[2]);
- return 1;
+ assert(css_stylesheet_create(CSS_LEVEL_21, "UTF-8", argv[2],
+ NULL, CSS_ORIGIN_AUTHOR, CSS_MEDIA_ALL, NULL,
+ NULL, myrealloc, NULL, &sheet) == CSS_OK);
+
+ fp = fopen(argv[2], "rb");
+ if (fp == NULL) {
+ printf("Failed opening %s\n", argv[2]);
+ return 1;
+ }
+
+ fseek(fp, 0, SEEK_END);
+ origlen = len = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+
+ while (len >= CHUNK_SIZE) {
+ fread(buf, 1, CHUNK_SIZE, fp);
+
+ error = css_stylesheet_append_data(sheet, buf,
+ CHUNK_SIZE);
+ assert(error == CSS_OK || error == CSS_NEEDDATA);
+
+ len -= CHUNK_SIZE;
+ }
+
+ if (len > 0) {
+ fread(buf, 1, len, fp);
+
+ error = css_stylesheet_append_data(sheet, buf, len);
+ assert(error == CSS_OK || error == CSS_NEEDDATA);
+
+ len = 0;
+ }
+
+ fclose(fp);
+
+ assert(css_stylesheet_data_done(sheet) == CSS_OK);
+
+#if DUMP_HASH
+ parserutils_hash_dump(sheet->dictionary);
+#endif
+
+#if DUMP_CSS
+ css_stylesheet_dump(sheet, stdout);
+#endif
+
+ css_stylesheet_destroy(sheet);
}
-
- fseek(fp, 0, SEEK_END);
- origlen = len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
-
- while (len >= CHUNK_SIZE) {
- fread(buf, 1, CHUNK_SIZE, fp);
-
- error = css_stylesheet_append_data(sheet, buf, CHUNK_SIZE);
- assert(error == CSS_OK || error == CSS_NEEDDATA);
-
- len -= CHUNK_SIZE;
- }
-
- if (len > 0) {
- fread(buf, 1, len, fp);
-
- error = css_stylesheet_append_data(sheet, buf, len);
- assert(error == CSS_OK || error == CSS_NEEDDATA);
-
- len = 0;
- }
-
- fclose(fp);
-
- assert(css_stylesheet_data_done(sheet) == CSS_OK);
-
- css_stylesheet_dump(sheet, stdout);
-
- css_stylesheet_destroy(sheet);
assert(css_finalise(myrealloc, NULL) == CSS_OK);
14 years, 9 months
r5855 jmb - /trunk/libcss/src/parse/parse.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 16:16:10 2008
New Revision: 5855
URL: http://source.netsurf-browser.org?rev=5855&view=rev
Log:
Make getToken reduce consecutive whitespace tokens to a single whitespace token.
Update eatWS appropriately. This reduces the number of calls to getToken by a million or so.
Modified:
trunk/libcss/src/parse/parse.c
Modified: trunk/libcss/src/parse/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/parse.c?rev=5855...
==============================================================================
--- trunk/libcss/src/parse/parse.c (original)
+++ trunk/libcss/src/parse/parse.c Sun Nov 30 16:16:10 2008
@@ -102,6 +102,8 @@
uint8_t match_char; /**< Close bracket type for parseAny */
+ bool last_was_ws; /**< Last token was whitespace */
+
css_parser_event_handler event; /**< Client's event handler */
void *event_pw; /**< Client data for event handler */
@@ -266,6 +268,7 @@
p->parseError = false;
p->match_char = 0;
p->event = NULL;
+ p->last_was_ws = false;
p->event_pw = NULL;
p->alloc = alloc;
p->pw = pw;
@@ -568,6 +571,14 @@
error = css_lexer_get_token(parser->lexer, &t);
if (error != CSS_OK)
return error;
+
+ /* If the last token read was whitespace, keep reading
+ * tokens until we encounter one that isn't whitespace */
+ while (parser->last_was_ws && t->type == CSS_TOKEN_S) {
+ error = css_lexer_get_token(parser->lexer, &t);
+ if (error != CSS_OK)
+ return error;
+ }
/** \todo We need only intern for the following token types:
*
@@ -662,6 +673,8 @@
if (perror != PARSERUTILS_OK)
return css_error_from_parserutils_error(perror);
+ parser->last_was_ws = ((*token)->type == CSS_TOKEN_S);
+
return CSS_OK;
}
@@ -699,17 +712,14 @@
const css_token *token;
css_error error;
- while (1) {
- error = getToken(parser, &token);
- if (error != CSS_OK)
- return error;
-
- if (token->type != CSS_TOKEN_S) {
- error = pushBack(parser, token);
- if (error != CSS_OK)
- return error;
- break;
- }
+ error = getToken(parser, &token);
+ if (error != CSS_OK)
+ return error;
+
+ if (token->type != CSS_TOKEN_S) {
+ error = pushBack(parser, token);
+ if (error != CSS_OK)
+ return error;
}
return CSS_OK;
14 years, 9 months
r5854 jmb - /trunk/libcss/src/parse/parse.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 13:24:34 2008
New Revision: 5854
URL: http://source.netsurf-browser.org?rev=5854&view=rev
Log:
Slightly clearer code. Marginally faster, too.
Modified:
trunk/libcss/src/parse/parse.c
Modified: trunk/libcss/src/parse/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/parse.c?rev=5854...
==============================================================================
--- trunk/libcss/src/parse/parse.c (original)
+++ trunk/libcss/src/parse/parse.c Sun Nov 30 13:24:34 2008
@@ -573,7 +573,7 @@
*
* CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
* CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_URI,
- * CSS_TOKEN_UNICODE_RANGE?, CSS_TOKEN_FUNCTION
+ * CSS_TOKEN_UNICODE_RANGE?, CSS_TOKEN_FUNCTION, CSS_TOKEN_CHAR
*
* It would be better if we didn't intern the text for these
* token types:
@@ -581,64 +581,74 @@
* CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION
*/
- if (t->type != CSS_TOKEN_S &&
+ if ((t->type == CSS_TOKEN_IDENT ||
+ t->type == CSS_TOKEN_ATKEYWORD ||
+ t->type == CSS_TOKEN_STRING ||
+ t->type == CSS_TOKEN_INVALID_STRING ||
+ t->type == CSS_TOKEN_HASH ||
+ t->type == CSS_TOKEN_URI ||
+ t->type == CSS_TOKEN_UNICODE_RANGE ||
+ t->type == CSS_TOKEN_FUNCTION ||
+ t->type == CSS_TOKEN_CHAR ||
+ t->type == CSS_TOKEN_NUMBER ||
+ t->type == CSS_TOKEN_PERCENTAGE ||
+ t->type == CSS_TOKEN_DIMENSION) &&
t->data.data != NULL && t->data.len > 0) {
+ const parserutils_hash_entry *interned;
+
+ /* Invalidate lowercase data */
+ t->lower.data = NULL;
+
+ if (t->type == CSS_TOKEN_IDENT ||
+ t->type == CSS_TOKEN_ATKEYWORD ||
+ t->type == CSS_TOKEN_HASH ||
+ t->type == CSS_TOKEN_FUNCTION) {
+ uint8_t temp[t->data.len];
+ bool lower = false;
+
+ for (size_t i = 0; i < t->data.len; i++) {
+ uint8_t c = t->data.data[i];
+
+ if ('A' <= c && c <= 'Z') {
+ lower = true;
+ c |= 0x20;
+ }
+
+ temp[i] = c;
+ }
+
+ if (lower == true) {
+ /* Insert lowercase version */
+ perror = parserutils_hash_insert(
+ parser->dictionary,
+ temp, t->data.len,
+ &interned);
+ if (perror != PARSERUTILS_OK) {
+ return css_error_from_parserutils_error(
+ perror);
+ }
+
+ t->lower.data = interned->data;
+ t->lower.len = interned->len;
+ }
+ }
+
/* Insert token text into the dictionary */
- const parserutils_hash_entry *interned;
- uint8_t temp[t->data.len];
- bool lower = false;
-
- switch (t->type) {
- case CSS_TOKEN_IDENT:
- case CSS_TOKEN_ATKEYWORD:
- case CSS_TOKEN_HASH:
- case CSS_TOKEN_FUNCTION:
- for (size_t i = 0; i < t->data.len; i++) {
- temp[i] = tolower(t->data.data[i]);
- if (temp[i] != t->data.data[i])
- lower = true;
- }
- break;
- default:
- break;
- }
-
- if (lower == true) {
- /* We get to insert it twice - once for the raw
- * data, and once for a lowercased version that
- * we need internally. */
- perror = parserutils_hash_insert(
- parser->dictionary,
- temp, t->data.len,
- &interned);
- if (perror != PARSERUTILS_OK) {
- return css_error_from_parserutils_error(
- perror);
- }
-
- t->lower.data = interned->data;
- t->lower.len = interned->len;
-
- perror = parserutils_hash_insert(
- parser->dictionary,
- t->data.data, t->data.len,
- &interned);
- } else {
- /* Otherwise, we're not interested in case */
- perror = parserutils_hash_insert(
- parser->dictionary,
- t->data.data, t->data.len,
- &interned);
-
+ perror = parserutils_hash_insert(parser->dictionary,
+ t->data.data, t->data.len,
+ &interned);
+
+ if (t->lower.data == NULL) {
t->lower.data = interned->data;
t->lower.len = interned->len;
}
+
if (perror != PARSERUTILS_OK)
return css_error_from_parserutils_error(perror);
t->data.data = interned->data;
t->data.len = interned->len;
- } else if (t->type == CSS_TOKEN_S) {
+ } else {
t->data.data = t->lower.data = NULL;
t->data.len = t->lower.len = 0;
}
14 years, 9 months
r5853 jmb - /trunk/libparserutils/src/utils/hash.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 11:34:04 2008
New Revision: 5853
URL: http://source.netsurf-browser.org?rev=5853&view=rev
Log:
Make things clearer through use of temporary variables
Modified:
trunk/libparserutils/src/utils/hash.c
Modified: trunk/libparserutils/src/utils/hash.c
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/hash.c?r...
==============================================================================
--- trunk/libparserutils/src/utils/hash.c (original)
+++ trunk/libparserutils/src/utils/hash.c Sun Nov 30 11:34:04 2008
@@ -128,30 +128,33 @@
const uint8_t *data, size_t len,
const parserutils_hash_entry **inserted)
{
- uint32_t index;
+ uint32_t index, mask;
slot_table *slots;
+ const parserutils_hash_entry **entries;
+ const parserutils_hash_entry *entry;
const void *idata, *ientry;
- parserutils_hash_entry entry;
+ parserutils_hash_entry new_entry;
parserutils_error error;
if (hash == NULL || data == NULL || inserted == NULL)
return PARSERUTILS_BADPARM;
slots = hash->slots;
+ entries = slots->slots;
/* Find index */
- index = _hash(data, len) % slots->n_slots;
+ mask = slots->n_slots - 1;
+ index = _hash(data, len) & mask;
/* Use linear probing to resolve collisions */
- while (slots->slots[index] != NULL) {
+ while ((entry = entries[index]) != NULL) {
/* If this data is already in the hash, return it */
- if (_cmp(data, len, slots->slots[index]->data,
- slots->slots[index]->len) == 0) {
- (*inserted) = slots->slots[index];
+ if (_cmp(data, len, entry->data, entry->len) == 0) {
+ (*inserted) = entry;
return PARSERUTILS_OK;
}
- index = (index + 1) % slots->n_slots;
+ index = (index + 1) & mask;
}
/* The data isn't in the hash. Index identifies the slot to use */
@@ -159,18 +162,18 @@
if (error != PARSERUTILS_OK)
return error;
- entry.len = len;
- entry.data = idata;
+ new_entry.len = len;
+ new_entry.data = idata;
error = parserutils_chunkarray_insert(hash->entries,
- &entry, sizeof(parserutils_hash_entry), &ientry);
+ &new_entry, sizeof(parserutils_hash_entry), &ientry);
if (error != PARSERUTILS_OK) {
/* We effectively leak the inserted data, as chunkarray
* doesn't support deletion. */
return error;
}
- (*inserted) = slots->slots[index] = ientry;
+ (*inserted) = entries[index] = ientry;
/* If the table is 75% full, then increase its size */
if (++slots->n_used >= (slots->n_slots >> 1) + (slots->n_slots >> 2))
@@ -197,7 +200,7 @@
#define read16(d) ((((uint32_t)((d)[1])) << 8) | ((uint32_t)((d)[0])))
- for (len = len / 4; len > 0; len--) {
+ for (len = len >> 2; len > 0; len--) {
hash += read16(data);
tmp = (read16(data + 2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
@@ -293,11 +296,12 @@
continue;
/* Find new index */
- uint32_t index = _hash(e->data, e->len) % s->n_slots;
+ uint32_t mask = s->n_slots - 1;
+ uint32_t index = _hash(e->data, e->len) & mask;
/* Use linear probing to resolve collisions */
while (s->slots[index] != NULL)
- index = (index + 1) % s->n_slots;
+ index = (index + 1) & mask;
/* Insert into new slot table */
s->slots[index] = e;
14 years, 9 months
r5852 jmb - in /trunk/libcss: include/libcss/types.h src/parse/language.c src/parse/parse.c src/parse/parse.h src/stylesheet.c src/stylesheet.h test/parse.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 10:43:37 2008
New Revision: 5852
URL: http://source.netsurf-browser.org?rev=5852&view=rev
Log:
Use parserutils_hash instead of parserutils_dict.
This approximately halves the size of the interned string table.
We now have the following for allzengarden.css:
5507 slots used (of 8192 => 67.224121%)
Data:
3 full blocks: 12288 bytes
10 partial blocks: 38946 bytes (of 40960 => 95.083008%)
Total: 53488 (4112) (32)
Entries:
21 full blocks: 86016 bytes
1 partial blocks: 2096 bytes (of 4096 => 51.171875%)
Total: 90496 (4112) (32)
Hash structures: 65592
Which gives a total dictionary size of 209,576 bytes.
Note that 43% of this is parserutils_hash_entry structures (length-pointer pairs). It would be good, therefore, to be able to purge these.
Modified:
trunk/libcss/include/libcss/types.h
trunk/libcss/src/parse/language.c
trunk/libcss/src/parse/parse.c
trunk/libcss/src/parse/parse.h
trunk/libcss/src/stylesheet.c
trunk/libcss/src/stylesheet.h
trunk/libcss/test/parse.c
Modified: trunk/libcss/include/libcss/types.h
URL: http://source.netsurf-browser.org/trunk/libcss/include/libcss/types.h?rev...
==============================================================================
--- trunk/libcss/include/libcss/types.h (original)
+++ trunk/libcss/include/libcss/types.h Sun Nov 30 10:43:37 2008
@@ -12,7 +12,7 @@
#include <stdint.h>
#include <stdlib.h>
-#include <parserutils/utils/dict.h>
+#include <parserutils/utils/hash.h>
/** Source of charset information, in order of importance
* A client-dictated charset will override all others.
@@ -57,9 +57,9 @@
* String type
*
* \todo It might be better to define parserutils_string, and use that.
- * (where parserutils_string is identical to parserutils_dict_entry)
+ * (where parserutils_string is identical to parserutils_hash_entry)
*/
-typedef parserutils_dict_entry css_string;
+typedef parserutils_hash_entry css_string;
typedef struct css_stylesheet css_stylesheet;
Modified: trunk/libcss/src/parse/language.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/language.c?rev=5...
==============================================================================
--- trunk/libcss/src/parse/language.c (original)
+++ trunk/libcss/src/parse/language.c Sun Nov 30 10:43:37 2008
@@ -409,7 +409,7 @@
return CSS_INVALID;
}
- entry.data = atkeyword->lower.data;
+ entry.data = (void *) atkeyword->lower.data;
perror = parserutils_stack_push(c->context, (void *) &entry);
if (perror != PARSERUTILS_OK) {
Modified: trunk/libcss/src/parse/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/parse.c?rev=5852...
==============================================================================
--- trunk/libcss/src/parse/parse.c (original)
+++ trunk/libcss/src/parse/parse.c Sun Nov 30 10:43:37 2008
@@ -10,7 +10,7 @@
#include <stdbool.h>
#include <parserutils/input/inputstream.h>
-#include <parserutils/utils/dict.h>
+#include <parserutils/utils/hash.h>
#include <parserutils/utils/stack.h>
#include <parserutils/utils/vector.h>
@@ -91,7 +91,7 @@
#define STACK_CHUNK 32
parserutils_stack *states; /**< Stack of states */
- parserutils_dict *dictionary; /**< Dictionary for interned strings */
+ parserutils_hash *dictionary; /**< Dictionary for interned strings */
parserutils_vector *tokens; /**< Vector of pending tokens */
@@ -185,7 +185,7 @@
* CSS_NOMEM on memory exhaustion
*/
css_error css_parser_create(const char *charset, css_charset_source cs_source,
- parserutils_dict *dictionary, css_alloc alloc, void *pw,
+ parserutils_hash *dictionary, css_alloc alloc, void *pw,
css_parser **parser)
{
css_parser *p;
@@ -420,13 +420,13 @@
const uint8_t *css_parser_dict_add(css_parser *parser, const uint8_t *data,
size_t len)
{
- const parserutils_dict_entry *interned;
+ const parserutils_hash_entry *interned;
parserutils_error perror;
if (parser == NULL || data == NULL || len == 0)
return NULL;
- perror = parserutils_dict_insert(parser->dictionary, data, len,
+ perror = parserutils_hash_insert(parser->dictionary, data, len,
&interned);
if (perror != PARSERUTILS_OK)
return NULL;
@@ -584,7 +584,7 @@
if (t->type != CSS_TOKEN_S &&
t->data.data != NULL && t->data.len > 0) {
/* Insert token text into the dictionary */
- const parserutils_dict_entry *interned;
+ const parserutils_hash_entry *interned;
uint8_t temp[t->data.len];
bool lower = false;
@@ -607,7 +607,7 @@
/* We get to insert it twice - once for the raw
* data, and once for a lowercased version that
* we need internally. */
- perror = parserutils_dict_insert(
+ perror = parserutils_hash_insert(
parser->dictionary,
temp, t->data.len,
&interned);
@@ -619,13 +619,13 @@
t->lower.data = interned->data;
t->lower.len = interned->len;
- perror = parserutils_dict_insert(
+ perror = parserutils_hash_insert(
parser->dictionary,
t->data.data, t->data.len,
&interned);
} else {
/* Otherwise, we're not interested in case */
- perror = parserutils_dict_insert(
+ perror = parserutils_hash_insert(
parser->dictionary,
t->data.data, t->data.len,
&interned);
Modified: trunk/libcss/src/parse/parse.h
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/parse.h?rev=5852...
==============================================================================
--- trunk/libcss/src/parse/parse.h (original)
+++ trunk/libcss/src/parse/parse.h Sun Nov 30 10:43:37 2008
@@ -8,7 +8,7 @@
#ifndef css_parse_parse_h_
#define css_parse_parse_h_
-#include <parserutils/utils/dict.h>
+#include <parserutils/utils/hash.h>
#include <parserutils/utils/vector.h>
#include <libcss/errors.h>
@@ -57,7 +57,7 @@
} css_parser_optparams;
css_error css_parser_create(const char *charset, css_charset_source cs_source,
- parserutils_dict *dict, css_alloc alloc, void *pw,
+ parserutils_hash *dict, css_alloc alloc, void *pw,
css_parser **parser);
css_error css_parser_destroy(css_parser *parser);
Modified: trunk/libcss/src/stylesheet.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/stylesheet.c?rev=5852&...
==============================================================================
--- trunk/libcss/src/stylesheet.c (original)
+++ trunk/libcss/src/stylesheet.c Sun Nov 30 10:43:37 2008
@@ -51,7 +51,7 @@
memset(sheet, 0, sizeof(css_stylesheet));
- perror = parserutils_dict_create((parserutils_alloc) alloc, alloc_pw,
+ perror = parserutils_hash_create((parserutils_alloc) alloc, alloc_pw,
&sheet->dictionary);
if (perror != PARSERUTILS_OK) {
alloc(sheet, 0, alloc_pw);
@@ -62,7 +62,7 @@
charset ? CSS_CHARSET_DICTATED : CSS_CHARSET_DEFAULT,
sheet->dictionary, alloc, alloc_pw, &sheet->parser);
if (error != CSS_OK) {
- parserutils_dict_destroy(sheet->dictionary);
+ parserutils_hash_destroy(sheet->dictionary);
alloc(sheet, 0, alloc_pw);
return error;
}
@@ -72,7 +72,7 @@
&sheet->parser_frontend);
if (error != CSS_OK) {
css_parser_destroy(sheet->parser);
- parserutils_dict_destroy(sheet->dictionary);
+ parserutils_hash_destroy(sheet->dictionary);
alloc(sheet, 0, alloc_pw);
return error;
}
@@ -84,7 +84,7 @@
if (sheet->url == NULL) {
css_language_destroy(sheet->parser_frontend);
css_parser_destroy(sheet->parser);
- parserutils_dict_destroy(sheet->dictionary);
+ parserutils_hash_destroy(sheet->dictionary);
alloc(sheet, 0, alloc_pw);
return CSS_NOMEM;
}
@@ -97,7 +97,7 @@
alloc(sheet->url, 0, alloc_pw);
css_language_destroy(sheet->parser_frontend);
css_parser_destroy(sheet->parser);
- parserutils_dict_destroy(sheet->dictionary);
+ parserutils_hash_destroy(sheet->dictionary);
alloc(sheet, 0, alloc_pw);
return CSS_NOMEM;
}
@@ -129,7 +129,7 @@
if (sheet == NULL)
return CSS_BADPARM;
- parserutils_dict_destroy(sheet->dictionary);
+ parserutils_hash_destroy(sheet->dictionary);
if (sheet->title != NULL)
sheet->alloc(sheet->title, 0, sheet->pw);
@@ -376,7 +376,7 @@
* the dictionary, it would be more efficient to pass a pointer to the
* dictionary entry around rather than re-discovering it here. The same
* applies for value, and in css_stylesheet_selector_detail_create() */
- perror = parserutils_dict_insert(sheet->dictionary, name->data,
+ perror = parserutils_hash_insert(sheet->dictionary, name->data,
name->len, &iname);
if (perror != PARSERUTILS_OK) {
sheet->alloc(sel, 0, sheet->pw);
@@ -385,7 +385,7 @@
sel->data.name = iname;
if (value != NULL) {
- perror = parserutils_dict_insert(sheet->dictionary,
+ perror = parserutils_hash_insert(sheet->dictionary,
value->data, value->len, &ivalue);
if (perror != PARSERUTILS_OK) {
sheet->alloc(sel, 0, sheet->pw);
@@ -454,7 +454,7 @@
det->type = type;
- perror = parserutils_dict_insert(sheet->dictionary, name->data,
+ perror = parserutils_hash_insert(sheet->dictionary, name->data,
name->len, &iname);
if (perror != PARSERUTILS_OK) {
sheet->alloc(det, 0, sheet->pw);
@@ -463,7 +463,7 @@
det->name = iname;
if (value != NULL) {
- perror = parserutils_dict_insert(sheet->dictionary,
+ perror = parserutils_hash_insert(sheet->dictionary,
value->data, value->len, &ivalue);
if (perror != PARSERUTILS_OK) {
sheet->alloc(det, 0, sheet->pw);
Modified: trunk/libcss/src/stylesheet.h
URL: http://source.netsurf-browser.org/trunk/libcss/src/stylesheet.h?rev=5852&...
==============================================================================
--- trunk/libcss/src/stylesheet.h (original)
+++ trunk/libcss/src/stylesheet.h Sun Nov 30 10:43:37 2008
@@ -11,7 +11,7 @@
#include <inttypes.h>
#include <stdio.h>
-#include <parserutils/utils/dict.h>
+#include <parserutils/utils/hash.h>
#include <libcss/errors.h>
#include <libcss/functypes.h>
@@ -169,7 +169,7 @@
css_parser *parser; /**< Core parser for sheet */
void *parser_frontend; /**< Frontend parser */
- parserutils_dict *dictionary; /**< String dictionary */
+ parserutils_hash *dictionary; /**< String dictionary */
css_alloc alloc; /**< Allocation function */
void *pw; /**< Private word */
Modified: trunk/libcss/test/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/test/parse.c?rev=5852&r1=5...
==============================================================================
--- trunk/libcss/test/parse.c (original)
+++ trunk/libcss/test/parse.c Sun Nov 30 10:43:37 2008
@@ -71,7 +71,7 @@
int main(int argc, char **argv)
{
css_parser_optparams params;
- parserutils_dict *dict;
+ parserutils_hash *dict;
css_parser *parser;
FILE *fp;
size_t len, origlen;
@@ -87,7 +87,7 @@
/* Initialise library */
assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK);
- assert(parserutils_dict_create(myrealloc, NULL, &dict) ==
+ assert(parserutils_hash_create(myrealloc, NULL, &dict) ==
PARSERUTILS_OK);
assert(css_parser_create("UTF-8", CSS_CHARSET_DICTATED, dict,
@@ -132,7 +132,7 @@
css_parser_destroy(parser);
- parserutils_dict_destroy(dict);
+ parserutils_hash_destroy(dict);
assert(css_finalise(myrealloc, NULL) == CSS_OK);
14 years, 9 months
r5851 jmb - /trunk/hubbub/src/tokeniser/tokeniser.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 10:37:27 2008
New Revision: 5851
URL: http://source.netsurf-browser.org?rev=5851&view=rev
Log:
Fix build breakage
Modified:
trunk/hubbub/src/tokeniser/tokeniser.c
Modified: trunk/hubbub/src/tokeniser/tokeniser.c
URL: http://source.netsurf-browser.org/trunk/hubbub/src/tokeniser/tokeniser.c?...
==============================================================================
--- trunk/hubbub/src/tokeniser/tokeniser.c (original)
+++ trunk/hubbub/src/tokeniser/tokeniser.c Sun Nov 30 10:37:27 2008
@@ -3037,7 +3037,6 @@
hubbub_token *token)
{
hubbub_error err = HUBBUB_OK;
- uint32_t i;
assert(tokeniser != NULL);
assert(token != NULL);
@@ -3057,6 +3056,8 @@
break;
case HUBBUB_TOKEN_START_TAG:
case HUBBUB_TOKEN_END_TAG:
+ {
+ uint32_t i;
assert(memchr(token->data.tag.name.ptr, 0xff,
token->data.tag.name.len) == NULL);
for (i = 0; i < token->data.tag.n_attributes; i++) {
@@ -3067,6 +3068,7 @@
assert(memchr(attr->value.ptr, 0xff, attr->value.len) ==
NULL);
}
+ }
break;
case HUBBUB_TOKEN_COMMENT:
assert(memchr(token->data.comment.ptr, 0xff,
14 years, 9 months
r5850 jmb - in /trunk/libparserutils: include/parserutils/utils/hash.h include/parserutils/utils/stack.h src/utils/Makefile src/utils/chunkarray.c src/utils/chunkarray.h src/utils/hash.c src/utils/stack.c test/INDEX test/Makefile test/hash.c
by netsurf@semichrome.net
Author: jmb
Date: Sun Nov 30 10:35:19 2008
New Revision: 5850
URL: http://source.netsurf-browser.org?rev=5850&view=rev
Log:
New datastructures:
+ Chunked array
+ Hash table (open addressing)
Constify parameter to parserutils_stack_push
Added:
trunk/libparserutils/include/parserutils/utils/hash.h
trunk/libparserutils/src/utils/chunkarray.c
trunk/libparserutils/src/utils/chunkarray.h
trunk/libparserutils/src/utils/hash.c
trunk/libparserutils/test/hash.c
Modified:
trunk/libparserutils/include/parserutils/utils/stack.h
trunk/libparserutils/src/utils/Makefile
trunk/libparserutils/src/utils/stack.c
trunk/libparserutils/test/INDEX
trunk/libparserutils/test/Makefile
Added: trunk/libparserutils/include/parserutils/utils/hash.h
URL: http://source.netsurf-browser.org/trunk/libparserutils/include/parserutil...
==============================================================================
--- trunk/libparserutils/include/parserutils/utils/hash.h (added)
+++ trunk/libparserutils/include/parserutils/utils/hash.h Sun Nov 30 10:35:19 2008
@@ -1,0 +1,31 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb(a)netsurf-browser.org>
+ */
+
+#ifndef parserutils_utils_hash_h_
+#define parserutils_utils_hash_h_
+
+#include <parserutils/errors.h>
+#include <parserutils/functypes.h>
+
+typedef struct parserutils_hash_entry {
+ size_t len;
+ const uint8_t *data;
+} parserutils_hash_entry;
+
+struct parserutils_hash;
+typedef struct parserutils_hash parserutils_hash;
+
+parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw,
+ parserutils_hash **hash);
+parserutils_error parserutils_hash_destroy(parserutils_hash *hash);
+
+parserutils_error parserutils_hash_insert(parserutils_hash *hash,
+ const uint8_t *data, size_t len,
+ const parserutils_hash_entry **inserted);
+
+#endif
+
Modified: trunk/libparserutils/include/parserutils/utils/stack.h
URL: http://source.netsurf-browser.org/trunk/libparserutils/include/parserutil...
==============================================================================
--- trunk/libparserutils/include/parserutils/utils/stack.h (original)
+++ trunk/libparserutils/include/parserutils/utils/stack.h Sun Nov 30 10:35:19 2008
@@ -20,7 +20,8 @@
parserutils_alloc alloc, void *pw, parserutils_stack **stack);
parserutils_error parserutils_stack_destroy(parserutils_stack *stack);
-parserutils_error parserutils_stack_push(parserutils_stack *stack, void *item);
+parserutils_error parserutils_stack_push(parserutils_stack *stack,
+ const void *item);
parserutils_error parserutils_stack_pop(parserutils_stack *stack, void *item);
void *parserutils_stack_get_current(parserutils_stack *stack);
Modified: trunk/libparserutils/src/utils/Makefile
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/Makefile...
==============================================================================
--- trunk/libparserutils/src/utils/Makefile (original)
+++ trunk/libparserutils/src/utils/Makefile Sun Nov 30 10:35:19 2008
@@ -35,7 +35,8 @@
CFLAGS := $(CFLAGS) -I$(d)
# Sources
-SRCS_$(d) := buffer.c dict.c errors.c rbtree.c stack.c vector.c
+SRCS_$(d) := chunkarray.c buffer.c dict.c errors.c hash.c rbtree.c stack.c \
+ vector.c
# Append to sources for component
SOURCES += $(addprefix $(d), $(SRCS_$(d)))
Added: trunk/libparserutils/src/utils/chunkarray.c
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/chunkarr...
==============================================================================
--- trunk/libparserutils/src/utils/chunkarray.c (added)
+++ trunk/libparserutils/src/utils/chunkarray.c Sun Nov 30 10:35:19 2008
@@ -1,0 +1,213 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb(a)netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils/chunkarray.h"
+
+typedef struct chunk chunk;
+
+struct chunk {
+#define CHUNK_SIZE (4096)
+ chunk *next;
+
+ uint32_t used;
+
+ uint8_t data[CHUNK_SIZE];
+};
+
+struct parserutils_chunkarray {
+ chunk *used_chunks;
+
+ chunk *free_chunks;
+
+ parserutils_alloc alloc;
+ void *pw;
+};
+
+/**
+ * Create a chunked array
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \param array Pointer to location to receive array instance
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_create(parserutils_alloc alloc,
+ void *pw, parserutils_chunkarray **array)
+{
+ parserutils_chunkarray *c;
+
+ if (alloc == NULL || array == NULL)
+ return PARSERUTILS_BADPARM;
+
+ c = alloc(0, sizeof(parserutils_chunkarray), pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ c->used_chunks = NULL;
+ c->free_chunks = NULL;
+
+ c->alloc = alloc;
+ c->pw = pw;
+
+ *array = c;
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Destroy a chunked array
+ *
+ * \param array The array to destroy
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array)
+{
+ chunk *c, *d;
+
+ if (array == NULL)
+ return PARSERUTILS_BADPARM;
+
+ for (c = array->used_chunks; c != NULL; c = d) {
+ d = c->next;
+ array->alloc(c, 0, array->pw);
+ }
+
+ for (c = array->free_chunks; c != NULL; c = d) {
+ d = c->next;
+ array->alloc(c, 0, array->pw);
+ }
+
+ array->alloc(array, 0, array->pw);
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Insert an item into an array
+ *
+ * \param array The array to insert into
+ * \param data Pointer to data to insert
+ * \param len Length, in bytes, of data
+ * \param inserted Pointer to location to receive pointer to inserted data
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array,
+ const void *data, size_t len, const void **inserted)
+{
+ if (array == NULL || data == NULL || inserted == NULL)
+ return PARSERUTILS_BADPARM;
+
+ /* If we're trying to insert data larger than CHUNK_SIZE, then it
+ * gets its own chunk. */
+ if (len > CHUNK_SIZE) {
+ chunk *c = array->alloc(0, sizeof(chunk) + len - CHUNK_SIZE,
+ array->pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ /* Populate chunk */
+ (*inserted) = c->data;
+ memcpy(c->data, data, len);
+ c->used = len;
+
+ /* And put it in the used list */
+ c->next = array->used_chunks;
+ array->used_chunks = c;
+ } else {
+ /* Scan the free list until we find a chunk with enough space */
+ chunk *c, *prev;
+
+ for (prev = NULL, c = array->free_chunks; c != NULL;
+ prev = c, c = c->next) {
+ if (CHUNK_SIZE - c->used >= len)
+ break;
+ }
+
+ if (c == NULL) {
+ /* None big enough, create a new one */
+ c = array->alloc(0, sizeof(chunk), array->pw);
+ if (c == NULL)
+ return PARSERUTILS_NOMEM;
+
+ c->used = 0;
+
+ /* Insert it at the head of the free list */
+ c->next = array->free_chunks;
+ array->free_chunks = c;
+
+ /* And ensure that prev is kept in sync */
+ prev = NULL;
+ }
+
+ /* Populate chunk */
+ (*inserted) = c->data + c->used;
+ memcpy(c->data + c->used, data, len);
+ c->used += len;
+
+ /* If we've now filled the chunk, move it to the used list */
+ if (c->used == CHUNK_SIZE) {
+ if (prev != NULL)
+ prev->next = c->next;
+ else
+ array->free_chunks = c->next;
+
+ c->next = array->used_chunks;
+ array->used_chunks = c;
+ }
+ }
+
+ return PARSERUTILS_OK;
+}
+
+/******************************************************************************
+ * Private functions *
+ ******************************************************************************/
+
+extern void parserutils_chunkarray_dump(parserutils_chunkarray *array);
+
+/**
+ * Dump details of a chunked array to stdout
+ *
+ * \param array The array to dump
+ */
+void parserutils_chunkarray_dump(parserutils_chunkarray *array)
+{
+ uint32_t n = 0;
+ size_t count = 0;
+ size_t total = sizeof(parserutils_chunkarray);
+
+ for (chunk *c = array->used_chunks; c != NULL; c = c->next) {
+ n++;
+ count += c->used;
+ if (c->used == CHUNK_SIZE)
+ total += sizeof(chunk);
+ else
+ total += sizeof(chunk) + c->used - CHUNK_SIZE;
+ }
+
+ printf("%u full blocks: %zu bytes\n", n, count);
+
+ n = 0;
+ count = 0;
+
+ for (chunk *c = array->free_chunks; c != NULL; c = c->next) {
+ n++;
+ count += c->used;
+ total += sizeof(chunk);
+ }
+
+ printf("%u partial blocks: %zu bytes (of %u => %f%%)\n", n, count,
+ n * CHUNK_SIZE,
+ (count * 100) / ((float) n * CHUNK_SIZE));
+
+ printf("Total: %zu (%lu) (%lu)\n", total, sizeof(chunk),
+ sizeof(parserutils_chunkarray));
+}
+
Added: trunk/libparserutils/src/utils/chunkarray.h
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/chunkarr...
==============================================================================
--- trunk/libparserutils/src/utils/chunkarray.h (added)
+++ trunk/libparserutils/src/utils/chunkarray.h Sun Nov 30 10:35:19 2008
@@ -1,0 +1,26 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb(a)netsurf-browser.org>
+ */
+
+#ifndef parserutils_utils_chunkarray_h_
+#define parserutils_utils_chunkarray_h_
+
+#include <parserutils/errors.h>
+#include <parserutils/functypes.h>
+
+struct parserutils_chunkarray;
+typedef struct parserutils_chunkarray parserutils_chunkarray;
+
+parserutils_error parserutils_chunkarray_create(parserutils_alloc alloc,
+ void *pw, parserutils_chunkarray **array);
+parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array);
+
+parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array,
+ const void *data, size_t len,
+ const void **inserted);
+
+#endif
+
Added: trunk/libparserutils/src/utils/hash.c
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/hash.c?r...
==============================================================================
--- trunk/libparserutils/src/utils/hash.c (added)
+++ trunk/libparserutils/src/utils/hash.c Sun Nov 30 10:35:19 2008
@@ -1,0 +1,338 @@
+/*
+ * This file is part of LibParserUtils.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb(a)netsurf-browser.org>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <parserutils/utils/hash.h>
+
+#include "utils/chunkarray.h"
+
+typedef struct slot_table slot_table;
+
+struct parserutils_hash {
+ slot_table *slots;
+
+ parserutils_chunkarray *data;
+ parserutils_chunkarray *entries;
+
+ parserutils_alloc alloc;
+ void *pw;
+};
+
+struct slot_table {
+#define DEFAULT_SLOTS (1<<6)
+ size_t n_slots;
+ size_t n_used;
+
+ const parserutils_hash_entry *slots[];
+};
+
+static inline uint32_t _hash(const uint8_t *data, size_t len);
+static inline int _cmp(const uint8_t *a, size_t alen,
+ const uint8_t *b, size_t blen);
+static inline void grow_slots(parserutils_hash *hash);
+
+/**
+ * Create a hash
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \param hash Pointer to location to receive result
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw,
+ parserutils_hash **hash)
+{
+ parserutils_hash *h;
+ parserutils_error error;
+
+ if (alloc == NULL || hash == NULL)
+ return PARSERUTILS_BADPARM;
+
+ h = alloc(0, sizeof(parserutils_hash), pw);
+ if (h == NULL)
+ return PARSERUTILS_NOMEM;
+
+ h->slots = alloc(0, sizeof(slot_table) +
+ DEFAULT_SLOTS * sizeof(parserutils_hash_entry *),
+ pw);
+ if (h->slots == NULL) {
+ alloc(h, 0, pw);
+ return PARSERUTILS_NOMEM;
+ }
+
+ memset(h->slots, 0, sizeof(slot_table) +
+ DEFAULT_SLOTS * sizeof(parserutils_hash_entry *));
+ h->slots->n_slots = DEFAULT_SLOTS;
+
+ error = parserutils_chunkarray_create(alloc, pw, &h->data);
+ if (error != PARSERUTILS_OK) {
+ alloc(h->slots, 0, pw);
+ alloc(h, 0, pw);
+ return error;
+ }
+
+ error = parserutils_chunkarray_create(alloc, pw, &h->entries);
+ if (error != PARSERUTILS_OK) {
+ alloc(h->data, 0, pw);
+ alloc(h->slots, 0, pw);
+ alloc(h, 0, pw);
+ return error;
+ }
+
+ h->alloc = alloc;
+ h->pw = pw;
+
+ *hash = h;
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Destroy a hash
+ *
+ * \param hash The hash to destroy
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_destroy(parserutils_hash *hash)
+{
+ if (hash == NULL)
+ return PARSERUTILS_BADPARM;
+
+ parserutils_chunkarray_destroy(hash->data);
+
+ parserutils_chunkarray_destroy(hash->entries);
+
+ hash->alloc(hash->slots, 0, hash->pw);
+
+ hash->alloc(hash, 0, hash->pw);
+
+ return PARSERUTILS_OK;
+}
+
+/**
+ * Insert an item into a hash
+ *
+ * \param hash The hash to insert into
+ * \param data Pointer to data
+ * \param len Length, in bytes, of data
+ * \param inserted Pointer to location to receive pointer to data in hash
+ * \return PARSERUTILS_OK on success, appropriate error otherwise
+ */
+parserutils_error parserutils_hash_insert(parserutils_hash *hash,
+ const uint8_t *data, size_t len,
+ const parserutils_hash_entry **inserted)
+{
+ uint32_t index;
+ slot_table *slots;
+ const void *idata, *ientry;
+ parserutils_hash_entry entry;
+ parserutils_error error;
+
+ if (hash == NULL || data == NULL || inserted == NULL)
+ return PARSERUTILS_BADPARM;
+
+ slots = hash->slots;
+
+ /* Find index */
+ index = _hash(data, len) % slots->n_slots;
+
+ /* Use linear probing to resolve collisions */
+ while (slots->slots[index] != NULL) {
+ /* If this data is already in the hash, return it */
+ if (_cmp(data, len, slots->slots[index]->data,
+ slots->slots[index]->len) == 0) {
+ (*inserted) = slots->slots[index];
+ return PARSERUTILS_OK;
+ }
+
+ index = (index + 1) % slots->n_slots;
+ }
+
+ /* The data isn't in the hash. Index identifies the slot to use */
+ error = parserutils_chunkarray_insert(hash->data, data, len, &idata);
+ if (error != PARSERUTILS_OK)
+ return error;
+
+ entry.len = len;
+ entry.data = idata;
+
+ error = parserutils_chunkarray_insert(hash->entries,
+ &entry, sizeof(parserutils_hash_entry), &ientry);
+ if (error != PARSERUTILS_OK) {
+ /* We effectively leak the inserted data, as chunkarray
+ * doesn't support deletion. */
+ return error;
+ }
+
+ (*inserted) = slots->slots[index] = ientry;
+
+ /* If the table is 75% full, then increase its size */
+ if (++slots->n_used >= (slots->n_slots >> 1) + (slots->n_slots >> 2))
+ grow_slots(hash);
+
+ return PARSERUTILS_OK;
+}
+
+/******************************************************************************
+ * Private functions *
+ ******************************************************************************/
+
+/**
+ * Hsieh hash function
+ *
+ * \param data Pointer to data to hash
+ * \param len Length, in bytes, of data
+ * \return hash value
+ */
+uint32_t _hash(const uint8_t *data, size_t len)
+{
+ uint32_t hash = len, tmp;
+ int rem = len & 3;
+
+#define read16(d) ((((uint32_t)((d)[1])) << 8) | ((uint32_t)((d)[0])))
+
+ for (len = len / 4; len > 0; len--) {
+ hash += read16(data);
+ tmp = (read16(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 4;
+ hash += hash >> 11;
+ }
+
+ switch (rem) {
+ case 3:
+ hash += read16(data);
+ hash ^= hash << 16;
+ hash ^= data[2] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += read16(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += data[0];
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+#undef read16
+
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+/**
+ * Comparator for hash entries
+ *
+ * \param a First item to consider
+ * \param alen Length, in bytes, of a
+ * \param b Second item to consider
+ * \param blen Length, in bytes, of b
+ * \return <0 iff a<b, ==0 iff a=b, >0 iff a>b
+ */
+int _cmp(const uint8_t *a, size_t alen, const uint8_t *b, size_t blen)
+{
+ int result;
+
+ /* Check for identity first */
+ if (a == b)
+ return 0;
+
+ /* Sort first by length, then by data equality */
+ if ((result = alen - blen) == 0)
+ result = memcmp(a, b, alen);
+
+ return result;
+}
+
+/**
+ * Increase the size of the slot table
+ *
+ * \param hash The hash to process
+ */
+void grow_slots(parserutils_hash *hash)
+{
+ slot_table *s;
+ size_t new_size;
+
+ if (hash == NULL)
+ return;
+
+ new_size = hash->slots->n_slots << 1;
+
+ /* Create new slot table */
+ s = hash->alloc(0, sizeof(slot_table) +
+ new_size * sizeof(parserutils_hash_entry *),
+ hash->pw);
+ if (s == NULL)
+ return;
+
+ memset(s, 0, sizeof(slot_table) +
+ new_size * sizeof(parserutils_hash_entry *));
+ s->n_slots = new_size;
+
+ /* Now, populate it with the contents of the current one */
+ for (uint32_t i = 0; i < hash->slots->n_slots; i++) {
+ const parserutils_hash_entry *e = hash->slots->slots[i];
+
+ if (e == NULL)
+ continue;
+
+ /* Find new index */
+ uint32_t index = _hash(e->data, e->len) % s->n_slots;
+
+ /* Use linear probing to resolve collisions */
+ while (s->slots[index] != NULL)
+ index = (index + 1) % s->n_slots;
+
+ /* Insert into new slot table */
+ s->slots[index] = e;
+ s->n_used++;
+ }
+
+ /* Destroy current table, and replace it with the new one */
+ hash->alloc(hash->slots, 0, hash->pw);
+ hash->slots = s;
+
+ return;
+}
+
+extern void parserutils_chunkarray_dump(parserutils_chunkarray *array);
+extern void parserutils_hash_dump(parserutils_hash *hash);
+
+/**
+ * Dump details of a hash to stdout
+ *
+ * \param hash The hash to dump
+ */
+void parserutils_hash_dump(parserutils_hash *hash)
+{
+ printf("%zu slots used (of %zu => %f%%)\n", hash->slots->n_used,
+ hash->slots->n_slots,
+ hash->slots->n_used * 100 / (float) hash->slots->n_slots);
+
+ printf("Data:\n");
+ parserutils_chunkarray_dump(hash->data);
+
+ printf("Entries:\n");
+ parserutils_chunkarray_dump(hash->entries);
+
+ printf("Hash structures: %zu\n",
+ sizeof(parserutils_hash) + sizeof(slot_table) +
+ hash->slots->n_slots * sizeof(parserutils_hash_entry *));
+}
+
Modified: trunk/libparserutils/src/utils/stack.c
URL: http://source.netsurf-browser.org/trunk/libparserutils/src/utils/stack.c?...
==============================================================================
--- trunk/libparserutils/src/utils/stack.c (original)
+++ trunk/libparserutils/src/utils/stack.c Sun Nov 30 10:35:19 2008
@@ -92,7 +92,8 @@
* \param item The item to push
* \return PARSERUTILS_OK on success, appropriate error otherwise
*/
-parserutils_error parserutils_stack_push(parserutils_stack *stack, void *item)
+parserutils_error parserutils_stack_push(parserutils_stack *stack,
+ const void *item)
{
int32_t slot;
Modified: trunk/libparserutils/test/INDEX
URL: http://source.netsurf-browser.org/trunk/libparserutils/test/INDEX?rev=585...
==============================================================================
--- trunk/libparserutils/test/INDEX (original)
+++ trunk/libparserutils/test/INDEX Sun Nov 30 10:35:19 2008
@@ -10,6 +10,7 @@
cscodec-ext8 Extended 8bit charset codec cscodec-ext8
cscodec-8859 ISO-8859-n codec cscodec-8859
dict Dictionary handling
+hash Hashtable implementation
rbtree Red-black tree implementation
filter Input stream filtering
inputstream Inputstream handling input
Modified: trunk/libparserutils/test/Makefile
URL: http://source.netsurf-browser.org/trunk/libparserutils/test/Makefile?rev=...
==============================================================================
--- trunk/libparserutils/test/Makefile (original)
+++ trunk/libparserutils/test/Makefile Sun Nov 30 10:35:19 2008
@@ -36,7 +36,7 @@
# Tests
TESTS_$(d) := aliases cscodec-8859 cscodec-ext8 cscodec-utf8 cscodec-utf16 \
- charset dict filter inputstream parserutils rbtree
+ charset dict filter hash inputstream parserutils rbtree
TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \
regression/stream-nomem regression/filter-badenc-segv
Added: trunk/libparserutils/test/hash.c
URL: http://source.netsurf-browser.org/trunk/libparserutils/test/hash.c?rev=58...
==============================================================================
--- trunk/libparserutils/test/hash.c (added)
+++ trunk/libparserutils/test/hash.c Sun Nov 30 10:35:19 2008
@@ -1,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <parserutils/utils/hash.h>
+
+#include "testutils.h"
+
+extern void parserutils_hash_dump(parserutils_hash *hash);
+
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+ UNUSED(pw);
+
+ return realloc(ptr, len);
+}
+
+int main(int argc, char **argv)
+{
+ parserutils_hash *hash;
+ uint8_t buf[256];
+
+ UNUSED(argc);
+ UNUSED(argv);
+
+ /* Seed buffer with printable ascii */
+ for (int i = 0; i < (int) sizeof(buf); i++) {
+ buf[i] = 97 + (int) (26.0 * (rand() / (RAND_MAX + 1.0)));
+ }
+ buf[sizeof(buf) - 1] = '\0';
+
+ assert(parserutils_hash_create(myrealloc, NULL, &hash) ==
+ PARSERUTILS_OK);
+
+ for (int i = 0; i < (int) sizeof(buf); i++) {
+ uint8_t *s = buf;
+
+ while (s - buf <= i) {
+ const parserutils_hash_entry *e;
+
+ parserutils_hash_insert(hash,
+ s, (size_t) (sizeof(buf) - i), &e);
+
+ s++;
+ }
+ }
+
+// parserutils_hash_dump(hash);
+
+ parserutils_hash_destroy(hash);
+
+ printf("PASS\n");
+
+ return 0;
+}
+
14 years, 9 months
r5849 tlsa - /trunk/netsurfweb/documentation/resinstall.en
by netsurf@semichrome.net
Author: tlsa
Date: Sun Nov 30 05:06:41 2008
New Revision: 5849
URL: http://source.netsurf-browser.org?rev=5849&view=rev
Log:
Reword ROL OS Configure loading option.
Modified:
trunk/netsurfweb/documentation/resinstall.en
Modified: trunk/netsurfweb/documentation/resinstall.en
URL: http://source.netsurf-browser.org/trunk/netsurfweb/documentation/resinsta...
==============================================================================
--- trunk/netsurfweb/documentation/resinstall.en (original)
+++ trunk/netsurfweb/documentation/resinstall.en Sun Nov 30 05:06:41 2008
@@ -80,7 +80,7 @@
<ul>
<li>The most common way to run Configure is to double click !Boot, in the root directory of your hard disc.</li>
<li>On RISC OS 5 you can click <span class="action">menu</span> over the <em>switcher</em> icon which resides at the extreme right hand side of the iconbar. Choose the "Configure" option from the menu.</li>
-<li>On some versions of the OS from RISCOS Ltd you can click <span class="action">menu</span> over the <em>switcher</em> icon which resides at the extreme right hand side of the iconbar. Choose the "Choices..." option from the menu.</li>
+<li>On some versions of the OS from RISCOS Ltd there is a "Choices..." option, which runs Configure, on the <em>switcher</em> menu.</li>
</ul>
<p class="guidescreenshotfeature"><img src="images/resinstall/configure.png" title="Configure Window" alt=""></p>
14 years, 9 months
r5848 jmb - /trunk/libcss/src/parse/parse.c
by netsurf@semichrome.net
Author: jmb
Date: Sat Nov 29 20:14:22 2008
New Revision: 5848
URL: http://source.netsurf-browser.org?rev=5848&view=rev
Log:
Commentary on which tokens actually need string data interning.
Modified:
trunk/libcss/src/parse/parse.c
Modified: trunk/libcss/src/parse/parse.c
URL: http://source.netsurf-browser.org/trunk/libcss/src/parse/parse.c?rev=5848...
==============================================================================
--- trunk/libcss/src/parse/parse.c (original)
+++ trunk/libcss/src/parse/parse.c Sat Nov 29 20:14:22 2008
@@ -569,6 +569,18 @@
if (error != CSS_OK)
return error;
+ /** \todo We need only intern for the following token types:
+ *
+ * CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
+ * CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_URI,
+ * CSS_TOKEN_UNICODE_RANGE?, CSS_TOKEN_FUNCTION
+ *
+ * It would be better if we didn't intern the text for these
+ * token types:
+ *
+ * CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION
+ */
+
if (t->type != CSS_TOKEN_S &&
t->data.data != NULL && t->data.len > 0) {
/* Insert token text into the dictionary */
14 years, 9 months