Author: jmb
Date: Thu Jun 26 07:05:14 2008
New Revision: 4455
URL:
http://source.netsurf-browser.org?rev=4455&view=rev
Log:
Correctly process unterminated strings.
Modified:
trunk/libcss/src/lex/lex.c
trunk/libcss/src/lex/lex.h
trunk/libcss/test/data/lex/tests2.dat
trunk/libcss/test/lex-auto.c
Modified: trunk/libcss/src/lex/lex.c
URL:
http://source.netsurf-browser.org/trunk/libcss/src/lex/lex.c?rev=4455&...
==============================================================================
--- trunk/libcss/src/lex/lex.c (original)
+++ trunk/libcss/src/lex/lex.c Thu Jun 26 07:05:14 2008
@@ -34,6 +34,7 @@
#include "lex/lex.h"
#include "utils/parserutilserror.h"
+#include "utils/utils.h"
/** \todo Optimisation -- we're currently revisiting a bunch of input
* characters (Currently, we're calling parserutils_inputstream_peek
@@ -379,7 +380,15 @@
t->data.ptr += 1;
t->data.len -= 1;
- /* Strip the trailing quote */
+ /* Strip the trailing quote, iff it exists (may have hit EOF) */
+ if (t->data.ptr[t->data.len - 1] == '"' ||
+ t->data.ptr[t->data.len - 1] == '\'') {
+ t->data.len -= 1;
+ }
+ break;
+ case CSS_TOKEN_INVALID_STRING:
+ /* Strip the leading quote */
+ t->data.ptr += 1;
t->data.len -= 1;
break;
case CSS_TOKEN_HASH:
@@ -396,8 +405,8 @@
break;
case CSS_TOKEN_URI:
/* Strip the "url(" from the start */
- t->data.ptr += sizeof("url(") - 1;
- t->data.len -= sizeof("url(") - 1;
+ t->data.ptr += SLEN("url(");
+ t->data.len -= SLEN("url(");
/* Strip any leading whitespace */
while (isSpace(t->data.ptr[0])) {
@@ -427,16 +436,16 @@
break;
case CSS_TOKEN_UNICODE_RANGE:
/* Remove "U+" from the start */
- t->data.ptr += sizeof("U+") - 1;
- t->data.len -= sizeof("U+") - 1;
+ t->data.ptr += SLEN("U+");
+ t->data.len -= SLEN("U+");
break;
case CSS_TOKEN_COMMENT:
/* Strip the leading '/' and '*' */
- t->data.ptr += sizeof("/*") - 1;
- t->data.len -= sizeof("/*") - 1;
+ t->data.ptr += SLEN("/*");
+ t->data.len -= SLEN("/*");
/* Strip the trailing '*' and '/' */
- t->data.len -= sizeof("*/") - 1;
+ t->data.len -= SLEN("*/");
break;
case CSS_TOKEN_FUNCTION:
/* Strip the trailing '(' */
@@ -1239,11 +1248,13 @@
*/
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error != CSS_OK && error != CSS_EOF && error != CSS_INVALID)
return error;
+ /* EOF will be reprocessed in Start() */
return emitToken(lexer,
- error == CSS_EOF ? CSS_TOKEN_EOF : CSS_TOKEN_STRING,
+ error == CSS_INVALID ? CSS_TOKEN_INVALID_STRING
+ : CSS_TOKEN_STRING,
token);
}
@@ -1450,8 +1461,14 @@
lexer->substate = String;
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error == CSS_INVALID) {
+ /* Rewind to "url(" */
+ lexer->bytesReadForToken = lexer->context.bytesForURL;
+ lexer->token.data.len = lexer->context.dataLenForURL;
+ return emitToken(lexer, CSS_TOKEN_FUNCTION, token);
+ } else if (error != CSS_OK && error != CSS_EOF) {
return error;
+ }
/* EOF gets handled in RParen */
@@ -1794,12 +1811,6 @@
* The open quote has been consumed.
*/
- /** \todo Handle unexpected end of string correctly - CSS 2.1 $4.2
- * Need to flag the string as being in error (within token, so the
- * parser can discard the construct in which the string was found).
- * This does not apply in the EOF case. In that case, we must act
- * as described in "Unexpected end of style sheet" and simply close
- * the string */
do {
cptr = parserutils_inputstream_peek(lexer->input,
lexer->bytesReadForToken, &clen);
@@ -1818,8 +1829,8 @@
if (error != CSS_OK)
return error;
} else if (c != quote) {
- /* Invalid character in string -- skip */
- lexer->bytesReadForToken += clen;
+ /* Invalid character in string */
+ return CSS_INVALID;
}
} while(c != quote);
Modified: trunk/libcss/src/lex/lex.h
URL:
http://source.netsurf-browser.org/trunk/libcss/src/lex/lex.h?rev=4455&...
==============================================================================
--- trunk/libcss/src/lex/lex.h (original)
+++ trunk/libcss/src/lex/lex.h Thu Jun 26 07:05:14 2008
@@ -33,13 +33,13 @@
* Token type
*/
typedef enum css_token_type {
- CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
- CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE,
- CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, CSS_TOKEN_UNICODE_RANGE,
- CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, CSS_TOKEN_COMMENT,
- CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, CSS_TOKEN_DASHMATCH,
- CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, CSS_TOKEN_SUBSTRINGMATCH,
- CSS_TOKEN_CHAR, CSS_TOKEN_EOF
+ CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
+ CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_NUMBER,
+ CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, CSS_TOKEN_URI,
+ CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S,
+ CSS_TOKEN_COMMENT, CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES,
+ CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH,
+ CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_CHAR, CSS_TOKEN_EOF
} css_token_type;
/**
Modified: trunk/libcss/test/data/lex/tests2.dat
URL:
http://source.netsurf-browser.org/trunk/libcss/test/data/lex/tests2.dat?r...
==============================================================================
--- trunk/libcss/test/data/lex/tests2.dat (original)
+++ trunk/libcss/test/data/lex/tests2.dat Thu Jun 26 07:05:14 2008
@@ -27,3 +27,89 @@
S
EOF
#reset
+
+#data
+@import url("abcde
+);
+#expected
+ATKEYWORD:import
+S
+FUNCTION:url
+INVALID:abcde
+S
+CHAR:)
+CHAR:;
+S
+EOF
+#reset
+
+#data
+body {
+ font-family: "Bitstream Vera Sans;
+}
+.one { width: 10em; }
+#expected
+IDENT:body
+S
+CHAR:{
+S
+IDENT:font-family
+CHAR::
+S
+INVALID:Bitstream Vera Sans;
+S
+CHAR:}
+S
+CHAR:.
+IDENT:one
+S
+CHAR:{
+S
+IDENT:width
+CHAR::
+S
+DIMENSION:10em
+CHAR:;
+S
+CHAR:}
+S
+EOF
+#reset
+
+#data
+body { font-family: "Bitstream Vera Sans; }
+.two { width: 10em; }
+#expected
+IDENT:body
+S
+CHAR:{
+S
+IDENT:font-family
+CHAR::
+S
+INVALID:Bitstream Vera Sans; }
+S
+CHAR:.
+IDENT:two
+S
+CHAR:{
+S
+IDENT:width
+CHAR::
+S
+DIMENSION:10em
+CHAR:;
+S
+CHAR:}
+S
+EOF
+#reset
+
+#data
+"abcde
+#expected
+INVALID:abcde
+S
+EOF
+#reset
+
Modified: trunk/libcss/test/lex-auto.c
URL:
http://source.netsurf-browser.org/trunk/libcss/test/lex-auto.c?rev=4455&a...
==============================================================================
--- trunk/libcss/test/lex-auto.c (original)
+++ trunk/libcss/test/lex-auto.c Thu Jun 26 07:05:14 2008
@@ -204,7 +204,7 @@
{
const char *names[] =
{
- "IDENT", "ATKEYWORD", "STRING", "HASH", "NUMBER",
+ "IDENT", "ATKEYWORD", "STRING", "INVALID", "HASH", "NUMBER",
"PERCENTAGE", "DIMENSION", "URI", "UNICODE-RANGE", "CDO",
"CDC", "S", "COMMENT", "FUNCTION", "INCLUDES",
"DASHMATCH", "PREFIXMATCH", "SUFFIXMATCH", "SUBSTRINGMATCH",
@@ -222,6 +222,8 @@
return CSS_TOKEN_ATKEYWORD;
else if (len == 6 && strncasecmp(data, "STRING", len) == 0)
return CSS_TOKEN_STRING;
+ else if (len == 7 && strncasecmp(data, "INVALID", len) == 0)
+ return CSS_TOKEN_INVALID_STRING;
else if (len == 4 && strncasecmp(data, "HASH", len) == 0)
return CSS_TOKEN_HASH;
else if (len == 6 && strncasecmp(data, "NUMBER", len) == 0)