summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/lex/lex.c 49
-rw-r--r-- src/lex/lex.h 14
-rw-r--r-- test/data/lex/tests2.dat 86
-rw-r--r-- test/lex-auto.c 4
4 files changed, 126 insertions, 27 deletions
diff --git a/src/lex/lex.c b/src/lex/lex.c
index f184a7c..d487a27 100644
--- a/src/lex/lex.c
+++ b/src/lex/lex.c
@@ -34,6 +34,7 @@
#include "lex/lex.h"
#include "utils/parserutilserror.h"
+#include "utils/utils.h"
/** \todo Optimisation -- we're currently revisiting a bunch of input
* characters (Currently, we're calling parserutils_inputstream_peek
@@ -379,7 +380,15 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
t->data.ptr += 1;
t->data.len -= 1;
- /* Strip the trailing quote */
+ /* Strip the trailing quote, iff it exists (may have hit EOF) */
+ if (t->data.ptr[t->data.len - 1] == '"' ||
+ t->data.ptr[t->data.len - 1] == '\'') {
+ t->data.len -= 1;
+ }
+ break;
+ case CSS_TOKEN_INVALID_STRING:
+ /* Strip the leading quote */
+ t->data.ptr += 1;
t->data.len -= 1;
break;
case CSS_TOKEN_HASH:
@@ -396,8 +405,8 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
break;
case CSS_TOKEN_URI:
/* Strip the "url(" from the start */
- t->data.ptr += sizeof("url(") - 1;
- t->data.len -= sizeof("url(") - 1;
+ t->data.ptr += SLEN("url(");
+ t->data.len -= SLEN("url(");
/* Strip any leading whitespace */
while (isSpace(t->data.ptr[0])) {
@@ -427,16 +436,16 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
break;
case CSS_TOKEN_UNICODE_RANGE:
/* Remove "U+" from the start */
- t->data.ptr += sizeof("U+") - 1;
- t->data.len -= sizeof("U+") - 1;
+ t->data.ptr += SLEN("U+");
+ t->data.len -= SLEN("U+");
break;
case CSS_TOKEN_COMMENT:
/* Strip the leading '/' and '*' */
- t->data.ptr += sizeof("/*") - 1;
- t->data.len -= sizeof("/*") - 1;
+ t->data.ptr += SLEN("/*");
+ t->data.len -= SLEN("/*");
/* Strip the trailing '*' and '/' */
- t->data.len -= sizeof("*/") - 1;
+ t->data.len -= SLEN("*/");
break;
case CSS_TOKEN_FUNCTION:
/* Strip the trailing '(' */
@@ -1239,11 +1248,13 @@ css_error String(css_lexer *lexer, const css_token **token)
*/
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error != CSS_OK && error != CSS_EOF && error != CSS_INVALID)
return error;
+ /* EOF will be reprocessed in Start() */
return emitToken(lexer,
- error == CSS_EOF ? CSS_TOKEN_EOF : CSS_TOKEN_STRING,
+ error == CSS_INVALID ? CSS_TOKEN_INVALID_STRING
+ : CSS_TOKEN_STRING,
token);
}
@@ -1450,8 +1461,14 @@ css_error URI(css_lexer *lexer, const css_token **token)
lexer->substate = String;
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error == CSS_INVALID) {
+ /* Rewind to "url(" */
+ lexer->bytesReadForToken = lexer->context.bytesForURL;
+ lexer->token.data.len = lexer->context.dataLenForURL;
+ return emitToken(lexer, CSS_TOKEN_FUNCTION, token);
+ } else if (error != CSS_OK && error != CSS_EOF) {
return error;
+ }
/* EOF gets handled in RParen */
@@ -1794,12 +1811,6 @@ css_error consumeString(css_lexer *lexer)
* The open quote has been consumed.
*/
- /** \todo Handle unexpected end of string correctly - CSS 2.1 $4.2
- * Need to flag the string as being in error (within token, so the
- * parser can discard the construct in which the string was found).
- * This does not apply in the EOF case. In that case, we must act
- * as described in "Unexpected end of style sheet" and simply close
- * the string */
do {
cptr = parserutils_inputstream_peek(lexer->input,
lexer->bytesReadForToken, &clen);
@@ -1818,8 +1829,8 @@ css_error consumeString(css_lexer *lexer)
if (error != CSS_OK)
return error;
} else if (c != quote) {
- /* Invalid character in string -- skip */
- lexer->bytesReadForToken += clen;
+ /* Invalid character in string */
+ return CSS_INVALID;
}
} while(c != quote);
diff --git a/src/lex/lex.h b/src/lex/lex.h
index bfd85b9..b40aff3 100644
--- a/src/lex/lex.h
+++ b/src/lex/lex.h
@@ -33,13 +33,13 @@ typedef union css_lexer_optparams {
* Token type
*/
typedef enum css_token_type {
- CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
- CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE,
- CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, CSS_TOKEN_UNICODE_RANGE,
- CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, CSS_TOKEN_COMMENT,
- CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, CSS_TOKEN_DASHMATCH,
- CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, CSS_TOKEN_SUBSTRINGMATCH,
- CSS_TOKEN_CHAR, CSS_TOKEN_EOF
+ CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
+ CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_NUMBER,
+ CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, CSS_TOKEN_URI,
+ CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S,
+ CSS_TOKEN_COMMENT, CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES,
+ CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH,
+ CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_CHAR, CSS_TOKEN_EOF
} css_token_type;
/**
diff --git a/test/data/lex/tests2.dat b/test/data/lex/tests2.dat
index 5548327..f13c25b 100644
--- a/test/data/lex/tests2.dat
+++ b/test/data/lex/tests2.dat
@@ -27,3 +27,89 @@ CHAR:}
S
EOF
#reset
+
+#data
+@import url("abcde
+);
+#expected
+ATKEYWORD:import
+S
+FUNCTION:url
+INVALID:abcde
+S
+CHAR:)
+CHAR:;
+S
+EOF
+#reset
+
+#data
+body {
+ font-family: "Bitstream Vera Sans;
+}
+.one { width: 10em; }
+#expected
+IDENT:body
+S
+CHAR:{
+S
+IDENT:font-family
+CHAR::
+S
+INVALID:Bitstream Vera Sans;
+S
+CHAR:}
+S
+CHAR:.
+IDENT:one
+S
+CHAR:{
+S
+IDENT:width
+CHAR::
+S
+DIMENSION:10em
+CHAR:;
+S
+CHAR:}
+S
+EOF
+#reset
+
+#data
+body { font-family: "Bitstream Vera Sans; }
+.two { width: 10em; }
+#expected
+IDENT:body
+S
+CHAR:{
+S
+IDENT:font-family
+CHAR::
+S
+INVALID:Bitstream Vera Sans; }
+S
+CHAR:.
+IDENT:two
+S
+CHAR:{
+S
+IDENT:width
+CHAR::
+S
+DIMENSION:10em
+CHAR:;
+S
+CHAR:}
+S
+EOF
+#reset
+
+#data
+"abcde
+#expected
+INVALID:abcde
+S
+EOF
+#reset
+
diff --git a/test/lex-auto.c b/test/lex-auto.c
index 82533d1..b75fbb0 100644
--- a/test/lex-auto.c
+++ b/test/lex-auto.c
@@ -204,7 +204,7 @@ const char *string_from_type(css_token_type type)
{
const char *names[] =
{
- "IDENT", "ATKEYWORD", "STRING", "HASH", "NUMBER",
+ "IDENT", "ATKEYWORD", "STRING", "INVALID", "HASH", "NUMBER",
"PERCENTAGE", "DIMENSION", "URI", "UNICODE-RANGE", "CDO",
"CDC", "S", "COMMENT", "FUNCTION", "INCLUDES",
"DASHMATCH", "PREFIXMATCH", "SUFFIXMATCH", "SUBSTRINGMATCH",
@@ -222,6 +222,8 @@ css_token_type string_to_type(const char *data, size_t len)
return CSS_TOKEN_ATKEYWORD;
else if (len == 6 && strncasecmp(data, "STRING", len) == 0)
return CSS_TOKEN_STRING;
+ else if (len == 7 && strncasecmp(data, "INVALID", len) == 0)
+ return CSS_TOKEN_INVALID_STRING;
else if (len == 4 && strncasecmp(data, "HASH", len) == 0)
return CSS_TOKEN_HASH;
else if (len == 6 && strncasecmp(data, "NUMBER", len) == 0)