From 9e8f4efc94c896b4df2110272f42d2c93e1512d4 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 1 Dec 2008 03:35:40 +0000 Subject: Simplify decision as to whether to intern token data by inserting markers into the css_token_type enum. svn path=/trunk/libcss/; revision=5860 --- src/lex/lex.h | 23 ++++++++++++++++------- src/parse/parse.c | 29 +++++++---------------------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/lex/lex.h b/src/lex/lex.h index 5fed823..feb589e 100644 --- a/src/lex/lex.h +++ b/src/lex/lex.h @@ -34,13 +34,22 @@ typedef union css_lexer_optparams { * Token type */ typedef enum css_token_type { - CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING, - CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, - CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, - CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, - CSS_TOKEN_COMMENT, CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, - CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, - CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_CHAR, CSS_TOKEN_EOF + CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_HASH, + CSS_TOKEN_FUNCTION, + + /* Those tokens that want lowercase strings interned appear above */ + CSS_TOKEN_LAST_INTERN_LOWER, + + CSS_TOKEN_STRING, CSS_TOKEN_INVALID_STRING, CSS_TOKEN_URI, + CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CHAR, CSS_TOKEN_NUMBER, + CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, + + /* Those tokens that want strings interned appear above */ + CSS_TOKEN_LAST_INTERN, + + CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, CSS_TOKEN_COMMENT, + CSS_TOKEN_INCLUDES, CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, + CSS_TOKEN_SUFFIXMATCH, CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_EOF } css_token_type; /** diff --git a/src/parse/parse.c b/src/parse/parse.c index ff8c813..de61b28 100644 --- a/src/parse/parse.c +++ b/src/parse/parse.c @@ -580,40 +580,25 @@ css_error getToken(css_parser *parser, const css_token **token) return error; } - /** \todo We need only intern for the following token types: + /* We need only intern for the following token types: * * CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING, * CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_URI, - * CSS_TOKEN_UNICODE_RANGE?, CSS_TOKEN_FUNCTION, CSS_TOKEN_CHAR - * - * It would be better if we didn't intern the text for these - * token types: - * + * CSS_TOKEN_UNICODE_RANGE?, CSS_TOKEN_FUNCTION, CSS_TOKEN_CHAR, * CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION + * + * These token types all appear before CSS_TOKEN_LAST_INTERN. + * All other token types appear after this magic value. */ - if ((t->type == CSS_TOKEN_IDENT || - t->type == CSS_TOKEN_ATKEYWORD || - t->type == CSS_TOKEN_STRING || - t->type == CSS_TOKEN_INVALID_STRING || - t->type == CSS_TOKEN_HASH || - t->type == CSS_TOKEN_URI || - t->type == CSS_TOKEN_UNICODE_RANGE || - t->type == CSS_TOKEN_FUNCTION || - t->type == CSS_TOKEN_CHAR || - t->type == CSS_TOKEN_NUMBER || - t->type == CSS_TOKEN_PERCENTAGE || - t->type == CSS_TOKEN_DIMENSION) && + if (t->type < CSS_TOKEN_LAST_INTERN && t->data.data != NULL && t->data.len > 0) { const parserutils_hash_entry *interned; /* Invalidate lowercase data */ t->lower.data = NULL; - if (t->type == CSS_TOKEN_IDENT || - t->type == CSS_TOKEN_ATKEYWORD || - t->type == CSS_TOKEN_HASH || - t->type == CSS_TOKEN_FUNCTION) { + if (t->type < CSS_TOKEN_LAST_INTERN_LOWER) { uint8_t temp[t->data.len]; bool lower = false; -- cgit v1.2.3