From 4680b309e825db1d669b04056bc3cbdf273f5d80 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Fri, 1 Aug 2008 19:00:59 +0000 Subject: Only intern strings when we get a token from the lexer. Strings in tokens that have been pushed back have already been interned, so it's stupid to re-intern every time. This has required that the lexer permits its clients to modify the data members of the css_token object. That's fine, as it assumes nothing about them (they're basically just a window onto the internal lexer state, anyway). svn path=/trunk/libcss/; revision=4857 --- src/lex/lex.c | 75 ++++++++++++++++++++++++++++++------------------------- src/lex/lex.h | 2 +- src/parse/parse.c | 30 +++++++++++----------- test/lex-auto.c | 2 +- test/lex.c | 2 +- 5 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/lex/lex.c b/src/lex/lex.c index d487a27..5fdcff4 100644 --- a/src/lex/lex.c +++ b/src/lex/lex.c @@ -127,28 +127,28 @@ do { \ static inline css_error appendToTokenData(css_lexer *lexer, const uint8_t *data, size_t len); static inline css_error emitToken(css_lexer *lexer, css_token_type type, - const css_token **token); + css_token **token); -static inline css_error AtKeyword(css_lexer *lexer, const css_token **token); +static inline css_error AtKeyword(css_lexer *lexer, css_token **token); static inline css_error CDCOrIdentOrFunction(css_lexer *lexer, - const css_token **token); -static inline css_error CDO(css_lexer *lexer, const css_token **token); -static inline css_error Comment(css_lexer *lexer, const css_token **token); + css_token **token); +static inline css_error CDO(css_lexer *lexer, css_token **token); +static inline css_error Comment(css_lexer *lexer, css_token **token); static inline css_error EscapedIdentOrFunction(css_lexer *lexer, - const css_token **token); -static inline css_error Hash(css_lexer *lexer, const css_token **token); + css_token **token); +static inline css_error Hash(css_lexer *lexer, css_token **token); static inline css_error IdentOrFunction(css_lexer *lexer, - const css_token **token); -static inline css_error Match(css_lexer *lexer, const css_token **token); + css_token **token); +static inline css_error Match(css_lexer *lexer, css_token **token); static inline css_error NumberOrPercentageOrDimension(css_lexer *lexer, - const css_token **token); -static inline css_error S(css_lexer *lexer, const css_token **token); -static inline css_error Start(css_lexer *lexer, const css_token **token); -static inline css_error String(css_lexer *lexer, const css_token **token); + css_token **token); +static inline css_error S(css_lexer *lexer, css_token **token); +static inline css_error Start(css_lexer *lexer, css_token **token); +static inline css_error String(css_lexer *lexer, css_token **token); static inline css_error URIOrUnicodeRangeOrIdentOrFunction( - css_lexer *lexer, const css_token **token); -static inline css_error URI(css_lexer *lexer, const css_token **token); -static inline css_error UnicodeRange(css_lexer *lexer, const css_token **token); + css_lexer *lexer, css_token **token); +static inline css_error URI(css_lexer *lexer, css_token **token); +static inline css_error UnicodeRange(css_lexer *lexer, css_token **token); static inline css_error consumeDigits(css_lexer *lexer); static inline css_error consumeEscape(css_lexer *lexer, bool nl); @@ -253,8 +253,16 @@ css_error css_lexer_setopt(css_lexer *lexer, css_lexer_opttype type, * \param lexer The lexer instance to read from * \param token Pointer to location to receive pointer to token * \return CSS_OK on success, appropriate error otherwise + * + * The returned token object is owned by the lexer. However, the client is + * permitted to modify the data members of the token. The token must not be + * freed by the client (it may not have been allocated in the first place), + * nor may any of the pointers contained within it. The client may, if they + * wish, overwrite any data member of the returned token object -- the lexer + * does not depend on these remaining constant. This allows the client code + * to efficiently implement a push-back buffer with interned string data. */ -css_error css_lexer_get_token(css_lexer *lexer, const css_token **token) +css_error css_lexer_get_token(css_lexer *lexer, css_token **token) { css_error error; @@ -347,7 +355,7 @@ css_error appendToTokenData(css_lexer *lexer, const uint8_t *data, size_t len) * \return CSS_OK on success, appropriate error otherwise */ css_error emitToken(css_lexer *lexer, css_token_type type, - const css_token **token) + css_token **token) { css_token *t = &lexer->token; @@ -468,7 +476,7 @@ css_error emitToken(css_lexer *lexer, css_token_type type, * State machine components * ******************************************************************************/ -css_error AtKeyword(css_lexer *lexer, const css_token **token) +css_error AtKeyword(css_lexer *lexer, css_token **token) { uintptr_t cptr; uint8_t c; @@ -533,7 +541,7 @@ css_error AtKeyword(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_ATKEYWORD, token); } -css_error CDCOrIdentOrFunction(css_lexer *lexer, const css_token **token) +css_error CDCOrIdentOrFunction(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr; @@ -641,7 +649,7 @@ css_error CDCOrIdentOrFunction(css_lexer *lexer, const css_token **token) return emitToken(lexer, t->type, token); } -css_error CDO(css_lexer *lexer, const css_token **token) +css_error CDO(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr; @@ -737,7 +745,7 @@ css_error CDO(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_CDO, token); } -css_error Comment(css_lexer *lexer, const css_token **token) +css_error Comment(css_lexer *lexer, css_token **token) { uintptr_t cptr; uint8_t c; @@ -806,7 +814,7 @@ css_error Comment(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_COMMENT, token); } -css_error EscapedIdentOrFunction(css_lexer *lexer, const css_token **token) +css_error EscapedIdentOrFunction(css_lexer *lexer, css_token **token) { css_error error; @@ -833,7 +841,7 @@ css_error EscapedIdentOrFunction(css_lexer *lexer, const css_token **token) return IdentOrFunction(lexer, token); } -css_error Hash(css_lexer *lexer, const css_token **token) +css_error Hash(css_lexer *lexer, css_token **token) { css_error error; @@ -853,7 +861,7 @@ css_error Hash(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_CHAR, token); } -css_error IdentOrFunction(css_lexer *lexer, const css_token **token) +css_error IdentOrFunction(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr; @@ -903,7 +911,7 @@ css_error IdentOrFunction(css_lexer *lexer, const css_token **token) return emitToken(lexer, t->type, token); } -css_error Match(css_lexer *lexer, const css_token **token) +css_error Match(css_lexer *lexer, css_token **token) { uintptr_t cptr; uint8_t c; @@ -957,8 +965,7 @@ css_error Match(css_lexer *lexer, const css_token **token) return emitToken(lexer, type, token); } -css_error NumberOrPercentageOrDimension(css_lexer *lexer, - const css_token **token) +css_error NumberOrPercentageOrDimension(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr; @@ -1097,7 +1104,7 @@ css_error NumberOrPercentageOrDimension(css_lexer *lexer, return emitToken(lexer, CSS_TOKEN_DIMENSION, token); } -css_error S(css_lexer *lexer, const css_token **token) +css_error S(css_lexer *lexer, css_token **token) { css_error error; @@ -1113,7 +1120,7 @@ css_error S(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_S, token); } -css_error Start(css_lexer *lexer, const css_token **token) +css_error Start(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr; @@ -1238,7 +1245,7 @@ start: } } -css_error String(css_lexer *lexer, const css_token **token) +css_error String(css_lexer *lexer, css_token **token) { css_error error; @@ -1259,7 +1266,7 @@ css_error String(css_lexer *lexer, const css_token **token) } css_error URIOrUnicodeRangeOrIdentOrFunction(css_lexer *lexer, - const css_token **token) + css_token **token) { uintptr_t cptr; uint8_t c; @@ -1306,7 +1313,7 @@ css_error URIOrUnicodeRangeOrIdentOrFunction(css_lexer *lexer, return IdentOrFunction(lexer, token); } -css_error URI(css_lexer *lexer, const css_token **token) +css_error URI(css_lexer *lexer, css_token **token) { uintptr_t cptr; uint8_t c; @@ -1480,7 +1487,7 @@ css_error URI(css_lexer *lexer, const css_token **token) return emitToken(lexer, CSS_TOKEN_URI, token); } -css_error UnicodeRange(css_lexer *lexer, const css_token **token) +css_error UnicodeRange(css_lexer *lexer, css_token **token) { css_token *t = &lexer->token; uintptr_t cptr = PARSERUTILS_INPUTSTREAM_OOD; /* GCC: shush */ diff --git a/src/lex/lex.h b/src/lex/lex.h index b40aff3..2cdf7f1 100644 --- a/src/lex/lex.h +++ b/src/lex/lex.h @@ -61,7 +61,7 @@ void css_lexer_destroy(css_lexer *lexer); css_error css_lexer_setopt(css_lexer *lexer, css_lexer_opttype type, css_lexer_optparams *params); -css_error css_lexer_get_token(css_lexer *lexer, const css_token **token); +css_error css_lexer_get_token(css_lexer *lexer, css_token **token); #endif diff --git a/src/parse/parse.c b/src/parse/parse.c index e19bd15..b66642b 100644 --- a/src/parse/parse.c +++ b/src/parse/parse.c @@ -521,7 +521,6 @@ css_error expect(css_parser *parser, css_token_type type) */ css_error getToken(css_parser *parser, const css_token **token) { - css_token temp; parserutils_error perror; css_error error; @@ -531,28 +530,31 @@ css_error getToken(css_parser *parser, const css_token **token) parser->pushback = NULL; } else { /* Otherwise, ask the lexer */ - error = css_lexer_get_token(parser->lexer, token); + css_token *t; + + error = css_lexer_get_token(parser->lexer, &t); if (error != CSS_OK) return error; - } - temp = *(*token); + if (t->data.ptr != NULL && t->data.len > 0) { + /* Insert token text into the dictionary */ + const parserutils_dict_entry *interned; - if (temp.data.ptr != NULL && temp.data.len > 0) { - /* Insert token text into the dictionary */ - const parserutils_dict_entry *interned; + perror = parserutils_dict_insert(parser->dictionary, + t->data.ptr, t->data.len, &interned); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); - perror = parserutils_dict_insert(parser->dictionary, - temp.data.ptr, temp.data.len, &interned); - if (perror != PARSERUTILS_OK) - return css_error_from_parserutils_error(perror); + t->data.ptr = interned->data; + t->data.len = interned->len; + } - temp.data.ptr = interned->data; - temp.data.len = interned->len; + *token = t; } /* Append token to vector */ - perror = parserutils_vector_append(parser->tokens, &temp); + perror = parserutils_vector_append(parser->tokens, + (css_token *) (*token)); if (perror != PARSERUTILS_OK) return css_error_from_parserutils_error(perror); diff --git a/test/lex-auto.c b/test/lex-auto.c index b75fbb0..e148649 100644 --- a/test/lex-auto.c +++ b/test/lex-auto.c @@ -267,7 +267,7 @@ void run_test(const uint8_t *data, size_t len, exp_entry *exp, size_t explen) parserutils_inputstream *input; css_lexer *lexer; css_error error; - const css_token *tok; + css_token *tok; size_t e; static int testnum; diff --git a/test/lex.c b/test/lex.c index d808611..22da973 100644 --- a/test/lex.c +++ b/test/lex.c @@ -108,7 +108,7 @@ int main(int argc, char **argv) size_t len, origlen; #define CHUNK_SIZE (4096) uint8_t buf[CHUNK_SIZE]; - const css_token *tok; + css_token *tok; css_error error; if (argc != 3) { -- cgit v1.2.3