diff options
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | docs/Grammar | 210 | ||||
-rw-r--r-- | include/libcss/types.h | 2 | ||||
-rw-r--r-- | src/parse/Makefile | 49 | ||||
-rw-r--r-- | src/parse/parse.c | 1671 | ||||
-rw-r--r-- | src/parse/parse.h | 45 | ||||
-rw-r--r-- | test/INDEX | 1 | ||||
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/data/css/INDEX | 1 | ||||
-rw-r--r-- | test/data/css/blocks.css | 10 | ||||
-rw-r--r-- | test/parse.c | 79 |
11 files changed, 2070 insertions, 2 deletions
@@ -14,8 +14,8 @@ Requirements + A C99 capable C compiler + GNU make or compatible + + Pkg-config + Perl (for the testcases) - + Pkg-config (for the testcases) LibCSS also requires the following libraries to be installed: diff --git a/docs/Grammar b/docs/Grammar new file mode 100644 index 0000000..0fd27d3 --- /dev/null +++ b/docs/Grammar @@ -0,0 +1,210 @@ +Expanded grammar rules +====================== + +This file provides a fully-expanded version of (a slightly modified) +forward-compatible CSS grammar. See CSS3 Syntax $4.3.2 for the compact version. + +start -> ws stylesheet EOF + +stylesheet -> CDO ws stylesheet +stylesheet -> CDC ws stylesheet +stylesheet -> statement stylesheet +stylesheet -> + +statement -> ruleset +statement -> at-rule + +ruleset -> selector '{' ws ruleset-end +ruleset -> '{' ws ruleset-end + +ruleset-end -> declaration decl-list '}' ws +ruleset-end -> decl-list '}' ws + +at-rule -> ATKEYWORD ws any0 at-rule-end + +at-rule-end -> block +at-rule-end -> ';' ws + +block -> '{' ws block-content '}' ws + +block-content -> any block-content +block-content -> block block-content +block-content -> ATKEYWORD ws block-content +block-content -> ';' ws block-content +block-content -> + +selector -> any1 + +declaration -> property ':' ws value1 + +decl-list -> ';' ws decl-list-end +decl-list -> + +decl-list-end -> declaration decl-list +decl-list-end -> decl-list + +property -> IDENT ws + +value0 -> value value0 +value0 -> + +value1 -> value value0 + +value -> any +value -> block +value -> ATKEYWORD ws + +any0 -> any any0 +any0 -> + +any1 -> any any0 + +any -> IDENT ws +any -> NUMBER ws +any -> PERCENTAGE ws +any -> DIMENSION ws +any -> STRING ws +any -> CHAR ws +any -> URI ws +any -> HASH ws +any -> UNICODE-RANGE ws +any -> INCLUDES ws +any -> DASHMATCH ws +any -> PREFIXMATCH ws +any -> SUFFIXMATCH ws +any -> SUBSTRINGMATCH ws +any -> FUNCTION ws any0 ')' ws +any -> '(' ws any0 ')' ws +any -> '[' ws any0 ']' ws + +ws -> S ws +ws -> + +Differences from 
the specification +---------------------------------- + +1) The start non-terminal has been introduced. It eats any leading whitespace + and handles EOF. +2) The "stylesheet -> S stylesheet" production has been removed. +3) The "stylesheet -> CDO stylesheet" production has been changed to + "stylesheet -> CDO ws stylesheet". +4) The "stylesheet -> CDC stylesheet" production has been changed to + "stylesheet -> CDC ws stylesheet". + +Essentially, the above changes remove the expectation of leading whitespace +from the stylesheet non-terminal. This is handled by either the start +non-terminal, or by the changes made to the production rules for the stylesheet +non-terminal. Note that the "stylesheet -> statement stylesheet" production +does not require modification as the statement production rule already consumes +any whitespace following the statement. + +If '{', '}', '[', ']', '(', ')', and ';' are omitted from any, then the above +grammar is LL(1). + +Nullable productions +-------------------- + +stylesheet, block-content, decl-list, decl-list-end, value0, any0, ws + +FIRST sets +---------- + +start CDO, CDC, S, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +stylesheet CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD +statement IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD +ruleset IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{' +ruleset-end IDENT, ';', '}' +at-rule ATKEYWORD +at-rule-end '{', ';' +block '{' +block-content IDENT, NUMBER, 
PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD, ';' +selector IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[' +declaration IDENT +decl-list ';', '}' +decl-list-end IDENT, ';', '}' +property IDENT +value0 IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD +value1 IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD +value IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD +any0 IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[' +any1 IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[' +any IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[' +ws S + +FOLLOW sets +----------- + +start +stylesheet EOF +statement CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +ruleset CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +ruleset-end CDO, CDC, 
IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +at-rule CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +at-rule-end CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF +block CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF, ';', '}' +block-content '}' +selector '{' +declaration ';', '}' +decl-list '}' +decl-list-end '}' +property ':' +value0 ';', '}' +value1 ';', '}' +value IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ATKEYWORD, ';', '}' +any0 '{', ';', ')', ']' +any1 '{' +any IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, CHAR, URI, HASH, + UNICODE-RANGE, INCLUDES, DASHMATCH, PREFIXMATCH, SUFFIXMATCH, + SUBSTRINGMATCH, FUNCTION, '(', '[', '{', ';', ATKEYWORD, '}' +ws CDO, CDC, IDENT, NUMBER, PERCENTAGE, DIMENSION, STRING, + CHAR, URI, HASH, UNICODE-RANGE, INCLUDES, DASHMATCH, + PREFIXMATCH, SUFFIXMATCH, SUBSTRINGMATCH, FUNCTION, '(', + '[', '{', ATKEYWORD, EOF, ';', '}', ':' + + diff --git a/include/libcss/types.h b/include/libcss/types.h index b3e18dc..104e339 100644 --- a/include/libcss/types.h +++ b/include/libcss/types.h @@ -30,4 +30,6 @@ typedef struct css_string { size_t len; /**< Byte length of string */ } css_string; +typedef struct css_stylesheet css_stylesheet; + #endif diff --git a/src/parse/Makefile b/src/parse/Makefile new file mode 100644 index 
0000000..9e1c6dd --- /dev/null +++ b/src/parse/Makefile @@ -0,0 +1,49 @@ +# Child makefile fragment +# +# Toolchain is provided by top-level makefile +# +# Variables provided by top-level makefile +# +# COMPONENT The name of the component +# EXPORT The location of the export directory +# TOP The location of the source tree root +# RELEASEDIR The place to put release objects +# DEBUGDIR The place to put debug objects +# +# do_include Canned command sequence to include a child makefile +# +# Variables provided by parent makefile: +# +# DIR The name of the directory we're in, relative to $(TOP) +# +# Variables we can manipulate: +# +# ITEMS_CLEAN The list of items to remove for "make clean" +# ITEMS_DISTCLEAN The list of items to remove for "make distclean" +# TARGET_TESTS The list of target names to run for "make test" +# +# SOURCES The list of sources to build for $(COMPONENT) +# +# Plus anything from the toolchain + +# Push parent directory onto the directory stack +sp := $(sp).x +dirstack_$(sp) := $(d) +d := $(DIR) + +# Manipulate include paths +CFLAGS := $(CFLAGS) -I$(d) + +# Sources +SRCS_$(d) := parse.c + +# Append to sources for component +SOURCES += $(addprefix $(d), $(SRCS_$(d))) + +# Now include any children we may have +MAKE_INCLUDES := $(wildcard $(d)*/Makefile) +$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC)))) + +# Finally, pop off the directory stack +d := $(dirstack_$(sp)) +sp := $(basename $(sp)) diff --git a/src/parse/parse.c b/src/parse/parse.c new file mode 100644 index 0000000..31b0a9a --- /dev/null +++ b/src/parse/parse.c @@ -0,0 +1,1671 @@ +/* + * This file is part of LibCSS. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org> + */ + +#include <assert.h> + +#include <parserutils/input/inputstream.h> +#include <parserutils/utils/dict.h> +#include <parserutils/utils/stack.h> +#include <parserutils/utils/vector.h> + +#include "charset/detect.h" +#include "lex/lex.h" +#include "parse/parse.h" +#include "utils/parserutilserror.h" +#include "utils/utils.h" + +#undef DEBUG_STACK +#define DEBUG_EVENTS + +#ifndef NDEBUG +#include <stdio.h> +extern void parserutils_stack_dump(parserutils_stack *stack, const char *prefix, + void (*printer)(void *item)); +extern void parserutils_vector_dump(parserutils_vector *vector, + const char *prefix, void (*printer)(void *item)); +#ifdef DEBUG_STACK +static void printer(void *item); +#endif +#ifdef DEBUG_EVENTS +static void tprinter(void *token); +#endif +#endif + +/** + * Major state numbers + */ +enum { + sStart = 0, + sStylesheet = 1, + sStatement = 2, + sRuleset = 3, + sRulesetEnd = 4, + sAtRule = 5, + sAtRuleEnd = 6, + sBlock = 7, + sBlockContent = 8, + sSelector = 9, + sDeclaration = 10, + sDeclList = 11, + sDeclListEnd = 12, + sProperty = 13, + sValue0 = 14, + sValue1 = 15, + sValue = 16, + sAny0 = 17, + sAny1 = 18, + sAny = 19 +}; + +/** + * Representation of a parser state + */ +typedef struct parser_state +{ + uint32_t state : 16; + uint32_t substate : 16; +} parser_state; + +/** + * CSS parser object + */ +struct css_parser +{ + css_stylesheet *sheet; /**< The sheet we're parsing */ + + parserutils_inputstream *stream; /**< The inputstream */ + css_lexer *lexer; /**< The lexer to use */ + + bool quirks; /**< Whether to enable parsing quirks */ + +#define STACK_CHUNK 32 + parserutils_stack *states; /**< Stack of states */ + + parserutils_dict *dictionary; /**< Dictionary for interned strings */ + + parserutils_vector *tokens; /**< Vector of pending tokens */ + + const css_token *pushback; /**< Push 
back buffer */ + + uint8_t match_char; /**< Close bracket type for parseAny */ + + css_alloc alloc; /**< Memory (de)allocation function */ + void *pw; /**< Client-specific private data */ +}; + +static inline css_error transition(css_parser *parser, parser_state to, + parser_state subsequent); +static inline css_error transitionNoRet(css_parser *parser, parser_state to); +static inline css_error done(css_parser *parser); +static inline css_error expect(css_parser *parser, css_token_type type); +static inline css_error getToken(css_parser *parser, const css_token **token); +static inline css_error pushBack(css_parser *parser, const css_token *token); +static inline css_error eatWS(css_parser *parser); + +static inline css_error parseStart(css_parser *parser); +static inline css_error parseStylesheet(css_parser *parser); +static inline css_error parseStatement(css_parser *parser); +static inline css_error parseRuleset(css_parser *parser); +static inline css_error parseRulesetEnd(css_parser *parser); +static inline css_error parseAtRule(css_parser *parser); +static inline css_error parseAtRuleEnd(css_parser *parser); +static inline css_error parseBlock(css_parser *parser); +static inline css_error parseBlockContent(css_parser *parser); +static inline css_error parseSelector(css_parser *parser); +static inline css_error parseDeclaration(css_parser *parser); +static inline css_error parseDeclList(css_parser *parser); +static inline css_error parseDeclListEnd(css_parser *parser); +static inline css_error parseProperty(css_parser *parser); +static inline css_error parseValue0(css_parser *parser); +static inline css_error parseValue1(css_parser *parser); +static inline css_error parseValue(css_parser *parser); +static inline css_error parseAny0(css_parser *parser); +static inline css_error parseAny1(css_parser *parser); +static inline css_error parseAny(css_parser *parser); + +/** + * Dispatch table for parsing, indexed by major state number + */ +static css_error 
(*parseFuncs[])(css_parser *parser) = { + parseStart, + parseStylesheet, + parseStatement, + parseRuleset, + parseRulesetEnd, + parseAtRule, + parseAtRuleEnd, + parseBlock, + parseBlockContent, + parseSelector, + parseDeclaration, + parseDeclList, + parseDeclListEnd, + parseProperty, + parseValue0, + parseValue1, + parseValue, + parseAny0, + parseAny1, + parseAny +}; + +/** + * Create a CSS parser + * + * \param sheet The sheet to parse data for + * \param charset Charset of data, if known, or NULL + * \param cs_source Source of charset information, or CSS_CHARSET_DEFAULT + * \param alloc Memory (de)allocation function + * \param pw Pointer to client-specific private data + * \return Pointer to parser instance, or NULL on memory exhaustion + */ +css_parser *css_parser_create(css_stylesheet *sheet, const char *charset, + css_charset_source cs_source, css_alloc alloc, void *pw) +{ + css_parser *parser; + parser_state initial = { sStart, 0 }; + parserutils_error perror; + + if (sheet == NULL || alloc == NULL) + return NULL; + + parser = alloc(NULL, sizeof(css_parser), pw); + if (parser == NULL) + return NULL; + + parser->stream = parserutils_inputstream_create(charset, cs_source, + css_charset_extract, (parserutils_alloc) alloc, pw); + if (parser->stream == NULL) { + alloc(parser, 0, pw); + return NULL; + } + + parser->lexer = css_lexer_create(parser->stream, alloc, pw); + if (parser->lexer == NULL) { + parserutils_inputstream_destroy(parser->stream); + alloc(parser, 0, pw); + return NULL; + } + + parser->states = parserutils_stack_create(sizeof(parser_state), + STACK_CHUNK, (parserutils_alloc) alloc, pw); + if (parser->states == NULL) { + css_lexer_destroy(parser->lexer); + parserutils_inputstream_destroy(parser->stream); + alloc(parser, 0, pw); + return NULL; + } + + parser->dictionary = parserutils_dict_create( + (parserutils_alloc) alloc, pw); + if (parser->dictionary == NULL) { + parserutils_stack_destroy(parser->states); + css_lexer_destroy(parser->lexer); + 
parserutils_inputstream_destroy(parser->stream); + alloc(parser, 0, pw); + return NULL; + } + + parser->tokens = parserutils_vector_create(sizeof(css_token), + STACK_CHUNK, (parserutils_alloc) alloc, pw); + if (parser->tokens == NULL) { + parserutils_dict_destroy(parser->dictionary); + parserutils_stack_destroy(parser->states); + css_lexer_destroy(parser->lexer); + parserutils_inputstream_destroy(parser->stream); + alloc(parser, 0, pw); + return NULL; + } + + perror = parserutils_stack_push(parser->states, (void *) &initial); + if (perror != PARSERUTILS_OK) { + parserutils_vector_destroy(parser->tokens); + parserutils_dict_destroy(parser->dictionary); + parserutils_stack_destroy(parser->states); + css_lexer_destroy(parser->lexer); + parserutils_inputstream_destroy(parser->stream); + alloc(parser, 0, pw); + return NULL; + } + + parser->sheet = sheet; + parser->quirks = false; + parser->pushback = NULL; + parser->alloc = alloc; + parser->pw = pw; + + return parser; +} + +/** + * Destroy a CSS parser + * + * \param parser The parser instance to destroy + */ +void css_parser_destroy(css_parser *parser) +{ + if (parser == NULL) + return; + + parserutils_vector_destroy(parser->tokens); + + parserutils_dict_destroy(parser->dictionary); + + parserutils_stack_destroy(parser->states); + + css_lexer_destroy(parser->lexer); + + parserutils_inputstream_destroy(parser->stream); + + parser->alloc(parser, 0, parser->pw); +} + +/** + * Configure a CSS parser + * + * \param parser The parser instance to configure + * \param type The option to configure + * \param params Option-specific data + * \return CSS_OK on success, appropriate error otherwise + */ +css_error css_parser_setopt(css_parser *parser, css_parser_opttype type, + css_parser_optparams *params) +{ + if (parser == NULL || params == NULL) + return CSS_BADPARM; + + switch (type) { + case CSS_PARSER_QUIRKS: + parser->quirks = params->quirks; + break; + } + + return CSS_OK; +} + +/** + * Parse a chunk of data using a CSS 
parser + * + * \param parser The parser to use + * \param data Pointer to the chunk to parse + * \param len Length of chunk + * \return CSS_OK on success, appropriate error otherwise + */ +css_error css_parser_parse_chunk(css_parser *parser, const uint8_t *data, + size_t len) +{ + parserutils_error perror; + parser_state *state; + css_error error = CSS_OK; + + if (parser == NULL || data == NULL) + return CSS_BADPARM; + + perror = parserutils_inputstream_append(parser->stream, data, len); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + + do { + state = parserutils_stack_get_current(parser->states); + if (state == NULL) + break; + + error = parseFuncs[state->state](parser); + } while (error == CSS_OK); + + return error; +} + +/** + * Inform a CSS parser that all data has been received. + * + * \param parser The parser to inform + * \return CSS_OK on success, appropriate error otherwise + */ +css_error css_parser_completed(css_parser *parser) +{ + parserutils_error perror; + parser_state *state; + css_error error = CSS_OK; + + if (parser == NULL) + return CSS_BADPARM; + + /* Send EOF to input stream */ + perror = parserutils_inputstream_append(parser->stream, NULL, 0); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + + /* Flush through any remaining data */ + do { + state = parserutils_stack_get_current(parser->states); + if (state == NULL) + break; + + error = parseFuncs[state->state](parser); + } while (error == CSS_OK); + + return error; +} + +/** + * Retrieve document charset information from a CSS parser + * + * \param parser The parser instance + * \param source Pointer to location to receive charset source + * \return Pointer to charset name (constant; do not free) + */ +const char *css_parser_read_charset(css_parser *parser, + css_charset_source *source) +{ + if (parser == NULL || source == NULL) + return NULL; + + return parserutils_inputstream_read_charset(parser->stream, source); +} + 
+/****************************************************************************** + * Helper functions * + ******************************************************************************/ + +/** + * Transition to a new state, ensuring return to the one specified + * + * \param parser The parser instance + * \param to Destination state + * \param subsequent The state to return to + * \return CSS_OK on success, appropriate error otherwise + */ +css_error transition(css_parser *parser, parser_state to, + parser_state subsequent) +{ + parser_state *state = parserutils_stack_get_current(parser->states); + parser_state current = *state; + parserutils_error perror; + + /* Replace current state on the stack with the subsequent one */ + *state = subsequent; + + /* Push next state on the stack */ + perror = parserutils_stack_push(parser->states, (void *) &to); + if (perror != PARSERUTILS_OK) { + *state = current; + return css_error_from_parserutils_error(perror); + } + +#if !defined(NDEBUG) && defined(DEBUG_STACK) + parserutils_stack_dump(parser->states, __func__, printer); +#endif + + return CSS_OK; +} + +/** + * Transition to a new state, returning to previous state on stack + * + * \param parser The parser instance + * \param to Destination state + * \return CSS_OK on success, appropriate error otherwise + */ +css_error transitionNoRet(css_parser *parser, parser_state to) +{ + parser_state *state = parserutils_stack_get_current(parser->states); + + /* Replace current state on the stack with destination */ + *state = to; + +#if !defined(NDEBUG) && defined(DEBUG_STACK) + parserutils_stack_dump(parser->states, __func__, printer); +#endif + + return CSS_OK; +} + +/** + * Return to previous state on the stack + * + * \param parser The parser instance + * \return CSS_OK on success, appropriate error otherwise + */ +css_error done(css_parser *parser) +{ + parserutils_error perror; + + /* Pop current state from stack */ + perror = parserutils_stack_pop(parser->states, NULL); + if 
(perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + +#if !defined(NDEBUG) && defined(DEBUG_STACK) + parserutils_stack_dump(parser->states, __func__, printer); +#endif + + return CSS_OK; +} + +/** + * Assert that the expected token is next in the input + * + * \param parser The parser instance + * \param type The expected token type + * \return CSS_OK on success, appropriate error otherwise + */ +css_error expect(css_parser *parser, css_token_type type) +{ + const css_token *token; + css_error error; + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != type) { + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + return CSS_INVALID; + } + + return CSS_OK; +} + +/** + * Retrieve the next token in the input + * + * \param parser The parser instance + * \param token Pointer to location to receive token + * \return CSS_OK on success, appropriate error otherwise + */ +css_error getToken(css_parser *parser, const css_token **token) +{ + css_token temp; + const parserutils_dict_entry *interned; + parserutils_error perror; + css_error error; + + /* Use pushback, if it exists */ + if (parser->pushback != NULL) { + *token = parser->pushback; + parser->pushback = NULL; + } else { + /* Otherwise, ask the lexer */ + error = css_lexer_get_token(parser->lexer, token); + if (error != CSS_OK) + return error; + } + + temp = *(*token); + + if (temp.data.ptr != NULL) { + /* Insert token text into the dictionary */ + perror = parserutils_dict_insert(parser->dictionary, + temp.data.ptr, temp.data.len, &interned); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + + temp.data.ptr = interned->data; + temp.data.len = interned->len; + } + + /* Append token to vector */ + perror = parserutils_vector_append(parser->tokens, &temp); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + + return CSS_OK; +} + +/** + * Push a token back on 
the input + * + * \param parser The parser instance + * \param token The token to push back + * \return CSS_OK on success. + */ +css_error pushBack(css_parser *parser, const css_token *token) +{ + parserutils_error perror; + + /* The pushback buffer depth is 1 token. Assert this. */ + assert(parser->pushback == NULL); + + perror = parserutils_vector_remove_last(parser->tokens); + if (perror != PARSERUTILS_OK) + return css_error_from_parserutils_error(perror); + + parser->pushback = token; + + return CSS_OK; +} + +/** + * Eat whitespace tokens + * + * \param parser The parser instance + * \return CSS_OK on success, appropriate error otherwise + */ +css_error eatWS(css_parser *parser) +{ + const css_token *token; + css_error error; + + while (1) { + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_S) { + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + break; + } + } + + return CSS_OK; +} + +/****************************************************************************** + * Parser stages * + ******************************************************************************/ + +css_error parseStart(css_parser *parser) +{ + enum { Initial = 0, AfterWS = 1, AfterStylesheet = 2 }; + parser_state *state = parserutils_stack_get_current(parser->states); + css_error error = CSS_OK; + + /* start -> ws stylesheet EOF */ + + switch (state->substate) { + case Initial: +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("Begin stylesheet\n"); +#endif + error = eatWS(parser); + if (error != CSS_OK) + return error; + state->substate = AfterWS; + /* Fall through */ + case AfterWS: + { + parser_state to = { sStylesheet, Initial }; + parser_state subsequent = { sStart, AfterStylesheet }; + + return transition(parser, to, subsequent); + } + case AfterStylesheet: + error = expect(parser, CSS_TOKEN_EOF); + if (error != CSS_OK) + return error; + + /* Flag completion, just in case */ + } + +#if 
!defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, __func__, tprinter); + printf("End stylesheet\n"); +#endif + parserutils_vector_clear(parser->tokens); + + return done(parser); +} + +css_error parseStylesheet(css_parser *parser) +{ + enum { Initial = 0, WS = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* stylesheet -> CDO ws stylesheet + * CDC ws stylesheet + * statement ws stylesheet + */ + + while (1) { + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + switch (token->type) { + case CSS_TOKEN_EOF: + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return done(parser); + case CSS_TOKEN_CDO: + case CSS_TOKEN_CDC: + break; + default: + { + parser_state to = { sStatement, Initial }; + parser_state subsequent = { sStylesheet, WS }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } + } + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + state->substate = Initial; + } + } +} + +css_error parseStatement(css_parser *parser) +{ + enum { Initial = 0 }; + const css_token *token; + parser_state to = { sRuleset, Initial }; + css_error error; + + /* statement -> ruleset + * at-rule + */ + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type == CSS_TOKEN_ATKEYWORD) + to.state = sAtRule; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transitionNoRet(parser, to); +} + +css_error parseRuleset(css_parser *parser) +{ + enum { Initial = 0, Brace = 1, WS = 2 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* ruleset -> selector '{' ws ruleset-end + * -> '{' ws ruleset-end 
+ */ + + switch (state->substate) { + case Initial: +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("Begin ruleset\n"); +#endif + parserutils_vector_clear(parser->tokens); + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + /* The grammar's ambiguous here -- selectors may start with a + * brace. We're going to assume that that won't happen, + * however. */ + if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 && + token->data.ptr[0] == '{') { + state->substate = WS; + goto ws; + } else { + parser_state to = { sSelector, Initial }; + parser_state subsequent = { sRuleset, Brace }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } + break; + case Brace: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || + token->data.ptr[0] != '{') { + /** \todo Parse error */ + assert(0 && "Expected {"); + } + + state->substate = WS; + /* Fall through */ + case WS: + ws: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + + parser_state to = { sRulesetEnd, Initial }; + + return transitionNoRet(parser, to); +} + +css_error parseRulesetEnd(css_parser *parser) +{ + enum { Initial = 0, DeclList = 1, Brace = 2, WS = 3 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* ruleset-end -> declaration decl-list '}' ws + * -> decl-list '}' ws + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + if (token->type == CSS_TOKEN_IDENT) { + parser_state to = { sDeclaration, Initial }; + parser_state subsequent = { sRulesetEnd, DeclList }; + + return transition(parser, to, subsequent); + } + + state->substate = DeclList; + /* Fall through */ + 
case DeclList: + { + parser_state to = { sDeclList, Initial }; + parser_state subsequent = { sRulesetEnd, Brace }; + + return transition(parser, to, subsequent); + } + case Brace: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || + token->data.ptr[0] != '}') { + /** \todo parse error */ + assert(0 && "Expected }"); + } + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("End ruleset\n"); +#endif + + return done(parser); +} + +css_error parseAtRule(css_parser *parser) +{ + enum { Initial = 0, WS = 1, Any = 2, AfterAny = 3 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* at-rule -> ATKEYWORD ws any0 at-rule-end */ + + switch (state->substate) { + case Initial: +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("Begin at-rule\n"); +#endif + parserutils_vector_clear(parser->tokens); + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + assert(token->type == CSS_TOKEN_ATKEYWORD); + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + state->substate = Any; + /* Fall through */ + case Any: + { + parser_state to = { sAny0, Initial }; + parser_state subsequent = { sAtRule, AfterAny }; + + return transition(parser, to, subsequent); + } + case AfterAny: + break; + } + + parser_state to = { sAtRuleEnd, Initial }; + + return transitionNoRet(parser, to); +} + +css_error parseAtRuleEnd(css_parser *parser) +{ + enum { Initial = 0, WS = 1, AfterBlock = 2 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* at-rule-end -> block + * -> ';' ws + */ + + switch (state->substate) { + case Initial: +#if 
!defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, __func__, tprinter); +#endif + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) { + /** \todo parse error */ + assert(0 && "Expected { or ;"); + } + + if (token->data.ptr[0] == '{') { + parser_state to = { sBlock, Initial }; + parser_state subsequent = { sAtRuleEnd, AfterBlock }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } else if (token->data.ptr[0] != ';') { + /** \todo parse error */ + assert(0 && "Expected { or ;"); + } + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + case AfterBlock: + break; + } + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("End at-rule\n"); +#endif + + return done(parser); +} + +css_error parseBlock(css_parser *parser) +{ + enum { Initial = 0, WS = 1, Content = 2, Brace = 3, WS2 = 4 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* block -> '{' ws block-content '}' ws + * + * Initial: consume one token, clear parser->tokens, then check + * (assert only) that the token is '{'. + * WS: eat whitespace. + * Content: run sBlockContent, with Brace as the substate to resume + * in (read off the to/subsequent naming; transition() is + * not visible here -- TODO confirm). + * Brace: consume a token that must be '}' (assert only). + * WS2: eat trailing whitespace; parser->tokens is then cleared + * again and done() is called. + * + * NOTE(review): here the vector is cleared after the token has been + * fetched and before it is inspected, whereas parseAtRule clears + * first and fetches second. If fetched tokens live in + * parser->tokens this inspects a cleared entry -- confirm against + * getToken(). + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("Begin block\n"); +#endif + parserutils_vector_clear(parser->tokens); + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || + token->data.ptr[0] != '{') { + /** \todo parse error */ + assert(0 && "Expected {"); + } + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + state->substate = Content; + /* Fall through */ + case Content: + { + parser_state to = { sBlockContent, Initial }; + parser_state subsequent = { sBlock, Brace }; + + return transition(parser, to, subsequent); + } + case Brace: + 
error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || + token->data.ptr[0] != '}') { + /** \todo parse error */ + assert(0 && "Expected }"); + } + + state->substate = WS2; + /* Fall through */ + case WS2: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + printf("End block\n"); +#endif + parserutils_vector_clear(parser->tokens); + + return done(parser); +} + +css_error parseBlockContent(css_parser *parser) +{ + enum { Initial = 0, WS = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* block-content -> any block-content + * -> block block-content + * -> ATKEYWORD ws block-content + * -> ';' ws block-content + * -> + * + * Loops over two substates. Initial consumes one token and + * dispatches on it: + * ATKEYWORD or ';' -> WS: eat whitespace, back to Initial + * '{' -> pushed back; sBlock runs + * '}' or EOF -> pushed back; done() + * anything else -> pushed back; sAny runs + * After sBlock or sAny the substate to resume in is Initial (read + * off the to/subsequent naming; transition() is not visible here + * -- TODO confirm). Non-CSS_OK results from getToken, pushBack and + * eatWS are returned as is. + */ + + while (1) { + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type == CSS_TOKEN_ATKEYWORD) { + state->substate = WS; + } else if (token->type == CSS_TOKEN_CHAR) { + if (token->data.len == 1 && + token->data.ptr[0] == '{') { + /* Grammar ambiguity. Assume block */ + parser_state to = { sBlock, Initial }; + parser_state subsequent = + { sBlockContent, Initial }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, + __func__, tprinter); +#endif + + return transition(parser, to, + subsequent); + } else if (token->data.len == 1 && + token->data.ptr[0] == ';') { + /* Grammar ambiguity. Assume semi */ + state->substate = WS; + } else if (token->data.len == 1 && + token->data.ptr[0] == '}') { + /* Grammar ambiguity. 
Assume end */ + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, + __func__, tprinter); +#endif + + return done(parser); + } + } else if (token->type == CSS_TOKEN_EOF) { + /** \todo parse error */ + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, + __func__, tprinter); +#endif + + return done(parser); + } + + if (state->substate == Initial) { + parser_state to = { sAny, Initial }; + parser_state subsequent = + { sBlockContent, Initial }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } /* Fall through: only reached with substate already set to WS */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + state->substate = Initial; + } + } + + return done(parser); +} + +css_error parseSelector(css_parser *parser) +{ + enum { Initial = 0, AfterAny1 = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + + /* selector -> any1 + * + * Initial: clear parser->tokens and run sAny1, with AfterAny1 as + * the substate to resume in (read off the to/subsequent + * naming; transition() is not visible here -- TODO + * confirm). + * AfterAny1: optionally dump the collected tokens (debug builds + * with DEBUG_EVENTS only), then done(). + */ + + switch (state->substate) { + case Initial: + { + parser_state to = { sAny1, Initial }; + parser_state subsequent = { sSelector, AfterAny1 }; + + parserutils_vector_clear(parser->tokens); + + return transition(parser, to, subsequent); + } + case AfterAny1: +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, __func__, tprinter); +#endif + break; + } + + return done(parser); +} + +css_error parseDeclaration(css_parser *parser) +{ + enum { Initial = 0, Colon = 1, WS = 2, AfterValue1 = 3 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* declaration -> property ':' ws value1 + * + * Initial: clear parser->tokens and run sProperty, resuming in + * Colon. + * Colon: consume a token that must be ':' (assert only). + * WS: eat whitespace, then run sValue1, resuming in + * AfterValue1. + * AfterValue1: optional debug dump of the tokens, then done(). + * The resume behaviour is read off the to/subsequent naming; + * transition() is not visible here -- TODO confirm. + */ + + switch (state->substate) { + case Initial: + { + parser_state to = { sProperty, Initial }; + parser_state subsequent = { sDeclaration, Colon }; + + 
parserutils_vector_clear(parser->tokens); + + return transition(parser, to, subsequent); + } + case Colon: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || + token->data.ptr[0] != ':') { + /** \todo parse error */ + assert(0 && "Expected :"); + } + + state->substate = WS; + /* Fall through */ + case WS: + { + parser_state to = { sValue1, Initial }; + parser_state subsequent = { sDeclaration, AfterValue1 }; + + error = eatWS(parser); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } + case AfterValue1: +#if !defined(NDEBUG) && defined(DEBUG_EVENTS) + parserutils_vector_dump(parser->tokens, __func__, tprinter); +#endif + break; + } + + return done(parser); +} + +css_error parseDeclList(css_parser *parser) +{ + enum { Initial = 0, WS = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* decl-list -> ';' ws decl-list-end + * -> + * + * Initial: consume one token, which must be a one-character CHAR. + * '}' means the list is empty: the token is pushed back + * and done() is called. ';' moves on to WS. Anything + * else is only caught by assert() for now. + * WS: eat whitespace; control then moves on to sDeclListEnd + * through transitionNoRet(). + * + * NOTE(review): when NDEBUG is defined the asserts vanish, so a + * token that failed the first check still has data.ptr[0] read + * and execution carries on into WS. Real error handling is + * still a todo. + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) { + /** \todo parse error */ + assert(0 && "Expected ; or }"); + } + + if (token->data.ptr[0] == '}') { + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return done(parser); + } else if (token->data.ptr[0] == ';') { + state->substate = WS; + } else { + /** \todo parse error */ + assert(0 && "Expected ; or }"); + } + + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + + parser_state to = { sDeclListEnd, Initial }; + + return transitionNoRet(parser, to); +} + +css_error parseDeclListEnd(css_parser *parser) +{ + enum { Initial = 0, AfterDeclaration = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + 
+ /* decl-list-end -> declaration decl-list + * -> decl-list + * + * Initial: consume one token. An IDENT is pushed back and + * sDeclaration runs, with AfterDeclaration as the + * substate to resume in (read off the to/subsequent + * naming; transition() is not visible here -- TODO + * confirm). ';' or '}' is pushed back. Anything else + * is only caught by assert() and is not pushed back. + * AfterDeclaration: control moves on to sDeclList through + * transitionNoRet(). + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type == CSS_TOKEN_IDENT) { + parser_state to = { sDeclaration, Initial }; + parser_state subsequent = + { sDeclListEnd, AfterDeclaration }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transition(parser, to, subsequent); + } else if (token->type != CSS_TOKEN_CHAR || + token->data.len != 1 || + (token->data.ptr[0] != ';' && + token->data.ptr[0] != '}')) { + /** \todo parse error */ + assert(0 && "Expected ; or }"); + } else { + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + } + + state->substate = AfterDeclaration; + /* Fall through */ + case AfterDeclaration: + break; + } + + parser_state to = { sDeclList, Initial }; + + return transitionNoRet(parser, to); +} + +css_error parseProperty(css_parser *parser) +{ + enum { Initial = 0, WS = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* property -> IDENT ws + * + * Initial: consume one token, which must be an IDENT (assert only). + * WS: eat whitespace, then done(). + * Non-CSS_OK results from getToken/eatWS are returned as is; the + * substate is stored before the fall-through into WS. + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_IDENT) { + /** \todo parse error */ + assert(0 && "Expected IDENT"); + } + + state->substate = WS; + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + + return done(parser); +} + +css_error parseValue1(css_parser *parser) +{ + enum { Initial = 0, AfterValue = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + + /* value1 -> value value0 + * + * Initial: run sValue, with AfterValue as the substate to + * resume in (read off the to/subsequent naming; + * transition() is not visible here -- TODO confirm). + * AfterValue: control moves on to sValue0 through + * transitionNoRet(). + */ + + switch (state->substate) { + case Initial: + { + parser_state to = { sValue, Initial }; + parser_state subsequent = { sValue1, AfterValue }; + + return transition(parser, to, subsequent); + } + case AfterValue: + break; + } + + parser_state 
to = { sValue0, Initial }; + + return transitionNoRet(parser, to); +} + +css_error parseValue0(css_parser *parser) +{ + enum { Initial = 0, AfterValue = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* value0 -> value value0 + * -> + * + * Initial: peek at the next token (fetched, then pushed straight + * back). ';' or '}' ends the list: done(). Otherwise + * run sValue, with AfterValue as the substate to resume + * in (read off the to/subsequent naming; transition() + * is not visible here -- TODO confirm). + * AfterValue: reset to Initial and go round the loop again. + * Every path out of the loop is a return, so the done() call + * after the loop is never reached. + */ + + while (1) { + switch (state->substate) { + case Initial: + { + parser_state to = { sValue, Initial }; + parser_state subsequent = { sValue0, AfterValue }; + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + /* Grammar ambiguity -- assume ';' or '}' mark end */ + if (token->type == CSS_TOKEN_CHAR && + token->data.len == 1 && + (token->data.ptr[0] == ';' || + token->data.ptr[0] == '}')) { + return done(parser); + } + + return transition(parser, to, subsequent); + } + case AfterValue: + state->substate = Initial; + + break; + } + } + + return done(parser); +} + +css_error parseValue(css_parser *parser) +{ + enum { Initial = 0, WS = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* value -> any + * -> block + * -> ATKEYWORD ws + * + * Initial: consume one token. ATKEYWORD moves on to WS. '{' is + * pushed back and control moves on to sBlock; anything + * else is pushed back and control moves on to sAny, + * both through transitionNoRet(). + * WS: eat whitespace, then done(). + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type == CSS_TOKEN_ATKEYWORD) { + state->substate = WS; + } else if (token->type == CSS_TOKEN_CHAR && + token->data.len == 1 && + token->data.ptr[0] == '{') { + /* Grammar ambiguity. Assume block. 
*/ + parser_state to = { sBlock, Initial }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transitionNoRet(parser, to); + } else { + parser_state to = { sAny, Initial }; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + return transitionNoRet(parser, to); + } + + /* Fall through */ + case WS: + error = eatWS(parser); + if (error != CSS_OK) + return error; + + break; + } + + return done(parser); +} + +css_error parseAny0(css_parser *parser) +{ + enum { Initial = 0, AfterAny = 1 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* any0 -> any any0 + * -> + * + * Initial: peek at the next token (fetched, then pushed straight + * back). A one-character CHAR that is an opening brace, + * a semicolon, or a closing paren or square bracket + * ends the list: done(). Otherwise run sAny, with + * AfterAny as the substate to resume in (read off the + * to/subsequent naming; transition() is not visible + * here -- TODO confirm). + * AfterAny: reset to Initial and go round the loop again. + * Every path out of the loop is a return, so the done() call + * after the loop is never reached. + */ + + while (1) { + switch (state->substate) { + case Initial: + { + parser_state to = { sAny, Initial }; + parser_state subsequent = { sAny0, AfterAny }; + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + /* Grammar ambiguity: + * assume '{', ';', ')', ']' mark end */ + if (token->type == CSS_TOKEN_CHAR && + token->data.len == 1 && + (token->data.ptr[0] == '{' || + token->data.ptr[0] == ';' || + token->data.ptr[0] == ')' || + token->data.ptr[0] == ']')) { + return done(parser); + } + + return transition(parser, to, subsequent); + } + case AfterAny: + state->substate = Initial; + + break; + } + } + + return done(parser); +} + +css_error parseAny1(css_parser *parser) +{ + enum { Initial = 0, AfterAny = 1, AfterAny0 = 2 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* any1 -> any any0 + * + * Initial: run sAny, resuming in AfterAny. + * AfterAny: run sAny0, resuming in AfterAny0. + * AfterAny0: peek at the next token. An opening brace ends the + * production: done(). A semicolon or a closing paren + * or square bracket starts another sAny, resuming in + * AfterAny again. Anything else is only caught by + * assert() for now. + * The resume behaviour is read off the to/subsequent naming; + * transition() is not visible here -- TODO confirm. + */ + + switch (state->substate) { + case Initial: + { + parser_state to = { sAny, Initial }; + parser_state subsequent = { sAny1, AfterAny }; + + return transition(parser, to, subsequent); + } + case AfterAny: + { + parser_state to = { sAny0, Initial }; + parser_state subsequent = { sAny1, AfterAny0 }; + + return 
transition(parser, to, subsequent); + } + case AfterAny0: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + error = pushBack(parser, token); + if (error != CSS_OK) + return error; + + /* Grammar ambiguity: any0 can be followed by + * '{', ';', ')', ']'. any1 can only be followed by '{'. */ + if (token->type == CSS_TOKEN_CHAR && token->data.len == 1) { + if (token->data.ptr[0] == ';' || + token->data.ptr[0] == ')' || + token->data.ptr[0] == ']') { + parser_state to = { sAny, Initial }; + parser_state subsequent = { sAny1, AfterAny }; + + return transition(parser, to, subsequent); + } else if (token->data.ptr[0] != '{') { + /** \todo parse error */ + assert(0 && "Expected {, ;, ), or ]"); + } + } else { + /** \todo parse error */ + assert(0 && "Expected {, ;, ), or ]"); + } + } + + return done(parser); +} + +css_error parseAny(css_parser *parser) +{ + enum { Initial = 0, WS = 1, AfterAny0 = 2, WS2 = 3 }; + parser_state *state = parserutils_stack_get_current(parser->states); + const css_token *token; + css_error error; + + /* any -> IDENT ws + * -> NUMBER ws + * -> PERCENTAGE ws + * -> DIMENSION ws + * -> STRING ws + * -> CHAR ws + * -> URI ws + * -> HASH ws + * -> UNICODE-RANGE ws + * -> INCLUDES ws + * -> DASHMATCH ws + * -> PREFIXMATCH ws + * -> SUFFIXMATCH ws + * -> SUBSTRINGMATCH ws + * -> FUNCTION ws any0 ')' ws + * -> '(' ws any0 ')' ws + * -> '[' ws any0 ']' ws + * + * Initial: consume one token; its type must be one of those + * listed above (assert only). For FUNCTION, '(' and + * '[' the expected closing character is stored in + * parser->match_char. + * WS/WS2: eat whitespace. In WS2 that completes the production + * (done()); in WS sAny0 runs next, with AfterAny0 as + * the substate to resume in (read off the + * to/subsequent naming -- TODO confirm). + * AfterAny0: consume one token. If it is the stored closing + * character, finish through WS2; otherwise run sAny0 + * again without pushing the token back. + * + * NOTE(review): the end of Initial stores WS2 into the substate + * unconditionally, after the branch that chose WS, and nothing + * can observe the WS value in between. As written, the WS and + * AfterAny0 steps are therefore never entered from this function + * and every token takes the plain 'token ws' path -- confirm + * whether that is intended. + */ + + switch (state->substate) { + case Initial: + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + if (token->type != CSS_TOKEN_IDENT && + token->type != CSS_TOKEN_NUMBER && + token->type != CSS_TOKEN_PERCENTAGE && + token->type != CSS_TOKEN_DIMENSION && + token->type != CSS_TOKEN_STRING && + token->type != CSS_TOKEN_CHAR && + token->type != CSS_TOKEN_URI && + token->type != CSS_TOKEN_HASH && + token->type != CSS_TOKEN_UNICODE_RANGE && + token->type != CSS_TOKEN_INCLUDES && + token->type != CSS_TOKEN_DASHMATCH && + token->type != 
CSS_TOKEN_PREFIXMATCH && + token->type != CSS_TOKEN_SUFFIXMATCH && + token->type != CSS_TOKEN_SUBSTRINGMATCH && + token->type != CSS_TOKEN_FUNCTION) { + /** \todo parse error */ + assert(0 && "Invalid any"); + } + + if (token->type == CSS_TOKEN_FUNCTION) { + parser->match_char = ')'; + state->substate = WS; + } else if (token->type == CSS_TOKEN_CHAR && + token->data.len == 1 && + (token->data.ptr[0] == '(' || + token->data.ptr[0] == '[')) { + parser->match_char = + token->data.ptr[0] == '(' ? ')' : ']'; + state->substate = WS; + } + + state->substate = WS2; + /* Fall through */ + case WS: + case WS2: + ws2: + { + parser_state to = { sAny0, Initial }; + parser_state subsequent = { sAny, AfterAny0 }; + + error = eatWS(parser); + if (error != CSS_OK) + return error; + + if (state->substate == WS2) + break; + + return transition(parser, to, subsequent); + } + case AfterAny0: + { + parser_state to = { sAny0, Initial }; + parser_state subsequent = { sAny, AfterAny0 }; + + error = getToken(parser, &token); + if (error != CSS_OK) + return error; + + /* Match correct close bracket (grammar ambiguity) */ + if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 && + token->data.ptr[0] == parser->match_char) { + state->substate = WS2; + goto ws2; + } + + return transition(parser, to, subsequent); + } + } + + return done(parser); +} + + +#ifndef NDEBUG +#ifdef DEBUG_STACK +static void printer(void *item) +{ + parser_state *s = item; + + printf("[%d %d]", s->state, s->substate); +} +#endif + +#ifdef DEBUG_EVENTS +static void tprinter(void *token) +{ + css_token *t = token; + + if (t->data.ptr) + printf("%d: %.*s", t->type, t->data.len, t->data.ptr); + else + printf("%d", t->type); +} +#endif +#endif + diff --git a/src/parse/parse.h b/src/parse/parse.h new file mode 100644 index 0000000..d4ae3c4 --- /dev/null +++ b/src/parse/parse.h @@ -0,0 +1,45 @@ +/* + * This file is part of LibCSS. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org> + */ + +#ifndef css_parse_parse_h_ +#define css_parse_parse_h_ + +#include <libcss/functypes.h> +#include <libcss/types.h> + +typedef struct css_parser css_parser; + +/** + * Parser option types + * + * Passed as the type argument of css_parser_setopt(). Presumably it + * selects which member of css_parser_optparams is read; setopt's + * body is not visible here -- TODO confirm. + */ +typedef enum css_parser_opttype { + CSS_PARSER_QUIRKS, +} css_parser_opttype; + +/** + * Parser option parameters + * + * One member per option type; quirks pairs with CSS_PARSER_QUIRKS by + * name. + */ +typedef union css_parser_optparams { + bool quirks; +} css_parser_optparams; + +css_parser *css_parser_create(css_stylesheet *sheet, const char *charset, + css_charset_source cs_source, css_alloc alloc, void *pw); +void css_parser_destroy(css_parser *parser); + +css_error css_parser_setopt(css_parser *parser, css_parser_opttype type, + css_parser_optparams *params); + +css_error css_parser_parse_chunk(css_parser *parser, const uint8_t *data, + size_t len); +css_error css_parser_completed(css_parser *parser); + +const char *css_parser_read_charset(css_parser *parser, + css_charset_source *source); + +#endif + @@ -6,6 +6,7 @@ libcss Library initialisation/finalisation csdetect Character set detection csdetect lex Lexing css lex-auto Automated lexer tests lex +parse Parsing css # Regression tests diff --git a/test/Makefile b/test/Makefile index e2a97c6..a8363f4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -35,7 +35,7 @@ d := $(DIR) CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d) # Tests -TESTS_$(d) := csdetect lex lex-auto libcss +TESTS_$(d) := csdetect lex lex-auto libcss parse TESTS_$(d) := $(TESTS_$(d)) # Items for top-level makefile to use diff --git a/test/data/css/INDEX b/test/data/css/INDEX index 531d4a5..4d46ba2 100644 --- a/test/data/css/INDEX +++ b/test/data/css/INDEX @@ -4,3 +4,4 @@ simple.css Reasonably simple CSS file (semantically invalid) allzengarden.css All CSS Zen Garden stylesheets concatenated +blocks.css Basic blocks and at-rule syntax diff --git a/test/data/css/blocks.css 
b/test/data/css/blocks.css new file mode 100644 index 0000000..9ecd720 --- /dev/null +++ b/test/data/css/blocks.css @@ -0,0 +1,10 @@ +@charset "UTF-8"; + +@import "simple.css"; + +@media screen +{ + body { background-color: green; } +} + + diff --git a/test/parse.c b/test/parse.c new file mode 100644 index 0000000..f24aa76 --- /dev/null +++ b/test/parse.c @@ -0,0 +1,79 @@ +#include <inttypes.h> +#include <stdio.h> + +#include <libcss/libcss.h> + +#include "charset/detect.h" +#include "utils/utils.h" + +#include "parse/parse.h" + +#include "testutils.h" + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +int main(int argc, char **argv) +{ + css_parser *parser; + FILE *fp; + size_t len, origlen; +#define CHUNK_SIZE (4096) + uint8_t buf[CHUNK_SIZE]; + css_error error; + + if (argc != 3) { + printf("Usage: %s <aliases_file> <filename>\n", argv[0]); + return 1; + } + + /* Initialise library */ + assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK); + + parser = css_parser_create((css_stylesheet *) 10, + "UTF-8", CSS_CHARSET_DICTATED, myrealloc, NULL); + assert(parser != NULL); + + fp = fopen(argv[2], "rb"); + if (fp == NULL) { + printf("Failed opening %s\n", argv[2]); + return 1; + } + + fseek(fp, 0, SEEK_END); + origlen = len = ftell(fp); + fseek(fp, 0, SEEK_SET); + + while (len >= CHUNK_SIZE) { + fread(buf, 1, CHUNK_SIZE, fp); + + error = css_parser_parse_chunk(parser, buf, CHUNK_SIZE); + assert(error == CSS_OK || error == CSS_NEEDDATA); + } + + if (len > 0) { + fread(buf, 1, len, fp); + + error = css_parser_parse_chunk(parser, buf, len); + assert(error == CSS_OK || error == CSS_NEEDDATA); + + len = 0; + } + + fclose(fp); + + assert(css_parser_completed(parser) == CSS_OK); + + css_parser_destroy(parser); + + assert(css_finalise(myrealloc, NULL) == CSS_OK); + + printf("PASS\n"); + + return 0; +} + |