From d7a4adf48149e0d6d1ae0cc79c1a6d73aa02dd3f Mon Sep 17 00:00:00 2001 From: James Bursa Date: Sat, 1 May 2004 17:48:38 +0000 Subject: [project @ 2004-05-01 17:48:38 by bursa] CSS parsing improvements: new tokeniser using re2c, improve memory-exhaustion behaviour, plug leaks, reduce memory usage, clean up code, add source documention. svn path=/import/netsurf/; revision=806 --- css/parser.y | 260 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 180 insertions(+), 80 deletions(-) (limited to 'css/parser.y') diff --git a/css/parser.y b/css/parser.y index b373ad585..81a2772c1 100644 --- a/css/parser.y +++ b/css/parser.y @@ -2,12 +2,12 @@ * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 James Bursa + * Copyright 2004 James Bursa */ -/* +/** \file -CSS parser using the lemon parser generator +CSS parser using the lemon parser generator. see CSS2 Specification, chapter 4 http://www.w3.org/TR/REC-CSS2/syndata.html, @@ -40,9 +40,10 @@ statement ::= at_rule. at_rule ::= ATKEYWORD any_list block. at_rule ::= ATKEYWORD(A) any_list(B) SEMI. - { if (strcasecmp(A, "@import") == 0) + { if ((A.length == 7) && (strncasecmp(A.text, "@import", 7) == 0) + && B) css_atimport(param->stylesheet, B); - free(A); css_free_node(B); } + css_free_node(B); } block ::= LBRACE block_body RBRACE. block_body ::= . @@ -52,7 +53,10 @@ block_body ::= block_body ATKEYWORD. block_body ::= block_body SEMI. ruleset ::= selector_list(A) LBRACE declaration_list(B) RBRACE. - { css_add_ruleset(param->stylesheet, A, B); + { if (A && B) + css_add_ruleset(param->stylesheet, A, B); + else + css_free_selector(A); css_free_node(B); } ruleset ::= LBRACE declaration_list(A) RBRACE. /* this form of ruleset not used in CSS2 @@ -65,13 +69,28 @@ ruleset ::= any_list_1(A) LBRACE declaration_list(B) RBRACE. selector_list(A) ::= selector(B). { A = B; } selector_list(A) ::= selector_list(B) COMMA selector(C). - { C->next = B; A = C; } + { if (B && C) { + C->next = B; + A = C; + } else { + css_free_selector(B); + css_free_selector(C); + A = 0; + } } selector(A) ::= simple_selector(B). { A = B; } selector(A) ::= selector(B) css_combinator(C) simple_selector(D). - { D->right = B; D->comb = C; A = D; - A->specificity += B->specificity; } + { if (B && D) { + D->combiner = B; + D->comb = C; + D->specificity += B->specificity; + A = D; + } else { + css_free_selector(B); + css_free_selector(D); + A = 0; + } } css_combinator(A) ::= . { A = CSS_COMB_ANCESTOR; } @@ -81,64 +100,106 @@ css_combinator(A) ::= GT. { A = CSS_COMB_PARENT; } simple_selector(A) ::= element_name(B) detail_list(C). - { A = css_new_node(CSS_NODE_SELECTOR, B, C, 0); - A->specificity = 1 + C->specificity; } + { if (C && (A = css_new_selector(CSS_SELECTOR_ELEMENT, + B.text, B.length))) { + A->detail = C; + A->specificity = 1 + C->specificity; + } else { + param->memory_error = true; + css_free_selector(C); + A = 0; + } } simple_selector(A) ::= element_name(B). - { A = css_new_node(CSS_NODE_SELECTOR, B, 0, 0); - A->specificity = 1; } + { if ((A = css_new_selector(CSS_SELECTOR_ELEMENT, + B.text, B.length))) + A->specificity = 1; + else + param->memory_error = true; + } simple_selector(A) ::= detail_list(C). - { A = css_new_node(CSS_NODE_SELECTOR, 0, C, 0); - A->specificity = C->specificity; } + { if (C && (A = css_new_selector(CSS_SELECTOR_ELEMENT, 0, 0))) { + A->detail = C; + A->specificity = C->specificity; + } else { + param->memory_error = true; + css_free_selector(C); + A = 0; + } } element_name(A) ::= IDENT(B). { A = B; } element_name(A) ::= ASTERISK. - { A = 0; } + { A.text = 0; } detail_list(A) ::= detail(B). { A = B; } detail_list(A) ::= detail(B) detail_list(C). - { A = B; A->specificity += C->specificity; A->next = C; } + { if (B && C) { + B->specificity += C->specificity; + B->next = C; + A = B; + } else { + css_free_selector(B); + css_free_selector(C); + A = 0; + } } detail(A) ::= HASH(B). - { A = css_new_node(CSS_NODE_ID, B, 0, 0); - A->specificity = 0x10000; } + { A = css_new_selector(CSS_SELECTOR_ID, B.text+1, B.length-1); + if (A) A->specificity = 0x10000; + else param->memory_error = true; } detail(A) ::= DOT IDENT(B). - { A = css_new_node(CSS_NODE_CLASS, B, 0, 0); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_CLASS, B.text, B.length); + if (A) A->specificity = 0x100; + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB, B, 0, 0); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB, B.text, B.length); + if (A) A->specificity = 0x100; + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) EQUALS IDENT(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_EQ, B, 0, 0); A->data2 = C; - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_EQ, B.text, B.length); + if (A) { A->data2 = C.text; A->data2_length = C.length; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) EQUALS STRING(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_EQ, B, 0, 0); A->data2 = css_unquote(C); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_EQ, B.text, B.length); + if (A) { A->data2 = C.text + 1; A->data2_length = C.length - 2; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) INCLUDES IDENT(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_INC, B, 0, 0); A->data2 = C; - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_INC, B.text, B.length); + if (A) { A->data2 = C.text; A->data2_length = C.length; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) INCLUDES STRING(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_INC, B, 0, 0); A->data2 = css_unquote(C); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_INC, B.text, B.length); + if (A) { A->data2 = C.text + 1; A->data2_length = C.length - 2; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) DASHMATCH IDENT(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_DM, B, 0, 0); A->data2 = C; - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_DM, B.text, B.length); + if (A) { A->data2 = C.text; A->data2_length = C.length; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= LBRAC IDENT(B) DASHMATCH STRING(C) RBRAC. - { A = css_new_node(CSS_NODE_ATTRIB_DM, B, 0, 0); A->data2 = css_unquote(C); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_ATTRIB_DM, B.text, B.length); + if (A) { A->data2 = C.text + 1; A->data2_length = C.length - 2; + A->specificity = 0x100; } + else param->memory_error = true; } detail(A) ::= COLON IDENT(B). - { if (strcasecmp(B, "link") == 0) { - A = css_new_node(CSS_NODE_ATTRIB, xstrdup("href"), 0, 0); - A->specificity = 0x100; - free(B); + { if (B.length == 4 && strncasecmp(B.text, "link", 4) == 0) { + A = css_new_selector(CSS_SELECTOR_ATTRIB, "href", 4); + if (A) A->specificity = 0x100; + else param->memory_error = true; } else { - A = css_new_node(CSS_NODE_PSEUDO, B, 0, 0); - A->specificity = 0x100; + A = css_new_selector(CSS_SELECTOR_PSEUDO, B.text, B.length); + if (A) A->specificity = 0x100; + else param->memory_error = true; } } detail(A) ::= COLON FUNCTION(B) IDENT RPAREN. - { A = css_new_node(CSS_NODE_PSEUDO, B, 0, 0); - A->specificity = 0x100; } + { A = css_new_selector(CSS_SELECTOR_PSEUDO, B.text, B.length); + if (A) A->specificity = 0x100; + else param->memory_error = true; } declaration_list(A) ::= . { A = 0; } @@ -150,7 +211,14 @@ declaration_list(A) ::= declaration(B) SEMI declaration_list(C). { if (B) { B->next = C; A = B; } else { A = C; } } declaration(A) ::= property(B) COLON value(C). - { A = css_new_node(CSS_NODE_DECLARATION, B, C, 0); } + { if (C && (A = css_new_node(CSS_NODE_DECLARATION, + B.text, B.length))) + A->value = C; + else { + param->memory_error = true; + css_free_node(C); + A = 0; + } } declaration(A) ::= any_list_1(B). /* malformed declaration: ignore */ { A = 0; css_free_node(B); } @@ -160,7 +228,8 @@ property(A) ::= IDENT(B). value(A) ::= any(B). { A = B; } value(A) ::= any(B) value(C). - { B->next = C; A = B; } + { if (B && C) { B->next = C; A = B; } + else { css_free_node(B); css_free_node(C); A = 0; } } value(A) ::= value(B) block. { A = B; } value(A) ::= value(B) ATKEYWORD. @@ -170,70 +239,100 @@ value(A) ::= value(B) ATKEYWORD. any_list(A) ::= . { A = 0; } any_list(A) ::= any(B) any_list(C). - { B->next = C; A = B; } + { if (B) { B->next = C; A = B; } + else { css_free_node(B); css_free_node(C); A = 0; } } any_list_1(A) ::= any(B) any_list(C). - { B->next = C; A = B; } + { if (B) { B->next = C; A = B; } + else { css_free_node(B); css_free_node(C); A = 0; } } any(A) ::= IDENT(B). - { A = css_new_node(CSS_NODE_IDENT, B, 0, 0); } + { A = css_new_node(CSS_NODE_IDENT, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= NUMBER(B). - { A = css_new_node(CSS_NODE_NUMBER, B, 0, 0); } + { A = css_new_node(CSS_NODE_NUMBER, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= PERCENTAGE(B). - { A = css_new_node(CSS_NODE_PERCENTAGE, B, 0, 0); } + { A = css_new_node(CSS_NODE_PERCENTAGE, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= DIMENSION(B). - { A = css_new_node(CSS_NODE_DIMENSION, B, 0, 0); } + { A = css_new_node(CSS_NODE_DIMENSION, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= STRING(B). - { A = css_new_node(CSS_NODE_STRING, css_unquote(B), 0, 0); } + { A = css_new_node(CSS_NODE_STRING, B.text + 1, B.length - 2); + if (!A) param->memory_error = true; } any(A) ::= DELIM(B). - { A = css_new_node(CSS_NODE_DELIM, B, 0, 0); } + { A = css_new_node(CSS_NODE_DELIM, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= URI(B). - { A = css_new_node(CSS_NODE_URI, B, 0, 0); } + { A = css_new_node(CSS_NODE_URI, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= HASH(B). - { A = css_new_node(CSS_NODE_HASH, B, 0, 0); } + { A = css_new_node(CSS_NODE_HASH, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= UNICODE_RANGE(B). - { A = css_new_node(CSS_NODE_UNICODE_RANGE, B, 0, 0); } + { A = css_new_node(CSS_NODE_UNICODE_RANGE, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= INCLUDES. - { A = css_new_node(CSS_NODE_INCLUDES, 0, 0, 0); } + { A = css_new_node(CSS_NODE_INCLUDES, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= FUNCTION(B). - { A = css_new_node(CSS_NODE_FUNCTION, B, 0, 0); } + { A = css_new_node(CSS_NODE_FUNCTION, B.text, B.length); + if (!A) param->memory_error = true; } any(A) ::= DASHMATCH. - { A = css_new_node(CSS_NODE_DASHMATCH, 0, 0, 0); } + { A = css_new_node(CSS_NODE_DASHMATCH, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= COLON. - { A = css_new_node(CSS_NODE_COLON, 0, 0, 0); } + { A = css_new_node(CSS_NODE_COLON, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= COMMA. - { A = css_new_node(CSS_NODE_COMMA, 0, 0, 0); } + { A = css_new_node(CSS_NODE_COMMA, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= DOT. - { A = css_new_node(CSS_NODE_DOT, 0, 0, 0); } + { A = css_new_node(CSS_NODE_DOT, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= PLUS. - { A = css_new_node(CSS_NODE_PLUS, 0, 0, 0); } + { A = css_new_node(CSS_NODE_PLUS, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= GT. - { A = css_new_node(CSS_NODE_GT, 0, 0, 0); } + { A = css_new_node(CSS_NODE_GT, 0, 0); + if (!A) param->memory_error = true; } any(A) ::= LPAREN any_list(B) RPAREN. - { A = css_new_node(CSS_NODE_PAREN, 0, B, 0); } + { if ((A = css_new_node(CSS_NODE_PAREN, 0, 0))) + A->value = B; + else { + param->memory_error = true; + css_free_node(B); + A = 0; + } } any(A) ::= LBRAC any_list(B) RBRAC. - { A = css_new_node(CSS_NODE_BRAC, 0, B, 0); } + { if ((A = css_new_node(CSS_NODE_BRAC, 0, 0))) + A->value = B; + else { + param->memory_error = true; + css_free_node(B); + A = 0; + } } any(A) ::= ASTERISK(B). - { A = css_new_node(CSS_NODE_DELIM, B, 0, 0); } + { A = css_new_node(CSS_NODE_DELIM, B.text, B.length); + if (!A) param->memory_error = true; } /* lemon directives */ -%extra_argument { struct parse_params *param } +%extra_argument { struct css_parser_params *param } %include { #define CSS_INTERNALS -#include "netsurf/css/scanner.h" #include "netsurf/css/css.h" #include "netsurf/utils/utils.h" } %name css_parser_ -%token_type { char* } -%token_destructor { xfree($$); } +%token_type { struct css_parser_token } -%type selector_list { struct css_node * } -%type selector { struct css_node * } +%type selector_list { struct css_selector * } +%type selector { struct css_selector * } %type css_combinator { css_combinator } -%type simple_selector { struct css_node * } -%type detail_list { struct css_node * } -%type detail { struct css_node * } +%type simple_selector { struct css_selector * } +%type detail_list { struct css_selector * } +%type detail { struct css_selector * } %type declaration_list { struct css_node * } %type declaration { struct css_node * } %type value { struct css_node * } @@ -241,10 +340,11 @@ any(A) ::= ASTERISK(B). %type any_list_1 { struct css_node * } %type any { struct css_node * } -%destructor selector_list { css_free_node($$); } -%destructor selector { css_free_node($$); } -%destructor simple_selector { css_free_node($$); } -%destructor detail_list { css_free_node($$); } +%destructor selector_list { css_free_selector($$); } +%destructor selector { css_free_selector($$); } +%destructor simple_selector { css_free_selector($$); } +%destructor detail_list { css_free_selector($$); } +%destructor detail { css_free_selector($$); } %destructor declaration_list { css_free_node($$); } %destructor declaration { css_free_node($$); } %destructor value { css_free_node($$); } -- cgit v1.2.3