summary | refs | log | tree | commit | diff
path: root/css/scanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'css/scanner.l')
-rw-r--r-- css/scanner.l 145
1 files changed, 82 insertions, 63 deletions
diff --git a/css/scanner.l b/css/scanner.l
index 1f8cc1d92..36347b3a8 100644
--- a/css/scanner.l
+++ b/css/scanner.l
@@ -2,78 +2,97 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
- * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
+ * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
-%{
-#include "parser.h"
-%}
+/** \file
+ * CSS tokeniser using re2c.
+ *
+ * see CSS2 Specification, chapter 4
+ * http://www.w3.org/TR/REC-CSS2/syndata.html,
+ * and errata
+ * http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata
+ */
+
+#include <stdbool.h>
+#define CSS_INTERNALS
+#include "netsurf/css/css.h"
+#include "netsurf/css/parser.h"
+
+#define YYCTYPE unsigned char /* re2c: type of one input character */
+#define YYCURSOR (*buffer) /* re2c: current read position; advances the caller's pointer */
+#define YYLIMIT end /* re2c: end of the input */
+#define YYMARKER marker /* re2c: saved position used when backtracking to a shorter match */
+#define YYFILL(n) { return 0; } /* no refill possible: running out of input ends tokenising with 0 */
-%option 8bit
-%option batch
-%option case-insensitive
-%option header-file="scanner.h"
-%option outfile="scanner.c"
-%option prefix="css_"
-%option reentrant
-%option never-interactive
-%option noyywrap
-%option yylineno
-/* see CSS2 Specification, chapter 4
- http://www.w3.org/TR/REC-CSS2/syndata.html,
- and errata
- http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata */
+/**
+ * Identify a CSS source token; "url(", "U+" and hex digits match in either case.
+ *
+ * \param buffer source to tokenise, updated to the position after the token
+ * \param end end of source
+ * \param token_text updated to start of recognized token (after any skipped whitespace, comments, CDO or CDC)
+ * \return token number, or 0 if the scanner runs out of input (see YYFILL)
+ */
+
+int css_tokenise(unsigned char **buffer, unsigned char *end,
+ unsigned char **token_text)
+{
+ unsigned char *marker;
-ident {nmstart}{nmchar}*
-name {nmchar}+
-nmstart [a-zA-Z_]|{nonascii}|{escape}
-nonascii [\200-\377]
-unicode \\[0-9a-f]{1,6}[ \n\r\t\f]?
-escape {unicode}|\\[ -~\200-\377]
-nmchar [-a-zA-Z0-9_]|{nonascii}|{escape}
-num [+-]?[0-9]+|[0-9]*\.[0-9]+
-string {string1}|{string2}
-string1 \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
-string2 \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
-nl \n|\r\n|\r|\f
-w [ \t\r\n\f]*
+start:
+ *token_text = YYCURSOR;
-%%
+/*!re2c
+nonascii = [\200-\377];
+unicode = "\\" [0-9a-fA-F]+ [ \n\r\t\f]?;
+escape = unicode | "\\" [ -~\200-\377];
+nmchar = [-a-zA-Z0-9_] | nonascii | escape;
+nmstart = [a-zA-Z_] | nonascii | escape;
+ident = nmstart nmchar*;
+name = nmchar+;
+num = [+-]? [0-9]+ | [0-9]* "." [0-9]+;
+nl = "\n" | "\r\n" | "\r" | "\f";
+string1 = "\"" ([\t !#$%&(-~] | "\\" nl | "'" | nonascii | escape)* "\"";
+string2 = "'" ([\t !#$%&(-~] | "\\" nl | "\""| nonascii | escape)* "'";
+string = string1 | string2;
+w = [ \t\r\n\f]*;
+any = [\000-\377];
-{ident} { return IDENT; }
-@{ident} { return ATKEYWORD; }
-{string} { return STRING; }
-#{name} { return HASH; }
-{num} { return NUMBER; }
-{num}% { return PERCENTAGE; }
-{num}{ident} { return DIMENSION; }
-url\({w}{string}{w}\)|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\) {
- return URI; }
-U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})? {
- return UNICODE_RANGE; }
-"<!--" /* ignore CDO */
-"-->" /* ignore CDC */
-; { return SEMI; }
-\{ { return LBRACE; }
-\} { return RBRACE; }
-\( { return LPAREN; }
-\) { return RPAREN; }
-\[ { return LBRAC; }
-\] { return RBRAC; }
-[ \t\r\n\f]+ /* ignore whitespace */
-\/\*[^*]*\*+([^/][^*]*\*+)*\/ /* ignore comments */
-{ident}\( { return FUNCTION; }
-= { return EQUALS; }
-~= { return INCLUDES; }
+ident { return IDENT; }
+"@" ident { return ATKEYWORD; }
+string { return STRING; }
+"#" name { return HASH; }
+num { return NUMBER; }
+num "%" { return PERCENTAGE; }
+num ident { return DIMENSION; }
+[uU] [rR] [lL] "(" w string w ")" | [uU] [rR] [lL] "(" w ([!#$%&*-~]|nonascii|escape)* w ")"
+ { return URI; }
+[uU] "+" [0-9A-Fa-f?]+ ("-" [0-9A-Fa-f]+ )?
+ { return UNICODE_RANGE; }
+"<!--" { goto start; /* ignore CDO: rescan, resetting *token_text */ }
+"-->" { goto start; /* ignore CDC: rescan, resetting *token_text */ }
+";" { return SEMI; }
+"{" { return LBRACE; }
+"}" { return RBRACE; }
+"(" { return LPAREN; }
+")" { return RPAREN; }
+"[" { return LBRAC; }
+"]" { return RBRAC; }
+[ \t\r\n\f]+ { goto start; /* ignore whitespace: rescan, resetting *token_text */ }
+"/*" (any\[*])* "*"+ ((any\[/]) (any\[*])* "*"+)* "/"
+ { goto start; /* ignore comments: rescan, resetting *token_text */ }
+ident "(" { return FUNCTION; }
+"=" { return EQUALS; }
+"~=" { return INCLUDES; }
"|=" { return DASHMATCH; }
-: { return COLON; }
-, { return COMMA; }
+":" { return COLON; }
+"," { return COMMA; }
"+" { return PLUS; }
-> { return GT; }
+">" { return GT; }
"." { return DOT; }
"*" { return ASTERISK; }
-. { return DELIM; }
-
-%%
+any { return DELIM; }
+*/
+}