summaryrefslogtreecommitdiff
path: root/docs/Tokens
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-05-01 16:36:27 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-05-01 16:36:27 +0000
commit72c39e3522c5781d1e7dc8abad77d96141c5d49b (patch)
treee16497caaa0bf20771ef34787de02fc95e5993bf /docs/Tokens
downloadlibcss-72c39e3522c5781d1e7dc8abad77d96141c5d49b.tar.gz
libcss-72c39e3522c5781d1e7dc8abad77d96141c5d49b.tar.bz2
Import beginnings of a CSS parsing library.
Currently comprises a lexer. svn path=/trunk/libcss/; revision=4112
Diffstat (limited to 'docs/Tokens')
-rw-r--r--docs/Tokens65
1 files changed, 65 insertions, 0 deletions
diff --git a/docs/Tokens b/docs/Tokens
new file mode 100644
index 0000000..21e09da
--- /dev/null
+++ b/docs/Tokens
@@ -0,0 +1,65 @@
+Production rules for lexical tokens
+===================================
+
+This file provides a complete set of production rules for the tokens generated
+by the lexer. In case of ambiguity, the longest match wins.
+
+Components
+----------
+
+ident ::= '-'? nmstart nmchar*
+name ::= nmchar+
+nmstart ::= [a-zA-Z] | '_' | nonascii | escape
+nonascii ::= [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+unicode ::= '\' [0-9a-fA-F]{1,6} wc?
+escape ::= unicode | '\' [^\n\r\f0-9a-fA-F]
+nmchar ::= [a-zA-Z0-9] | '-' | '_' | nonascii | escape
+num ::= [0-9]+ | [0-9]* '.' [0-9]+
+string ::= '"' (stringchar | "'")* '"' | "'" (stringchar | '"')* "'"
+stringchar ::= urlchar | #x20 | #x29 | '\' nl
+urlchar ::= [#x9#x21#x23-#x26#x28#x2A-#x7E] | nonascii | escape
+nl ::= #xA | #xD #xA | #xD | #xC
+w ::= wc*
+wc ::= #x9 | #xA | #xC | #xD | #x20
+
+Tokens
+------
+
+IDENT ::= ident
+ATKEYWORD ::= '@' ident
+STRING ::= string
+HASH ::= '#' name
+NUMBER ::= num
+PERCENTAGE ::= num '%'
+DIMENSION ::= num ident
+URI ::= "url(" w (string | urlchar*) w ')'
+UNICODE-RANGE ::= [Uu] '+' [0-9a-fA-F?]{1,6} ('-' [0-9a-fA-F]{1,6})?
+CDO ::= "<!--"
+CDC ::= "-->"
+S ::= wc+
+COMMENT ::= "/*" [^*]* '*'+ ([^/] [^*]* '*'+)* '/'
+FUNCTION ::= ident '('
+INCLUDES ::= "~="
+DASHMATCH ::= "|="
+PREFIXMATCH ::= "^="
+SUFFIXMATCH ::= "$="
+SUBSTRINGMATCH ::= "*="
+CHAR ::= any other character, except " or '
+
+Differences from the CSS3 Syntax module specification
+-----------------------------------------------------
+
+1) UNICODE-RANGE is case insensitive (it's uppercase only in the spec)
+2) escape follows CSS2.1. CSS3 defines it as:
+ escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+3) urlchar omits ' and ):
+ a) If ' is permitted verbatim then, as stringchar inherits from urlchar,
+ single quoted strings may contain verbatim single quotes. This is
+ clearly nonsense.
+ b) If ) is permitted verbatim then it becomes impossible to determine the
+ true end of URI. Thus, for sanity's sake, it's omitted here.
+4) stringchar explicitly includes ). See 3(b) for why it won't inherit it
+ from urlchar as the spec implies.
+5) BOM ::= #xFEFF is omitted. It is assumed that any leading BOM will be
+ stripped from the document before lexing occurs.
+