From dc672aedbafa7bf24cd24430abcae2d57528cf41 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 4 Sep 2012 16:36:01 +0100 Subject: Initial version of netsurf webidl javascript binding geenrator tool --- src/webidl-lexer.l | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 src/webidl-lexer.l (limited to 'src/webidl-lexer.l') diff --git a/src/webidl-lexer.l b/src/webidl-lexer.l new file mode 100644 index 0000000..f605575 --- /dev/null +++ b/src/webidl-lexer.l @@ -0,0 +1,284 @@ +/* + * This is a unicode capable lexer for web IDL mostly derived from: + * + * W3C WEB IDL - http://www.w3.org/TR/WebIDL/ (especially the grammar + * in apendix A) + * + * The ECMA script spec - + * http://ecma-international.org/ecma-262/5.1/#sec-7.2 (expecially + * section 7.2 for unicode value handling) + */ + +/* lexer options */ +%option outfile="webidl-lexer.c" +%option header-file="webidl-lexer.h" +%option never-interactive +%option yylineno +%option bison-bridge +%option bison-locations +%option nodefault +%option warn +%option debug +%option prefix="webidl_" +%option nounput + +/* header block */ +%{ + +#include +#include +#include + +#include "webidl-parser.h" + +#define YY_USER_ACTION yylloc->first_line = yylloc->last_line; \ + yylloc->first_column = yylloc->last_column + 1; \ + yylloc->last_column += yyleng; + +%} + +/* regular definitions */ + + /* ecmascript section 7.2 defines whitespace http://ecma-international.org/ecma-262/5.1/#sec-7.2 + * Web IDL appendix A has the IDL grammar http://www.w3.org/TR/WebIDL/#idl-grammar + */ + + /* we do not define space, line feed, carriage return, tab, vertical + * tab or form feed here as they are the standard C escapes + */ + + /* other Unicode “space separator” */ +USP (\xe1\x9a\x80)|(\xe1\xa0\x8e)|(\xe2\x80[\x80-\x8a])|(\xe2\x80\xaf)|(\xe2\x81\x9f)|(\xe3\x80\x80) + +/* Line separator \u2028 */ +LS (\xe2\x80\xa8) + +/* paragraph separator \u2029 */ +PS (\xe2\x80\xa9) + +/* non breaking space \u00A0 */ +NBSP (\xc2\xa0) + +/* web idl grammar for whitespace matches single and multiline + * comments too. Here there are separate definitions for both single + * and multiline comments. + */ +whitespace ([ \t\v\f]|{NBSP}|{USP}) +multicomment \/\*(([^*])|(\*[^/]))*\*\/ +singlecomment \/\/ +lineend ([\n\r]|{LS}|{PS}) + +/* integer numbers in hexidecimal, decimal and octal, slight extension + * to spec which only allows for decimal values + */ +hexdigit [0-9A-Fa-f] +hexint 0(x|X){hexdigit}+ + +decimalint 0|([1-9][0-9]*) + +octalint (0[0-8]+) + +/* decimal floating point number */ +decimalexponent (e|E)[\+\-]?[0-9]+ +decimalfloat ({decimalint}\.[0-9]*{decimalexponent}?)|(\.[0-9]+{decimalexponent}?)|({decimalint}{decimalexponent}?) + +/* quoted string. spec simply has "[^"]*" but here escapes are allowed for */ +hexescseq x{hexdigit}{2} +unicodeescseq u{hexdigit}{4} +characterescseq ['\"\\bfnrtv]|[^'\"\\bfnrtv\n\r] +escseq {characterescseq}|0|{hexescseq}|{unicodeescseq} +quotedstring ([^\"\\\n\r]|\\{escseq}) + +/* web idl identifier direct from spec */ +Identifier [A-Z_a-z][0-9A-Z_a-z]* + +/* web idl other direct from spec */ +other [^\t\n\r 0-9A-Z_a-z] + +/* used for #include directive - not part of web idl spec */ +poundsign ^{whitespace}*# + +%x incl +%% + +{whitespace} ++yylloc->last_column; /* skip whitespace */ + +{lineend} if (yytext[0] == '\n') { + /* update position counts */ + ++yylloc->last_line; + yylloc->last_column = 0; + } + + /* Simple text terminals */ + +boolean return TOK_BOOLEAN; + +byte return TOK_BYTE; + +octet return TOK_OCTET; + +attribute return TOK_ATTRIBUTE; + +callback return TOK_CALLBACK; + +const return TOK_CONST; + +creator return TOK_CREATOR; + +deleter return TOK_DELETER; + +dictionary return TOK_DICTIONARY; + +enum return TOK_ENUM; + +exception return TOK_EXCEPTION; + +getter return TOK_GETTER; + +implements return TOK_IMPLEMENTS; + +inherit return TOK_INHERIT; + +interface return TOK_INTERFACE; + +legacycaller return TOK_LEGACYCALLER; + +partial return TOK_PARTIAL; + +setter return TOK_SETTER; + +static return TOK_STATIC; + +stringifier return TOK_STRINGIFIER; + +typedef return TOK_TYPEDEF; + +unrestricted return TOK_UNRESTRICTED; + +"..." return TOK_ELLIPSIS; + +Date return TOK_DATE; + +DOMString return TOK_STRING; /* dom strings are just strings */ + +Infinity return TOK_INFINITY; + +NaN return TOK_NAN; + +any return TOK_ANY; + +double return TOK_DOUBLE; + +false return TOK_FALSE; + +float return TOK_FLOAT; + +long return TOK_LONG; + +null return TOK_NULL_LITERAL; + +object yylval->text = strdup(yytext); return TOK_IDENTIFIER; + +or return TOK_OR; + +optional return TOK_OPTIONAL; + +sequence return TOK_SEQUENCE; + +short return TOK_SHORT; + +true return TOK_TRUE; + +unsigned return TOK_UNSIGNED; + +void return TOK_VOID; + +readonly return TOK_READONLY; + + +{Identifier} { + // A leading "_" is used to escape an identifier from looking like a reserved word terminal. + yylval->text = (yytext[0] == '_') ? strdup(yytext + 1) : strdup(yytext); + return TOK_IDENTIFIER; + } + +{decimalint} yylval->value = strtol(yytext, NULL, 10); return TOK_INT_LITERAL; + +{octalint} yylval->value = strtol(yytext, NULL, 8); return TOK_INT_LITERAL; + +{hexint} yylval->value = strtol(yytext, NULL, 16); return TOK_INT_LITERAL; + +{decimalfloat} yylval->text = strdup(yytext); return TOK_FLOAT_LITERAL; + +\"{quotedstring}*\" yylval->text = strdup(yytext); return TOK_STRING_LITERAL; + +{multicomment} { + /* multicomment */ + char* s = yytext; + for (; *s; ++s) + { + if (*s == '\n') + { + ++yylloc->last_line; + yylloc->last_column = 0; + } + else + { + ++yylloc->last_column; + } + } + if (strncmp(yytext, "/**", 3) == 0) + { + /* Javadoc style comment */ + yylval->text = strdup(yytext); + return TOK_JAVADOC; + } + } + +{singlecomment} { + /* singlecomment */ + int c; + + do { + c = input(); + } while (c != '\n' && c != '\r' && c != EOF); + ++yylloc->last_line; + yylloc->last_column = 0; + } + + + +{poundsign}include BEGIN(incl); + +{other} return (int) yytext[0]; + +[ \t]*\" /* eat the whitespace and open quotes */ + +[^\t\n\"]+ { + /* got the include file name */ + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) { + fprintf(stderr, "Unable to open include %s\n", yytext); + exit(3); + } + yypush_buffer_state(yy_create_buffer( yyin, YY_BUF_SIZE )); + + BEGIN(INITIAL); + } + +\n BEGIN(INITIAL); + +<> { + yypop_buffer_state(); + + if ( !YY_CURRENT_BUFFER ) { + yyterminate(); + } else { + BEGIN(incl); + } + + } + + +%% -- cgit v1.2.3