summary refs log tree commit diff
path: root/scripts/genksyms/lex.l
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/genksyms/lex.l')
-rw-r--r-- scripts/genksyms/lex.l 407
1 files changed, 407 insertions, 0 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
new file mode 100644
index 00000000000..fe0dfeedf0f
--- /dev/null
+++ b/scripts/genksyms/lex.l
@@ -0,0 +1,407 @@
+/* Lexical analysis for genksyms.
+ Copyright 1996, 1997 Linux International.
+
+ New implementation contributed by Richard Henderson <rth@tamu.edu>
+ Based on original work by Bjorn Ekwall <bj0rn@blox.se>
+
+ Taken from Linux modutils 2.4.22.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+%{
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "genksyms.h"
+#include "parse.h"
+
+/* We've got a two-level lexer here. We let flex do basic tokenization
+ and then we categorize those basic tokens in the second stage. */
+#define YY_DECL static int yylex1(void)
+
+%}
+
+IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
+/* Integer constants: an octal, decimal or hexadecimal digit string,
+ optionally followed by one of the suffixes U, L, UL or LU in either
+ case. Suffixes containing LL are not listed in I_SUF. */
+O_INT 0[0-7]*
+D_INT [1-9][0-9]*
+X_INT 0[Xx][0-9A-Fa-f]+
+I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
+INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
+/* Floating constants: either a fraction with an optional exponent, or
+ plain digits with a mandatory exponent; both forms accept a single
+ F or L suffix in either case. */
+FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
+EXP [Ee][+-]?[0-9]+
+F_SUF [FfLl]
+REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
+/* String and character literals, with an optional L prefix. Between
+ the delimiters a backslash is always consumed together with the
+ character after it, so an escaped delimiter does not end the literal. */
+STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
+CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
+/* Multi-character operators: one operator character followed by an
+ equals sign, or one of the two-character operators for logical and,
+ logical or, arrow, left shift and right shift. Increment and
+ decrement are not in this list. */
+MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
+
+/* Version 2 checksumming does proper tokenization; version 1 wasn't
+ quite so pedantic. The V2_TOKENS start condition declared here gates
+ the MC_TOKEN, INT and REAL rules in the rules section. */
+%s V2_TOKENS
+
+/* We don't do multiple input files. */
+%option noyywrap
+
+%%
+
+
+ /* Keep track of our location in the original source files. A line
+ made of a hash sign, an integer and a quoted file name is returned
+ as FILENAME, newline included, without touching cur_line here. Any
+ other line starting with a hash sign, and every bare newline,
+ increments cur_line. */
+^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
+^#.*\n cur_line++;
+\n cur_line++;
+
+ /* Ignore all other whitespace. */
+[ \t\f\v\r]+ ;
+
+ /* The next three rules carry no start condition, so they apply
+ whether or not V2_TOKENS has been entered. */
+{STRING} return STRING;
+{CHAR} return CHAR;
+{IDENT} return IDENT;
+
+ /* The Pedant requires that the other C multi-character tokens be
+ recognized as tokens. We don't actually use them since we don't
+ parse expressions, but we do want whitespace to be arranged
+ around them properly. */
+<V2_TOKENS>{MC_TOKEN} return OTHER;
+<V2_TOKENS>{INT} return INT;
+<V2_TOKENS>{REAL} return REAL;
+ /* The ellipsis is returned as one DOTS token, not as three single
+ dots from the catch-all rule below. */
+"..." return DOTS;
+
+ /* All other tokens are single characters. */
+. return yytext[0];
+
+
+%%
+
+/* Bring in the keyword recognizer. */
+
+#include "keywords.c"
+
+
+/* Macros to append to our phrase collection list.
+
+   _APP(T,L) expects the variables cur_node and next_node to be in scope
+   at the point of expansion.  next_node is an allocated but unfilled
+   node; the macro takes it over as cur_node, allocates a fresh
+   placeholder whose next pointer refers back to cur_node, and then
+   stores in cur_node a newly allocated copy of the L+1 bytes starting
+   at T (the text plus the byte that follows it, which callers rely on
+   being the terminating NUL) together with the tag SYM_NORMAL.
+
+   L is evaluated twice, so pass an expression without side effects.
+
+   APP appends the token flex has just matched.  */
+
+#define _APP(T,L)	do {						   \
+			  cur_node = next_node;				   \
+			  next_node = xmalloc(sizeof(*next_node));	   \
+			  next_node->next = cur_node;			   \
+			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
+			  cur_node->tag = SYM_NORMAL;			   \
+			} while (0)
+
+#define APP		_APP(yytext, yyleng)
+
+
+/* The second stage lexer.  Here we incorporate knowledge of the state
+   of the parser to tailor the tokens that are returned.
+
+   Each call pulls raw tokens from yylex1() until one parser-level token
+   is complete.  The text of every raw token that contributes is appended
+   with APP to a linked list, newest node first; on return yylval points
+   at the link that leads to the most recently appended node.
+
+   FILENAME tokens are consumed here: they update cur_filename and
+   cur_line and are never returned.  The parenthesised text after an
+   ATTRIBUTE_KEYW or ASM_KEYW reserved word, a bracketed group, a braced
+   group (unless a struct or union keyword was returned shortly before)
+   and the text following '=' or ':' up to an unnested ',' or ';' are
+   each collapsed into a single *_PHRASE token.
+
+   Returns 0 when yylex1() reports end of input, otherwise the token
+   code for the parser.  */
+
+int
+yylex(void)
+{
+  static enum {
+    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
+    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
+    ST_TABLE_5, ST_TABLE_6
+  } lexstate = ST_NOTSTARTED;
+
+  /* Both counters are decremented once per returned token (see fini).
+     While suppress_type_lookup is nonzero an identifier is not looked up
+     as a typedef name; while dont_want_brace_phrase is nonzero a '{' is
+     returned as itself instead of opening a BRACE_PHRASE.  */
+  static int suppress_type_lookup, dont_want_brace_phrase;
+
+  /* An allocated but not yet filled list node; APP fills it and
+     allocates the next placeholder.  */
+  static struct string_list *next_node;
+
+  /* count is the bracket nesting depth inside a phrase.  It starts at 0
+     on every call; a phrase is always consumed within a single call by
+     looping through "goto repeat".  */
+  int token, count = 0;
+  struct string_list *cur_node;
+
+  if (lexstate == ST_NOTSTARTED)
+    {
+      /* First call: enable the version 2 token rules and create the
+	 initial placeholder node.  */
+      BEGIN(V2_TOKENS);
+      next_node = xmalloc(sizeof(*next_node));
+      next_node->next = NULL;
+      lexstate = ST_NORMAL;
+    }
+
+repeat:
+  token = yylex1();
+
+  if (token == 0)
+    return 0;
+  else if (token == FILENAME)
+    {
+      char *file, *e;
+
+      /* Save the filename and line number for later error messages.
+	 The FILENAME rule only matches text containing a pair of double
+	 quotes, so neither strchr call can return NULL.  */
+
+      free(cur_filename);
+
+      file = strchr(yytext, '\"')+1;
+      e = strchr(file, '\"');
+      *e = '\0';
+      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
+      cur_line = atoi(yytext+2);
+
+      goto repeat;
+    }
+
+  switch (lexstate)
+    {
+    case ST_NORMAL:
+      switch (token)
+	{
+	case IDENT:
+	  APP;
+	  {
+	    const struct resword *r = is_reserved_word(yytext, yyleng);
+	    if (r)
+	      {
+		switch (token = r->token)
+		  {
+		  case ATTRIBUTE_KEYW:
+		    lexstate = ST_ATTRIBUTE;
+		    count = 0;
+		    goto repeat;
+		  case ASM_KEYW:
+		    lexstate = ST_ASM;
+		    count = 0;
+		    goto repeat;
+
+		  case STRUCT_KEYW:
+		  case UNION_KEYW:
+		    dont_want_brace_phrase = 3;
+		    /* FALLTHRU */
+		  case ENUM_KEYW:
+		    suppress_type_lookup = 2;
+		    goto fini;
+
+		  case EXPORT_SYMBOL_KEYW:
+		    goto fini;
+		  }
+	      }
+	    /* Reached for plain identifiers and for reserved words not
+	       handled above; a typedef name overrides the token.  */
+	    if (!suppress_type_lookup)
+	      {
+		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
+		if (sym && sym->type == SYM_TYPEDEF)
+		  token = TYPE;
+	      }
+	  }
+	  break;
+
+	case '[':
+	  APP;
+	  lexstate = ST_BRACKET;
+	  count = 1;
+	  goto repeat;
+
+	case '{':
+	  APP;
+	  if (dont_want_brace_phrase)
+	    break;
+	  lexstate = ST_BRACE;
+	  count = 1;
+	  goto repeat;
+
+	case '=': case ':':
+	  /* The operator itself is returned now; the expression after it
+	     is gathered on the following call.  */
+	  APP;
+	  lexstate = ST_EXPRESSION;
+	  break;
+
+	case DOTS:
+	default:
+	  APP;
+	  break;
+	}
+      break;
+
+    case ST_ATTRIBUTE:
+      APP;
+      switch (token)
+	{
+	case '(':
+	  ++count;
+	  goto repeat;
+	case ')':
+	  if (--count == 0)
+	    {
+	      lexstate = ST_NORMAL;
+	      token = ATTRIBUTE_PHRASE;
+	      break;
+	    }
+	  goto repeat;
+	default:
+	  goto repeat;
+	}
+      break;
+
+    case ST_ASM:
+      APP;
+      switch (token)
+	{
+	case '(':
+	  ++count;
+	  goto repeat;
+	case ')':
+	  if (--count == 0)
+	    {
+	      lexstate = ST_NORMAL;
+	      token = ASM_PHRASE;
+	      break;
+	    }
+	  goto repeat;
+	default:
+	  goto repeat;
+	}
+      break;
+
+    case ST_BRACKET:
+      APP;
+      switch (token)
+	{
+	case '[':
+	  ++count;
+	  goto repeat;
+	case ']':
+	  if (--count == 0)
+	    {
+	      lexstate = ST_NORMAL;
+	      token = BRACKET_PHRASE;
+	      break;
+	    }
+	  goto repeat;
+	default:
+	  goto repeat;
+	}
+      break;
+
+    case ST_BRACE:
+      APP;
+      switch (token)
+	{
+	case '{':
+	  ++count;
+	  goto repeat;
+	case '}':
+	  if (--count == 0)
+	    {
+	      lexstate = ST_NORMAL;
+	      token = BRACE_PHRASE;
+	      break;
+	    }
+	  goto repeat;
+	default:
+	  goto repeat;
+	}
+      break;
+
+    case ST_EXPRESSION:
+      switch (token)
+	{
+	case '(': case '[': case '{':
+	  ++count;
+	  APP;
+	  goto repeat;
+	case ')': case ']': case '}':
+	  --count;
+	  APP;
+	  goto repeat;
+	case ',': case ';':
+	  if (count == 0)
+	    {
+	      /* Put back the token we just read so's we can find it again
+		 after registering the expression.  */
+	      unput(token);
+
+	      lexstate = ST_NORMAL;
+	      token = EXPRESSION_PHRASE;
+	      break;
+	    }
+	  APP;
+	  goto repeat;
+	default:
+	  APP;
+	  goto repeat;
+	}
+      break;
+
+    /* Nothing in this function ever moves lexstate from the states above
+       into ST_TABLE_1 .. ST_TABLE_4, and ST_TABLE_2, ST_TABLE_5 and
+       ST_TABLE_6 are only entered from other ST_TABLE states, so the
+       cases below have no entry transition here.  They are kept
+       unchanged.  */
+    case ST_TABLE_1:
+      goto repeat;
+
+    case ST_TABLE_2:
+      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
+	{
+	  token = EXPORT_SYMBOL_KEYW;
+	  lexstate = ST_TABLE_5;
+	  APP;
+	  break;
+	}
+      lexstate = ST_TABLE_6;
+      /* FALLTHRU */
+
+    case ST_TABLE_6:
+      switch (token)
+	{
+	case '{': case '[': case '(':
+	  ++count;
+	  break;
+	case '}': case ']': case ')':
+	  --count;
+	  break;
+	case ',':
+	  if (count == 0)
+	    lexstate = ST_TABLE_2;
+	  break;
+	}
+      goto repeat;
+
+    case ST_TABLE_3:
+      goto repeat;
+
+    case ST_TABLE_4:
+      if (token == ';')
+	lexstate = ST_NORMAL;
+      goto repeat;
+
+    case ST_TABLE_5:
+      switch (token)
+	{
+	case ',':
+	  token = ';';
+	  lexstate = ST_TABLE_2;
+	  APP;
+	  break;
+	default:
+	  APP;
+	  break;
+	}
+      break;
+
+    default:
+      abort();
+    }
+fini:
+
+  /* One token is about to be returned: age both countdowns.  */
+  if (suppress_type_lookup > 0)
+    --suppress_type_lookup;
+  if (dont_want_brace_phrase > 0)
+    --dont_want_brace_phrase;
+
+  /* next_node is the unfilled placeholder, so its next link leads to
+     the node appended most recently.  */
+  yylval = &next_node->next;
+
+  return token;
+}