/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A line of
    the form `# <number> "<file>" ...' is handed to the second stage as
    FILENAME; any other `#' line, and every bare newline, only advances
    the line counter.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.hash.c"


/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) appends a copy of the token text T to the phrase list.
 * It copies L + 1 bytes, so T must be NUL-terminated at T[L].
 *
 * The enclosing function must provide two `struct string_list *'
 * variables: `next_node', an empty placeholder at the head of the list,
 * and `cur_node', scratch.  The placeholder becomes the node for this
 * token and a freshly allocated placeholder is linked in front of it.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 *
 * Both arguments are parenthesised in the expansion so that expression
 * arguments are evaluated as the caller wrote them.
 */
#define _APP(T,L)	do {						   \
	cur_node = next_node;						   \
	next_node = xmalloc(sizeof(*next_node));			   \
	next_node->next = cur_node;					   \
	cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1);	   \
	cur_node->tag =							   \
	    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?		   \
	    SYM_ENUM_CONST : SYM_NORMAL;				   \
	cur_node->in_source_file = in_source_file;			   \
} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.
*/

/* yylex - return the next parser token.

   Raw tokens come from yylex1().  A return of 0 from yylex1() is passed
   straight through.  FILENAME tokens are consumed here: they update
   cur_filename, cur_line, source_file and in_source_file and are never
   returned to the caller.

   Every other token has its text appended to the phrase list with APP.
   Depending on `lexstate', several raw tokens may be collected into one
   returned token:

     ST_ATTRIBUTE   attribute keyword plus balanced (...)  -> ATTRIBUTE_PHRASE
     ST_ASM         asm keyword plus balanced (...)        -> ASM_PHRASE
     ST_BRACKET     balanced [...]                         -> BRACKET_PHRASE
     ST_BRACE       balanced {...}                         -> BRACE_PHRASE
     ST_EXPRESSION  everything after '=' or ':' up to a ',', ';' or '}'
                    at nesting depth zero                  -> EXPRESSION_PHRASE

   On return yylval is the address of the placeholder node's `next'
   link, i.e. it points at the pointer to the most recently appended
   node.

   The state is kept in function-local statics, so it persists across
   calls.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Both counters are decremented once per returned token (see `fini').  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;

  /* `count' is the nesting depth of the phrase being collected.  It is
     not static: a phrase is always collected within a single call, by
     looping through `repeat'.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the empty placeholder that APP fills in.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      free(cur_filename);	/* free(NULL) is a no-op */

      /* The FILENAME pattern guarantees a pair of double quotes with at
	 least one character between them, so neither strchr can fail.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      /* The copy includes the terminator just written at *e.  */
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the '#' and the first blank; atoi skips any further blanks.  */
      cur_line = atoi(yytext+2);

      if (!source_file) {
	/* The first file named becomes the source file.  */
	source_file = xstrdup(cur_filename);
	in_source_file = 1;
      } else {
	in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    const struct resword *r = is_reserved_word(yytext, yyleng);
	    if (r)
	      {
		switch (token = r->token)
		  {
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		  case ENUM_KEYW:
		    /* After the decrement at `fini' these stay non-zero
		       for the next two (brace) and the next one (type
		       lookup) returned tokens respectively, so a
		       following tag is not turned into TYPE and a
		       following '{' is returned as a plain '{'.  */
		    dont_want_brace_phrase = 3;
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		    goto fini;

		  default:
		    /* Any other reserved word keeps r->token but is still
		       subject to the typedef lookup below.  */
		    break;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		if (find_symbol(yytext, SYM_TYPEDEF, 1))
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	case DOTS:
	default:
	  APP;
	  break;
	}
      break;

    /* The next four states all work the same way: append every token,
       track the nesting depth, and return the phrase token once the
       closing delimiter brings the depth back to zero.  */

    case ST_ATTRIBUTE:
      APP;
      if (token == '(')
	++count;
      else if (token == ')' && --count == 0)
	{
	  lexstate = ST_NORMAL;
	  token = ATTRIBUTE_PHRASE;
	  break;
	}
      goto repeat;

    case ST_ASM:
      APP;
      if (token == '(')
	++count;
      else if (token == ')' && --count == 0)
	{
	  lexstate = ST_NORMAL;
	  token = ASM_PHRASE;
	  break;
	}
      goto repeat;

    case ST_BRACKET:
      APP;
      if (token == '[')
	++count;
      else if (token == ']' && --count == 0)
	{
	  lexstate = ST_NORMAL;
	  token = BRACKET_PHRASE;
	  break;
	}
      goto repeat;

    case ST_BRACE:
      APP;
      if (token == '{')
	++count;
      else if (token == '}' && --count == 0)
	{
	  lexstate = ST_NORMAL;
	  token = BRACE_PHRASE;
	  break;
	}
      goto repeat;

    case ST_EXPRESSION:
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case '}':
	  /* is this the last line of an enum declaration? */
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  /* FALLTHRU */
	case ')': case ']':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    default:
      /* No other value is ever assigned to lexstate.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}