1#include <stdlib.h> 2#include <stdio.h> 3#include <string.h> 4 5#define ADDEQ 257 6#define ANDAND 258 7#define ANDEQ 259 8#define ARRAY 260 9#define ASM 261 10#define AUTO 262 11#define BREAK 263 12#define CASE 264 13#define CHAR 265 14#define CONST 266 15#define CONTINUE 267 16#define DECR 268 17#define DEFAULT 269 18#define DEREF 270 19#define DIVEQ 271 20#define DO 272 21#define DOUBLE 273 22#define ELLIPSIS 274 23#define ELSE 275 24#define ENUM 276 25#define EQL 277 26#define EXTERN 278 27#define FCON 279 28#define FLOAT 280 29#define FOR 281 30#define FUNCTION 282 31#define GEQ 283 32#define GOTO 284 33#define ICON 285 34#define ID 286 35#define IF 287 36#define INCR 288 37#define INT 289 38#define LEQ 290 39#define LONG 291 40#define LSHIFT 292 41#define LSHIFTEQ 293 42#define MODEQ 294 43#define MULEQ 295 44#define NEQ 296 45#define OREQ 297 46#define OROR 298 47#define POINTER 299 48#define REGISTER 300 49#define RETURN 301 50#define RSHIFT 302 51#define RSHIFTEQ 303 52#define SCON 304 53#define SHORT 305 54#define SIGNED 306 55#define SIZEOF 307 56#define STATIC 308 57#define STRUCT 309 58#define SUBEQ 310 59#define SWITCH 311 60#define TYPEDEF 312 61#define UNION 313 62#define UNSIGNED 314 63#define VOID 315 64#define VOLATILE 316 65#define WHILE 317 66#define XOREQ 318 67#define EOI 319 68 69typedef unsigned int uint; 70typedef unsigned char uchar; 71 72#define BSIZE 8192 73 74#define YYCTYPE uchar 75#define YYCURSOR cursor 76#define YYLIMIT s->lim 77#define YYMARKER s->ptr 78#define YYFILL(n) {cursor = fill(s, cursor);} 79 80#define RET(i) {s->cur = cursor; return i;} 81 82typedef struct Scanner { 83 int fd; 84 uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; 85 uint line; 86} Scanner; 87 88uchar *fill(Scanner *s, uchar *cursor){ 89 if(!s->eof){ 90 uint cnt = s->tok - s->bot; 91 if(cnt){ 92 memcpy(s->bot, s->tok, s->lim - s->tok); 93 s->tok = s->bot; 94 s->ptr -= cnt; 95 cursor -= cnt; 96 s->pos -= cnt; 97 s->lim -= cnt; 98 } 99 if((s->top - s->lim) < BSIZE){ 100 uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); 101 memcpy(buf, s->tok, s->lim - s->tok); 102 s->tok = buf; 103 s->ptr = &buf[s->ptr - s->bot]; 104 cursor = &buf[cursor - s->bot]; 105 s->pos = &buf[s->pos - s->bot]; 106 s->lim = &buf[s->lim - s->bot]; 107 s->top = &s->lim[BSIZE]; 108 free(s->bot); 109 s->bot = buf; 110 } 111 if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ 112 s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; 113 } 114 s->lim += cnt; 115 } 116 return cursor; 117} 118 119int scan(Scanner *s){ 120 uchar *cursor = s->cur; 121std: 122 s->tok = cursor; 123/*!re2c 124any = [\000-\377]; 125O = [0-7]; 126D = [0-9]; 127L = [a-zA-Z_]; 128H = [a-fA-F0-9]; 129E = [Ee] [+-]? D+; 130FS = [fFlL]; 131IS = [uUlL]*; 132ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); 133*/ 134 135/*!re2c 136 "/*" { goto comment; } 137 138 "auto" { RET(AUTO); } 139 "break" { RET(BREAK); } 140 "case" { RET(CASE); } 141 "char" { RET(CHAR); } 142 "const" { RET(CONST); } 143 "continue" { RET(CONTINUE); } 144 "default" { RET(DEFAULT); } 145 "do" { RET(DO); } 146 "double" { RET(DOUBLE); } 147 "else" { RET(ELSE); } 148 "enum" { RET(ENUM); } 149 "extern" { RET(EXTERN); } 150 "float" { RET(FLOAT); } 151 "for" { RET(FOR); } 152 "goto" { RET(GOTO); } 153 "if" { RET(IF); } 154 "int" { RET(INT); } 155 "long" { RET(LONG); } 156 "register" { RET(REGISTER); } 157 "return" { RET(RETURN); } 158 "short" { RET(SHORT); } 159 "signed" { RET(SIGNED); } 160 "sizeof" { RET(SIZEOF); } 161 "static" { RET(STATIC); } 162 "struct" { RET(STRUCT); } 163 "switch" { RET(SWITCH); } 164 "typedef" { RET(TYPEDEF); } 165 "union" { RET(UNION); } 166 "unsigned" { RET(UNSIGNED); } 167 "void" { RET(VOID); } 168 "volatile" { RET(VOLATILE); } 169 "while" { RET(WHILE); } 170 171 L (L|D)* { RET(ID); } 172 173 ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | 174 (['] (ESC|any\[\n\\'])* [']) 175 { RET(ICON); } 176 177 (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) 178 { RET(FCON); } 179 180 (["] (ESC|any\[\n\\"])* ["]) 181 { RET(SCON); } 182 183 "..." { RET(ELLIPSIS); } 184 ">>=" { RET(RSHIFTEQ); } 185 "<<=" { RET(LSHIFTEQ); } 186 "+=" { RET(ADDEQ); } 187 "-=" { RET(SUBEQ); } 188 "*=" { RET(MULEQ); } 189 "/=" { RET(DIVEQ); } 190 "%=" { RET(MODEQ); } 191 "&=" { RET(ANDEQ); } 192 "^=" { RET(XOREQ); } 193 "|=" { RET(OREQ); } 194 ">>" { RET(RSHIFT); } 195 "<<" { RET(LSHIFT); } 196 "++" { RET(INCR); } 197 "--" { RET(DECR); } 198 "->" { RET(DEREF); } 199 "&&" { RET(ANDAND); } 200 "||" { RET(OROR); } 201 "<=" { RET(LEQ); } 202 ">=" { RET(GEQ); } 203 "==" { RET(EQL); } 204 "!=" { RET(NEQ); } 205 ";" { RET(';'); } 206 "{" { RET('{'); } 207 "}" { RET('}'); } 208 "," { RET(','); } 209 ":" { RET(':'); } 210 "=" { RET('='); } 211 "(" { RET('('); } 212 ")" { RET(')'); } 213 "[" { RET('['); } 214 "]" { RET(']'); } 215 "." { RET('.'); } 216 "&" { RET('&'); } 217 "!" { RET('!'); } 218 "~" { RET('~'); } 219 "-" { RET('-'); } 220 "+" { RET('+'); } 221 "*" { RET('*'); } 222 "/" { RET('/'); } 223 "%" { RET('%'); } 224 "<" { RET('<'); } 225 ">" { RET('>'); } 226 "^" { RET('^'); } 227 "|" { RET('|'); } 228 "?" { RET('?'); } 229 230 231 [ \t\v\f]+ { goto std; } 232 233 "\n" 234 { 235 if(cursor == s->eof) RET(EOI); 236 s->pos = cursor; s->line++; 237 goto std; 238 } 239 240 any 241 { 242 printf("unexpected character: %c\n", *s->tok); 243 goto std; 244 } 245*/ 246 247comment: 248/*!re2c 249 "*/" { goto std; } 250 "\n" 251 { 252 if(cursor == s->eof) RET(EOI); 253 s->tok = s->pos = cursor; s->line++; 254 goto comment; 255 } 256 any { goto comment; } 257*/ 258} 259 260main(){ 261 Scanner in; 262 int t; 263 memset((char*) &in, 0, sizeof(in)); 264 in.fd = 0; 265 while((t = scan(&in)) != EOI){ 266/* 267 printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); 268 printf("%d\n", t); 269*/ 270 } 271 close(in.fd); 272} 273