1// Copyright 2017 syzkaller project authors. All rights reserved. 2// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4package ast 5 6import ( 7 "fmt" 8 "os" 9 "strconv" 10) 11 12type token int 13 14const ( 15 tokIllegal token = iota 16 tokComment 17 tokIdent 18 tokInclude 19 tokIncdir 20 tokDefine 21 tokResource 22 tokString 23 tokCExpr 24 tokInt 25 26 tokNewLine 27 tokLParen 28 tokRParen 29 tokLBrack 30 tokRBrack 31 tokLBrace 32 tokRBrace 33 tokEq 34 tokComma 35 tokColon 36 37 tokEOF 38) 39 40var punctuation = [256]token{ 41 '\n': tokNewLine, 42 '(': tokLParen, 43 ')': tokRParen, 44 '[': tokLBrack, 45 ']': tokRBrack, 46 '{': tokLBrace, 47 '}': tokRBrace, 48 '=': tokEq, 49 ',': tokComma, 50 ':': tokColon, 51} 52 53var tok2str = [...]string{ 54 tokIllegal: "ILLEGAL", 55 tokComment: "comment", 56 tokIdent: "identifier", 57 tokInclude: "include", 58 tokIncdir: "incdir", 59 tokDefine: "define", 60 tokResource: "resource", 61 tokString: "string", 62 tokCExpr: "CEXPR", 63 tokInt: "int", 64 tokNewLine: "NEWLINE", 65 tokEOF: "EOF", 66} 67 68func init() { 69 for ch, tok := range punctuation { 70 if tok == tokIllegal { 71 continue 72 } 73 tok2str[tok] = fmt.Sprintf("%q", ch) 74 } 75} 76 77var keywords = map[string]token{ 78 "include": tokInclude, 79 "incdir": tokIncdir, 80 "define": tokDefine, 81 "resource": tokResource, 82} 83 84func (tok token) String() string { 85 return tok2str[tok] 86} 87 88type scanner struct { 89 data []byte 90 filename string 91 errorHandler ErrorHandler 92 93 ch byte 94 off int 95 line int 96 col int 97 98 prev1 token 99 prev2 token 100 101 errors int 102} 103 104func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner { 105 if errorHandler == nil { 106 errorHandler = LoggingHandler 107 } 108 s := &scanner{ 109 data: data, 110 filename: filename, 111 errorHandler: errorHandler, 112 off: -1, 113 } 114 s.next() 115 return s 116} 117 118type ErrorHandler func(pos Pos, msg string) 119 120func LoggingHandler(pos Pos, msg string) { 121 fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg) 122} 123 124func (pos Pos) String() string { 125 return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col) 126} 127 128func (s *scanner) Scan() (tok token, lit string, pos Pos) { 129 s.skipWhitespace() 130 pos = s.pos() 131 switch { 132 case s.ch == 0: 133 tok = tokEOF 134 s.next() 135 case s.ch == '`': 136 tok = tokCExpr 137 lit = s.scanCExpr(pos) 138 case s.prev2 == tokDefine && s.prev1 == tokIdent: 139 // Note: the old form for C expressions, not really lexable. 140 // TODO(dvyukov): get rid of this eventually. 141 tok = tokCExpr 142 for ; s.ch != '\n'; s.next() { 143 } 144 lit = string(s.data[pos.Off:s.off]) 145 case s.ch == '#': 146 tok = tokComment 147 for s.next(); s.ch != '\n'; s.next() { 148 } 149 lit = string(s.data[pos.Off+1 : s.off]) 150 case s.ch == '"' || s.ch == '<': 151 tok = tokString 152 lit = s.scanStr(pos) 153 case s.ch >= '0' && s.ch <= '9' || s.ch == '-': 154 tok = tokInt 155 lit = s.scanInt(pos) 156 case s.ch == '\'': 157 tok = tokInt 158 lit = s.scanChar(pos) 159 case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z': 160 tok, lit = s.scanIdent(pos) 161 default: 162 tok = punctuation[s.ch] 163 if tok == tokIllegal { 164 s.Error(pos, "illegal character %#U", s.ch) 165 } 166 s.next() 167 } 168 s.prev2 = s.prev1 169 s.prev1 = tok 170 return 171} 172 173func (s *scanner) scanCExpr(pos Pos) string { 174 for s.next(); s.ch != '`' && s.ch != '\n'; s.next() { 175 } 176 if s.ch == '\n' { 177 s.Error(pos, "C expression is not terminated") 178 return "" 179 } 180 lit := string(s.data[pos.Off+1 : s.off]) 181 s.next() 182 return lit 183} 184 185func (s *scanner) scanStr(pos Pos) string { 186 // TODO(dvyukov): get rid of <...> strings, that's only includes 187 closing := byte('"') 188 if s.ch == '<' { 189 closing = '>' 190 } 191 for s.next(); s.ch != closing; s.next() { 192 if s.ch == 0 || s.ch == '\n' { 193 s.Error(pos, "string literal is not terminated") 194 return "" 195 } 196 } 197 lit := string(s.data[pos.Off+1 : s.off]) 198 for i := 0; i < len(lit); i++ { 199 if lit[i] < 0x20 || lit[i] >= 0x80 { 200 pos1 := pos 201 pos1.Col += i + 1 202 pos1.Off += i + 1 203 s.Error(pos1, "illegal character %#U in string literal", lit[i]) 204 break 205 } 206 } 207 s.next() 208 return lit 209} 210 211func (s *scanner) scanInt(pos Pos) string { 212 for s.ch >= '0' && s.ch <= '9' || 213 s.ch >= 'a' && s.ch <= 'f' || 214 s.ch >= 'A' && s.ch <= 'F' || 215 s.ch == 'x' || s.ch == '-' { 216 s.next() 217 } 218 lit := string(s.data[pos.Off:s.off]) 219 if _, err := strconv.ParseUint(lit, 10, 64); err == nil { 220 return lit 221 } 222 if len(lit) > 1 && lit[0] == '-' { 223 if _, err := strconv.ParseInt(lit, 10, 64); err == nil { 224 return lit 225 } 226 } 227 if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' { 228 if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil { 229 return lit 230 } 231 } 232 s.Error(pos, fmt.Sprintf("bad integer %q", lit)) 233 return "0" 234} 235 236func (s *scanner) scanChar(pos Pos) string { 237 s.next() 238 s.next() 239 if s.ch != '\'' { 240 s.Error(pos, "char literal is not terminated") 241 return "0" 242 } 243 s.next() 244 return string(s.data[pos.Off : pos.Off+3]) 245} 246 247func (s *scanner) scanIdent(pos Pos) (tok token, lit string) { 248 tok = tokIdent 249 for s.ch == '_' || s.ch == '$' || 250 s.ch >= 'a' && s.ch <= 'z' || 251 s.ch >= 'A' && s.ch <= 'Z' || 252 s.ch >= '0' && s.ch <= '9' { 253 s.next() 254 } 255 lit = string(s.data[pos.Off:s.off]) 256 if key, ok := keywords[lit]; ok { 257 tok = key 258 } 259 return 260} 261 262func (s *scanner) Error(pos Pos, msg string, args ...interface{}) { 263 s.errors++ 264 s.errorHandler(pos, fmt.Sprintf(msg, args...)) 265} 266 267func (s *scanner) Ok() bool { 268 return s.errors == 0 269} 270 271func (s *scanner) next() { 272 s.off++ 273 for s.off < len(s.data) && s.data[s.off] == '\r' { 274 s.off++ 275 } 276 if s.off == len(s.data) { 277 // Always emit NEWLINE before EOF. 278 // Makes lots of things simpler as we always 279 // want to treat EOF as NEWLINE as well. 280 s.ch = '\n' 281 s.off++ 282 return 283 } 284 if s.off > len(s.data) { 285 s.ch = 0 286 return 287 } 288 if s.off == 0 || s.data[s.off-1] == '\n' { 289 s.line++ 290 s.col = 0 291 } 292 s.ch = s.data[s.off] 293 s.col++ 294 if s.ch == 0 { 295 s.Error(s.pos(), "illegal character \\x00") 296 } 297} 298 299func (s *scanner) skipWhitespace() { 300 for s.ch == ' ' || s.ch == '\t' { 301 s.next() 302 } 303} 304 305func (s *scanner) pos() Pos { 306 return Pos{ 307 File: s.filename, 308 Off: s.off, 309 Line: s.line, 310 Col: s.col, 311 } 312} 313