// Copyright 2017 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. package ast import ( "fmt" "os" "strconv" ) type token int const ( tokIllegal token = iota tokComment tokIdent tokInclude tokIncdir tokDefine tokResource tokString tokCExpr tokInt tokNewLine tokLParen tokRParen tokLBrack tokRBrack tokLBrace tokRBrace tokEq tokComma tokColon tokEOF ) var punctuation = [256]token{ '\n': tokNewLine, '(': tokLParen, ')': tokRParen, '[': tokLBrack, ']': tokRBrack, '{': tokLBrace, '}': tokRBrace, '=': tokEq, ',': tokComma, ':': tokColon, } var tok2str = [...]string{ tokIllegal: "ILLEGAL", tokComment: "comment", tokIdent: "identifier", tokInclude: "include", tokIncdir: "incdir", tokDefine: "define", tokResource: "resource", tokString: "string", tokCExpr: "CEXPR", tokInt: "int", tokNewLine: "NEWLINE", tokEOF: "EOF", } func init() { for ch, tok := range punctuation { if tok == tokIllegal { continue } tok2str[tok] = fmt.Sprintf("%q", ch) } } var keywords = map[string]token{ "include": tokInclude, "incdir": tokIncdir, "define": tokDefine, "resource": tokResource, } func (tok token) String() string { return tok2str[tok] } type scanner struct { data []byte filename string errorHandler ErrorHandler ch byte off int line int col int prev1 token prev2 token errors int } func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner { if errorHandler == nil { errorHandler = LoggingHandler } s := &scanner{ data: data, filename: filename, errorHandler: errorHandler, off: -1, } s.next() return s } type ErrorHandler func(pos Pos, msg string) func LoggingHandler(pos Pos, msg string) { fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg) } func (pos Pos) String() string { return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col) } func (s *scanner) Scan() (tok token, lit string, pos Pos) { s.skipWhitespace() pos = s.pos() switch { case s.ch == 0: tok = tokEOF s.next() case s.ch == '`': tok = tokCExpr lit = s.scanCExpr(pos) case s.prev2 == tokDefine && s.prev1 == tokIdent: // Note: the old form for C expressions, not really lexable. // TODO(dvyukov): get rid of this eventually. tok = tokCExpr for ; s.ch != '\n'; s.next() { } lit = string(s.data[pos.Off:s.off]) case s.ch == '#': tok = tokComment for s.next(); s.ch != '\n'; s.next() { } lit = string(s.data[pos.Off+1 : s.off]) case s.ch == '"' || s.ch == '<': tok = tokString lit = s.scanStr(pos) case s.ch >= '0' && s.ch <= '9' || s.ch == '-': tok = tokInt lit = s.scanInt(pos) case s.ch == '\'': tok = tokInt lit = s.scanChar(pos) case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z': tok, lit = s.scanIdent(pos) default: tok = punctuation[s.ch] if tok == tokIllegal { s.Error(pos, "illegal character %#U", s.ch) } s.next() } s.prev2 = s.prev1 s.prev1 = tok return } func (s *scanner) scanCExpr(pos Pos) string { for s.next(); s.ch != '`' && s.ch != '\n'; s.next() { } if s.ch == '\n' { s.Error(pos, "C expression is not terminated") return "" } lit := string(s.data[pos.Off+1 : s.off]) s.next() return lit } func (s *scanner) scanStr(pos Pos) string { // TODO(dvyukov): get rid of <...> strings, that's only includes closing := byte('"') if s.ch == '<' { closing = '>' } for s.next(); s.ch != closing; s.next() { if s.ch == 0 || s.ch == '\n' { s.Error(pos, "string literal is not terminated") return "" } } lit := string(s.data[pos.Off+1 : s.off]) for i := 0; i < len(lit); i++ { if lit[i] < 0x20 || lit[i] >= 0x80 { pos1 := pos pos1.Col += i + 1 pos1.Off += i + 1 s.Error(pos1, "illegal character %#U in string literal", lit[i]) break } } s.next() return lit } func (s *scanner) scanInt(pos Pos) string { for s.ch >= '0' && s.ch <= '9' || s.ch >= 'a' && s.ch <= 'f' || s.ch >= 'A' && s.ch <= 'F' || s.ch == 'x' || s.ch == '-' { s.next() } lit := string(s.data[pos.Off:s.off]) if _, err := strconv.ParseUint(lit, 10, 64); err == nil { return lit } if len(lit) > 1 && lit[0] == '-' { if _, err := strconv.ParseInt(lit, 10, 64); err == nil { return lit } } if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' { if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil { return lit } } s.Error(pos, fmt.Sprintf("bad integer %q", lit)) return "0" } func (s *scanner) scanChar(pos Pos) string { s.next() s.next() if s.ch != '\'' { s.Error(pos, "char literal is not terminated") return "0" } s.next() return string(s.data[pos.Off : pos.Off+3]) } func (s *scanner) scanIdent(pos Pos) (tok token, lit string) { tok = tokIdent for s.ch == '_' || s.ch == '$' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z' || s.ch >= '0' && s.ch <= '9' { s.next() } lit = string(s.data[pos.Off:s.off]) if key, ok := keywords[lit]; ok { tok = key } return } func (s *scanner) Error(pos Pos, msg string, args ...interface{}) { s.errors++ s.errorHandler(pos, fmt.Sprintf(msg, args...)) } func (s *scanner) Ok() bool { return s.errors == 0 } func (s *scanner) next() { s.off++ for s.off < len(s.data) && s.data[s.off] == '\r' { s.off++ } if s.off == len(s.data) { // Always emit NEWLINE before EOF. // Makes lots of things simpler as we always // want to treat EOF as NEWLINE as well. s.ch = '\n' s.off++ return } if s.off > len(s.data) { s.ch = 0 return } if s.off == 0 || s.data[s.off-1] == '\n' { s.line++ s.col = 0 } s.ch = s.data[s.off] s.col++ if s.ch == 0 { s.Error(s.pos(), "illegal character \\x00") } } func (s *scanner) skipWhitespace() { for s.ch == ' ' || s.ch == '\t' { s.next() } } func (s *scanner) pos() Pos { return Pos{ File: s.filename, Off: s.off, Line: s.line, Col: s.col, } }