// Copyright (C) 2019 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package parser implements a SPIR-V assembly parser. package parser import ( "fmt" "io" "log" "strings" "unicode" "unicode/utf8" "github.com/KhronosGroup/SPIRV-Tools/utils/vscode/src/schema" ) // Type is an enumerator of token types. type Type int // Type enumerators const ( Ident Type = iota // Foo PIdent // %32, %foo Integer Float String Operator Comment Newline ) func (t Type) String() string { switch t { case Ident: return "Ident" case PIdent: return "PIdent" case Integer: return "Integer" case Float: return "Float" case String: return "String" case Operator: return "Operator" case Comment: return "Comment" default: return "" } } // Token represents a single lexed token. type Token struct { Type Type Range Range } func (t Token) String() string { return fmt.Sprintf("{%v %v}", t.Type, t.Range) } // Text returns the tokens text from the source. func (t Token) Text(lines []string) string { return t.Range.Text(lines) } // Range represents an interval in a text file. type Range struct { Start Position End Position } func (r Range) String() string { return fmt.Sprintf("[%v %v]", r.Start, r.End) } // Text returns the text for the given Range in the provided lines. func (r Range) Text(lines []string) string { sl, sc := r.Start.Line-1, r.Start.Column-1 if sl < 0 || sc < 0 || sl > len(lines) || sc > len(lines[sl]) { return fmt.Sprintf("", r.Start) } el, ec := r.End.Line-1, r.End.Column-1 if el < 0 || ec < 0 || el > len(lines) || ec > len(lines[sl]) { return fmt.Sprintf("", r.End) } sb := strings.Builder{} if sl != el { sb.WriteString(lines[sl][sc:]) for l := sl + 1; l < el; l++ { sb.WriteString(lines[l]) } sb.WriteString(lines[el][:ec]) } else { sb.WriteString(lines[sl][sc:ec]) } return sb.String() } // Contains returns true if p is in r. func (r Range) Contains(p Position) bool { return !(p.LessThan(r.Start) || p.GreaterThan(r.End)) } func (r *Range) grow(o Range) { if !r.Start.IsValid() || o.Start.LessThan(r.Start) { r.Start = o.Start } if !r.End.IsValid() || o.End.GreaterThan(r.End) { r.End = o.End } } // Position holds a line and column position in a text file. type Position struct { Line, Column int } func (p Position) String() string { return fmt.Sprintf("%v:%v", p.Line, p.Column) } // IsValid returns true if the position has a line and column greater than 1. func (p Position) IsValid() bool { return p.Line > 0 && p.Column > 0 } // LessThan returns true iff o is before p. func (p Position) LessThan(o Position) bool { switch { case !p.IsValid() || !o.IsValid(): return false case p.Line < o.Line: return true case p.Line > o.Line: return false case p.Column < o.Column: return true default: return false } } // GreaterThan returns true iff o is greater than p. func (p Position) GreaterThan(o Position) bool { switch { case !p.IsValid() || !o.IsValid(): return false case p.Line > o.Line: return true case p.Line < o.Line: return false case p.Column > o.Column: return true default: return false } } type lexer struct { source string lexerState diags []Diagnostic e error } type lexerState struct { offset int // byte offset in source toks []*Token // all the lexed tokens pos Position // current position } // err appends an fmt.Printf style error into l.diags for the given token. func (l *lexer) err(tok *Token, msg string, args ...interface{}) { rng := Range{} if tok != nil { rng = tok.Range } l.diags = append(l.diags, Diagnostic{ Range: rng, Severity: SeverityError, Message: fmt.Sprintf(msg, args...), }) } // next returns the next rune, or io.EOF if the last rune has already been // consumed. func (l *lexer) next() rune { if l.offset >= len(l.source) { l.e = io.EOF return 0 } r, n := utf8.DecodeRuneInString(l.source[l.offset:]) l.offset += n if n == 0 { l.e = io.EOF return 0 } if r == '\n' { l.pos.Line++ l.pos.Column = 1 } else { l.pos.Column++ } return r } // save returns the current lexerState. func (l *lexer) save() lexerState { return l.lexerState } // restore restores the current lexer state with s. func (l *lexer) restore(s lexerState) { l.lexerState = s } // pident processes the PIdent token at the current position. // The lexer *must* know the next token is a PIdent before calling. func (l *lexer) pident() { tok := &Token{Type: PIdent, Range: Range{Start: l.pos, End: l.pos}} if r := l.next(); r != '%' { log.Fatalf("lexer expected '%%', got '%v'", r) return } for l.e == nil { s := l.save() r := l.next() if !isAlphaNumeric(r) && r != '_' { l.restore(s) break } } tok.Range.End = l.pos l.toks = append(l.toks, tok) } // numberOrIdent processes the Ident, Float or Integer token at the current // position. func (l *lexer) numberOrIdent() { const Unknown Type = -1 tok := &Token{Type: Unknown, Range: Range{Start: l.pos, End: l.pos}} loop: for l.e == nil { s := l.save() r := l.next() switch { case r == '-', r == '+', isNumeric(r): continue case isAlpha(r), r == '_': switch tok.Type { case Unknown: tok.Type = Ident case Float, Integer: l.err(tok, "invalid number") return } case r == '.': switch tok.Type { case Unknown: tok.Type = Float default: l.restore(s) break loop } default: if tok.Type == Unknown { tok.Type = Integer } l.restore(s) break loop } } tok.Range.End = l.pos l.toks = append(l.toks, tok) } // string processes the String token at the current position. // The lexer *must* know the next token is a String before calling. func (l *lexer) string() { tok := &Token{Type: String, Range: Range{Start: l.pos, End: l.pos}} if r := l.next(); r != '"' { log.Fatalf("lexer expected '\"', got '%v'", r) return } escape := false for l.e == nil { switch l.next() { case '"': if !escape { tok.Range.End = l.pos l.toks = append(l.toks, tok) return } case '\\': escape = !escape default: escape = false } } } // operator processes the Operator token at the current position. // The lexer *must* know the next token is a Operator before calling. func (l *lexer) operator() { tok := &Token{Type: Operator, Range: Range{Start: l.pos, End: l.pos}} for l.e == nil { switch l.next() { case '=', '|': tok.Range.End = l.pos l.toks = append(l.toks, tok) return } } } // lineComment processes the Comment token at the current position. // The lexer *must* know the next token is a Comment before calling. func (l *lexer) lineComment() { tok := &Token{Type: Comment, Range: Range{Start: l.pos, End: l.pos}} if r := l.next(); r != ';' { log.Fatalf("lexer expected ';', got '%v'", r) return } for l.e == nil { s := l.save() switch l.next() { case '\n': l.restore(s) tok.Range.End = l.pos l.toks = append(l.toks, tok) return } } } // newline processes the Newline token at the current position. // The lexer *must* know the next token is a Newline before calling. func (l *lexer) newline() { tok := &Token{Type: Newline, Range: Range{Start: l.pos, End: l.pos}} if r := l.next(); r != '\n' { log.Fatalf("lexer expected '\n', got '%v'", r) return } tok.Range.End = l.pos l.toks = append(l.toks, tok) } // lex returns all the tokens and diagnostics after lexing source. func lex(source string) ([]*Token, []Diagnostic, error) { l := lexer{source: source, lexerState: lexerState{pos: Position{1, 1}}} lastPos := Position{} for l.e == nil { // Integrity check that the parser is making progress if l.pos == lastPos { log.Panicf("Parsing stuck at %v", l.pos) } lastPos = l.pos s := l.save() r := l.next() switch { case r == '%': l.restore(s) l.pident() case r == '+' || r == '-' || r == '_' || isAlphaNumeric(r): l.restore(s) l.numberOrIdent() case r == '"': l.restore(s) l.string() case r == '=', r == '|': l.restore(s) l.operator() case r == ';': l.restore(s) l.lineComment() case r == '\n': l.restore(s) l.newline() } } if l.e != nil && l.e != io.EOF { return nil, nil, l.e } return l.toks, l.diags, nil } func isNumeric(r rune) bool { return unicode.IsDigit(r) } func isAlpha(r rune) bool { return unicode.IsLetter(r) } func isAlphaNumeric(r rune) bool { return isAlpha(r) || isNumeric(r) } type parser struct { lines []string // all source lines toks []*Token // all tokens diags []Diagnostic // parser emitted diagnostics idents map[string]*Identifier // identifiers by name mappings map[*Token]interface{} // tokens to semantic map extInstImports map[string]schema.OpcodeMap // extension imports by identifier insts []*Instruction // all instructions } func (p *parser) parse() error { for i := 0; i < len(p.toks); { if p.newline(i) || p.comment(i) { i++ continue } if n := p.instruction(i); n > 0 { i += n } else { p.unexpected(i) i++ } } return nil } // instruction parses the instruction starting at the i'th token. func (p *parser) instruction(i int) (n int) { inst := &Instruction{} switch { case p.opcode(i) != nil: inst.Opcode = p.opcode(i) inst.Tokens = []*Token{p.tok(i)} p.mappings[p.tok(i)] = inst n++ case p.opcode(i+2) != nil: // try '%id' '=' inst.Result, inst.Opcode = p.pident(i), p.opcode(i+2) if inst.Result == nil || p.operator(i+1) != "=" { return 0 } n += 3 inst.Tokens = []*Token{p.tok(i), p.tok(i + 1), p.tok(i + 2)} p.mappings[p.tok(i+2)] = inst default: return } expectsResult := len(inst.Opcode.Operands) > 0 && IsResult(inst.Opcode.Operands[0].Kind) operands := inst.Opcode.Operands switch { case inst.Result != nil && !expectsResult: p.err(inst.Result, "'%s' does not have a result", inst.Opcode.Opname) return case inst.Result == nil && expectsResult: p.err(p.tok(i), "'%s' expects a result", inst.Opcode.Opname) return case inst.Result != nil && expectsResult: // Check the result is of the correct type o := inst.Opcode.Operands[0] p.operand(o.Name, o.Kind, i, false) operands = operands[1:] p.addIdentDef(inst.Result.Text(p.lines), inst, p.tok(i)) } processOperand := func(o schema.Operand) bool { if p.newline(i + n) { return false } switch o.Quantifier { case schema.Once: if op, c := p.operand(o.Name, o.Kind, i+n, false); op != nil { inst.Tokens = append(inst.Tokens, op.Tokens...) n += c } case schema.ZeroOrOnce: if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil { inst.Tokens = append(inst.Tokens, op.Tokens...) n += c } case schema.ZeroOrMany: for !p.newline(i + n) { if op, c := p.operand(o.Name, o.Kind, i+n, true); op != nil { inst.Tokens = append(inst.Tokens, op.Tokens...) n += c } else { return false } } } return true } for _, o := range operands { if !processOperand(o) { break } if inst.Opcode == schema.OpExtInst && n == 4 { extImportTok, extNameTok := p.tok(i+n), p.tok(i+n+1) extImport := extImportTok.Text(p.lines) if extOpcodes, ok := p.extInstImports[extImport]; ok { extName := extNameTok.Text(p.lines) if extOpcode, ok := extOpcodes[extName]; ok { n += 2 // skip ext import, ext name for _, o := range extOpcode.Operands { if !processOperand(o) { break } } } else { p.err(extNameTok, "Unknown extension opcode '%s'", extName) } } else { p.err(extImportTok, "Expected identifier to OpExtInstImport") } } } for _, t := range inst.Tokens { inst.Range.grow(t.Range) } p.insts = append(p.insts, inst) if inst.Opcode == schema.OpExtInstImport && len(inst.Tokens) >= 4 { // Instruction is a OpExtInstImport. Keep track of this. extTok := inst.Tokens[3] extName := strings.Trim(extTok.Text(p.lines), `"`) extOpcodes, ok := schema.ExtOpcodes[extName] if !ok { p.err(extTok, "Unknown extension '%s'", extName) } extImport := inst.Result.Text(p.lines) p.extInstImports[extImport] = extOpcodes } return } // operand parses the operand with the name n, kind k, starting at the i'th // token. func (p *parser) operand(n string, k *schema.OperandKind, i int, optional bool) (*Operand, int) { tok := p.tok(i) if tok == nil { return nil, 0 } op := &Operand{ Name: n, Kind: k, Tokens: []*Token{tok}, } p.mappings[tok] = op switch k.Category { case schema.OperandCategoryBitEnum, schema.OperandCategoryValueEnum: s := tok.Text(p.lines) for _, e := range k.Enumerants { if e.Enumerant == s { count := 1 for _, param := range e.Parameters { p, c := p.operand(param.Name, param.Kind, i+count, false) if p != nil { op.Tokens = append(op.Tokens, p.Tokens...) op.Parameters = append(op.Parameters, p) } count += c } // Handle bitfield '|' chains if p.tok(i+count).Text(p.lines) == "|" { count++ // '|' p, c := p.operand(n, k, i+count, false) if p != nil { op.Tokens = append(op.Tokens, p.Tokens...) op.Parameters = append(op.Parameters, p) } count += c } return op, count } } if !optional { p.err(p.tok(i), "invalid operand value '%s'", s) } return nil, 0 case schema.OperandCategoryID: id := p.pident(i) if id != nil { p.addIdentRef(p.tok(i)) return op, 1 } if !optional { p.err(p.tok(i), "operand requires id, got '%s'", tok.Text(p.lines)) } return nil, 0 case schema.OperandCategoryLiteral: switch tok.Type { case String, Integer, Float, Ident: return op, 1 } if !optional { p.err(p.tok(i), "operand requires literal, got '%s'", tok.Text(p.lines)) } return nil, 0 case schema.OperandCategoryComposite: n := 1 for _, b := range k.Bases { o, c := p.operand(b.Kind, b, i+n, optional) if o != nil { op.Tokens = append(op.Tokens, o.Tokens...) } n += c } return op, n default: p.err(p.tok(i), "OperandKind '%s' has unexpected category '%s'", k.Kind, k.Category) return nil, 0 } } // tok returns the i'th token, or nil if i is out of bounds. func (p *parser) tok(i int) *Token { if i < 0 || i >= len(p.toks) { return nil } return p.toks[i] } // opcode returns the schema.Opcode for the i'th token, or nil if the i'th token // does not represent an opcode. func (p *parser) opcode(i int) *schema.Opcode { if tok := p.ident(i); tok != nil { name := tok.Text(p.lines) if inst, found := schema.Opcodes[name]; found { return inst } } return nil } // operator returns the operator for the i'th token, or and empty string if the // i'th token is not an operator. func (p *parser) operator(i int) string { if tok := p.tok(i); tok != nil && tok.Type == Operator { return tok.Text(p.lines) } return "" } // ident returns the i'th token if it is an Ident, otherwise nil. func (p *parser) ident(i int) *Token { if tok := p.tok(i); tok != nil && tok.Type == Ident { return tok } return nil } // pident returns the i'th token if it is an PIdent, otherwise nil. func (p *parser) pident(i int) *Token { if tok := p.tok(i); tok != nil && tok.Type == PIdent { return tok } return nil } // comment returns true if the i'th token is a Comment, otherwise false. func (p *parser) comment(i int) bool { if tok := p.tok(i); tok != nil && tok.Type == Comment { return true } return false } // newline returns true if the i'th token is a Newline, otherwise false. func (p *parser) newline(i int) bool { if tok := p.tok(i); tok != nil && tok.Type == Newline { return true } return false } // unexpected emits an 'unexpected token error' for the i'th token. func (p *parser) unexpected(i int) { p.err(p.toks[i], "syntax error: unexpected '%s'", p.toks[i].Text(p.lines)) } // addIdentDef records the token definition for the instruction inst with the // given id. func (p *parser) addIdentDef(id string, inst *Instruction, def *Token) { i, existing := p.idents[id] if !existing { i = &Identifier{} p.idents[id] = i } if i.Definition == nil { i.Definition = inst } else { p.err(def, "id '%v' redeclared", id) } } // addIdentRef adds a identifier reference for the token ref. func (p *parser) addIdentRef(ref *Token) { id := ref.Text(p.lines) i, existing := p.idents[id] if !existing { i = &Identifier{} p.idents[id] = i } i.References = append(i.References, ref) } // err appends an fmt.Printf style error into l.diags for the given token. func (p *parser) err(tok *Token, msg string, args ...interface{}) { rng := Range{} if tok != nil { rng = tok.Range } p.diags = append(p.diags, Diagnostic{ Range: rng, Severity: SeverityError, Message: fmt.Sprintf(msg, args...), }) } // Parse parses the SPIR-V assembly string source, returning the parse results. func Parse(source string) (Results, error) { toks, diags, err := lex(source) if err != nil { return Results{}, err } lines := strings.SplitAfter(source, "\n") p := parser{ lines: lines, toks: toks, idents: map[string]*Identifier{}, mappings: map[*Token]interface{}{}, extInstImports: map[string]schema.OpcodeMap{}, } if err := p.parse(); err != nil { return Results{}, err } diags = append(diags, p.diags...) return Results{ Lines: lines, Tokens: toks, Diagnostics: p.diags, Identifiers: p.idents, Mappings: p.mappings, }, nil } // IsResult returns true if k is used to store the result of an instruction. func IsResult(k *schema.OperandKind) bool { switch k { case schema.OperandKindIdResult, schema.OperandKindIdResultType: return true default: return false } } // Results holds the output of Parse(). type Results struct { Lines []string Tokens []*Token Diagnostics []Diagnostic Identifiers map[string]*Identifier // identifiers by name Mappings map[*Token]interface{} // tokens to semantic map } // Instruction describes a single instruction instance type Instruction struct { Tokens []*Token // all the tokens that make up the instruction Result *Token // the token that represents the result of the instruction, or nil Operands []*Operand // the operands of the instruction Range Range // the textual range of the instruction Opcode *schema.Opcode // the opcode for the instruction } // Operand describes a single operand instance type Operand struct { Name string // name of the operand Kind *schema.OperandKind // kind of the operand Tokens []*Token // all the tokens that make up the operand Parameters []*Operand // all the parameters for the operand } // Identifier describes a single, unique SPIR-V identifier (i.e. %32) type Identifier struct { Definition *Instruction // where the identifier was defined References []*Token // all the places the identifier was referenced } // Severity is an enumerator of diagnostic severities type Severity int // Severity levels const ( SeverityError Severity = iota SeverityWarning SeverityInformation SeverityHint ) // Diagnostic holds a single diagnostic message that was generated while // parsing. type Diagnostic struct { Range Range Severity Severity Message string }