# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# This is a rough parser for x86-64 and ppc64le assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
#
# Notation reminders (pointlander/peg syntax): `/` is ordered choice, so the
# order of alternatives below is significant; double-bracket classes such as
# [[A-Z]] and double-quoted literals match case-insensitively, while
# single-quoted literals and single-bracket classes match exactly.

package main

type Asm Peg {}

# Top-level rule: zero or more statements, then end of input (`!.`).
AsmFile <- Statement* !.

# One statement: optional leading whitespace, then either a label, or one of
# the directive / instruction / comment forms followed by optional whitespace
# and a terminator (a newline, optionally preceded by a comment, or ';').
# The final alternative inside the inner group is empty, so a bare terminator
# (blank line or lone ';') is also accepted. The specific directive rules are
# tried before the generic Directive rule.
Statement <- WS? (Label / ((GlobalDirective /
                            LocationDirective /
                            LabelContainingDirective /
                            Instruction /
                            Directive /
                            Comment / ) WS? ((Comment? '\n') / ';')))

# `.global` / `.globl` followed by a single symbol name.
GlobalDirective <- (".global" / ".globl") WS SymbolName

# Any other directive: a dot, a name, and an optional comma-separated
# argument list.
Directive <- '.' DirectiveName (WS Args)?
DirectiveName <- [[A-Z0-9_]]+

# `.file` / `.loc`: the rest of the line up to a '#' or newline is consumed
# as-is rather than being parsed into arguments.
LocationDirective <- (".file" / ".loc") WS [^#\n]+

# Generic directive arguments. Note that the unquoted form of Arg uses `*`,
# so an Arg may match the empty string.
Args <- Arg ((WS? ',' WS?) Arg)*
Arg <- QuotedArg / [[0-9a-z%+\-*_@.]]*
QuotedArg <- '"' QuotedText '"'
QuotedText <- (EscapedChar / [^"])*

# Directives whose arguments are parsed as symbol expressions (SymbolArgs)
# instead of opaque Args.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*

# A single symbol argument. Alternatives, in the order they are tried:
# a bare offset; a symbol type marker; a binary expression `lhs op rhs`
# (where lhs may also be `.`); a local symbol with optional [TC] marker;
# a symbol name immediately followed by an offset; a symbol name with
# optional [TC] marker.
SymbolArg <- Offset /
             SymbolType /
             (Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
             LocalSymbol TCMarker? /
             SymbolName Offset /
             SymbolName TCMarker?
SymbolType <- '@function' / '@object'
Dot <- '.'
TCMarker <- '[TC]'

# Backslash followed by any single character (used inside quoted arguments).
EscapedChar <- '\\' .

# Horizontal whitespace only; newlines are statement terminators.
WS <- [ \t]+

# '#' to end of line (the newline itself is not consumed).
Comment <- '#' [^\n]*

# A label definition: a local symbol, numeric local label, or symbol name,
# followed by ':'.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
LocalSymbol <- '.L' [[A-Z.0-9$_]]+

# Numeric local label, and a reference to one with a trailing `b` or `f`.
LocalLabel <- [0-9][0-9$]*
LocalLabelRef <- [0-9][0-9$]*[bf]

# An instruction: a mnemonic and an optional comma-separated operand list.
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?

# Mnemonic: a letter, then letters/digits, then at most one of '.', '+', '-'.
InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?

# One operand: optional '*' prefix, one of the operand forms (tried in the
# listed order), then any number of AVX512 brace tokens.
InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef) AVX512Token*

# Brace-delimited token such as `{%k1}` or `{z}`, with optional leading
# whitespace.
AVX512Token <- WS? '{' '%'? [0-9a-z]* '}'

# `.TOC.-<label>@ha` and `.TOC.-<label>@l`, where <label> is `0b` or a `.L`
# local symbol.
TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
IndirectionIndicator <- '*'

# A `%`-prefixed register name, or a constant made of one or two Offsets with
# an optional '$' prefix. The trailing negative lookahead rejects the match
# when the next character is one of `f b : ( + -`.
# NOTE(review): presumably this lets local label references (e.g. `1f`),
# segment-prefixed operands and memory references fall through to the later
# InstructionArg alternatives -- confirm against delocate.go.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]

# Compilers only output a very limited number of expression forms. Rather than
# implement a full expression parser, this enumerates those forms plus a few
# that appear in our hand-written assembly.
MemoryRef <- (SymbolRef BaseIndexScale /
              SymbolRef /
              Offset* BaseIndexScale /
              SegmentRegister Offset BaseIndexScale /
              SegmentRegister BaseIndexScale /
              SegmentRegister Offset /
              BaseIndexScale)

# A symbol reference with optional leading `offsets+`, optional trailing
# offsets, and an optional `@section` suffix that may itself carry offsets.
SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?

# Parenthesised `(base, index, scale)` group; every component is optional.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
Operator <- [+\-]

# Numeric literal with optional '+' then optional '-' sign: binary (`0b`),
# hexadecimal (`0x`) or decimal digits.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
Section <- [[A-Z@]]+

# '%', one character from c-g or s, then `s:` (e.g. `%fs:`).
SegmentRegister <- '%' [c-gs] 's:'