# Copyright (c) 2017, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

# This is a rough parser for x86-64 and aarch64 assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.

# To regenerate delocate.peg.go:
#
#   go install github.com/pointlander/peg@latest
#   ~/go/bin/peg <path-to-this-file>
#
# This will generate delocate.peg.go next to delocate.peg.

# Notation reminder (see the pointlander/peg README): 'x' is a case-sensitive
# literal, "x" a case-insensitive literal, [...] a character class and [[...]]
# a case-insensitive character class. '/' is ordered choice, so the order of
# alternatives in every rule below is significant and must not be shuffled.

package main

type Asm Peg {}

# AsmFile: the whole input -- any number of statements up to end of input.
AsmFile <- Statement* !.

# Statement: optional leading whitespace, then either a label, or one of the
# directive / instruction / comment forms followed by a terminator (an optional
# comment and a newline, or ';'). The empty alternative after "Comment /"
# lets the statement body be empty, so a bare terminator also parses.
Statement <- WS? (Label / ((GlobalDirective /
                            LocationDirective /
                            LabelContainingDirective /
                            Instruction /
                            Directive /
                            Comment / ) WS? ((Comment? '\n') / ';')))

# GlobalDirective: .global / .globl followed by a symbol name.
GlobalDirective <- (".global" / ".globl") WS SymbolName

# Directive: any other '.'-prefixed directive with optional arguments. It is
# tried after the more specific directive rules listed in Statement.
Directive <- '.' DirectiveName (WS Args)?
DirectiveName <- [[A-Z0-9_]]+

# LocationDirective: .file and .loc; the rest of the line (up to a '#' or
# newline, and for .loc also up to a '/') is consumed as raw text.
LocationDirective <- FileDirective / LocDirective
FileDirective <- ".file" WS [^#\n]+
LocDirective <- ".loc" WS [^#/\n]+

# Args: comma-separated directive arguments. An Arg is a double-quoted string
# (with backslash escapes) or a possibly empty run of the listed characters.
Args <- Arg ((WS? ',' WS?) Arg)*
Arg <- QuotedArg / [[0-9a-z%+\-*_@.]]*
QuotedArg <- '"' QuotedText '"'
QuotedText <- (EscapedChar / [^"])*

# LabelContainingDirective: directives from the fixed list below whose
# arguments are parsed as symbol expressions (SymbolArgs) rather than as
# plain Args.
LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
LabelContainingDirectiveName <- ".xword" / ".word" / ".long" / ".set" / ".byte" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type" / ".uleb128" / ".sleb128"
SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*

# SymbolShift: a trailing "<< n" or ">> n" applied to a SymbolArg.
SymbolShift <- ('<<' / '>>') WS? [0-9]+

# SymbolArg: one symbol expression, optionally parenthesised and optionally
# followed by a shift. Alternatives are tried top to bottom.
# NOTE(review): because choice is ordered, the later alternatives look
# shadowed by the earlier ones for most inputs -- confirm before reordering,
# since that would change what the generated parser accepts.
SymbolArg <- (OpenParen WS?)? (
               Offset /
               SymbolType /
               (Offset / LocalSymbol / SymbolName / Dot) (WS? Operator WS? (Offset / LocalSymbol / SymbolName))* /
               LocalSymbol TCMarker? /
               SymbolName Offset /
               SymbolName TCMarker?)
             (WS? CloseParen)? (WS? SymbolShift)?
OpenParen <- '('
CloseParen <- ')'

# SymbolType: '@' or '%' followed by "function" or "object".
SymbolType <- [@%] ('function' / 'object')
Dot <- '.'
TCMarker <- '[TC]'

# EscapedChar: a backslash followed by any single character.
EscapedChar <- '\\' .

# WS: one or more spaces or tabs (newlines are statement terminators).
WS <- [ \t]+

# Comment: "//" or '#' through to the end of the line.
Comment <- ("//" / '#') [^\n]*

# Label: a local symbol, numeric local label or symbol name followed by ':'.
Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*

# LocalSymbol: a name starting with ".L".
LocalSymbol <- '.L' [[A-Za-z.0-9$_]]+

# LocalLabel / LocalLabelRef: a numeric label, and a reference to one that
# carries a trailing 'b' or 'f'.
LocalLabel <- [0-9][0-9$]*
LocalLabelRef <- [0-9][0-9$]*[bf]

# Instruction: a mnemonic followed by optional comma-separated arguments.
Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?

# InstructionName: letters, digits and dots, with an optional single trailing
# '.', '+' or '-'.
InstructionName <- [[A-Z]][[A-Z.0-9]]* [.+\-]?

# InstructionArg: an optional '*' indirection marker, one operand form, and
# any number of trailing {...} tokens.
InstructionArg <- IndirectionIndicator? (ARMConstantTweak / RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / GOTLocation / GOTSymbolOffset / MemoryRef) AVX512Token*

# GOTLocation: "$_GLOBAL_OFFSET_TABLE_-" followed by a local symbol.
GOTLocation <- '$_GLOBAL_OFFSET_TABLE_-' LocalSymbol

# GOTSymbolOffset: "$sym@GOT", "$sym@GOTOFF", or ":got:sym".
GOTSymbolOffset <- ('$' SymbolName '@GOT' 'OFF'?) / (":got:" SymbolName)

# AVX512Token: a brace-enclosed token such as a mask or modifier, e.g. {%k1}.
AVX512Token <- WS? '{' '%'? [0-9a-z]* '}'

# TOCRefHigh / TOCRefLow: ".TOC.-" followed by "0b" or a ".L" name, with an
# "@ha" or "@l" suffix respectively.
TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
IndirectionIndicator <- '*'

# RegisterOrConstant: a '%'-prefixed register, an optionally '$'-prefixed
# constant (one or two Offsets), a '#'-prefixed constant expression, or an ARM
# register. The trailing negative lookahead rejects the match when the next
# character is one of f b : ( + - ; presumably this leaves local label
# references, labels and memory operands to other alternatives -- confirm
# before changing.
RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) /
                       ('$'? ((Offset Offset) / Offset)) /
                       ('#' Offset ('*' [0-9]+ ('-' [0-9] [0-9]*)?)? ) /
                       ('#' '~'? '(' [0-9] WS? "<<" WS? [0-9] ')' ) /
                       ARMRegister)
                      ![fb:(+\-]

# ARMConstantTweak: an extend (e.g. uxtw, sxtb) or shift/rotate keyword with an
# optional '#' immediate.
ARMConstantTweak <- (([us] "xt" [xwhb]) / "lsl" / "lsr" / "ror" / "asr") (WS '#' Offset)?

# ARMRegister: sp, a numbered x/w/d/q/s/h/b register, xzr, wzr, NZCV, a vector
# register, or a brace-enclosed list of vector registers with an optional
# bracketed index.
ARMRegister <- "sp" / ([xwdqshb] [0-9] [0-9]?) / "xzr" / "wzr" / "NZCV" / ARMVectorRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')? )

# ARMVectorRegister: vN with an optional ".<count><size>" arrangement and an
# optional bracketed lane index.
ARMVectorRegister <- "v" [0-9] [0-9]? ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )?

# Compilers only output a very limited number of expression forms. Rather than
# implement a full expression parser, this enumerates those forms plus a few
# that appear in our hand-written assembly.
MemoryRef <- (SymbolRef BaseIndexScale /
              SymbolRef /
              Low12BitsSymbolRef /
              Offset* BaseIndexScale /
              SegmentRegister Offset BaseIndexScale /
              SegmentRegister BaseIndexScale /
              SegmentRegister Offset /
              ARMBaseIndexScale /
              BaseIndexScale)

# SymbolRef: a symbol with optional leading "offsets +", trailing offsets, and
# an optional "@Section" suffix that may itself carry offsets.
SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?

# Low12BitsSymbolRef: ":lo12:" followed by a symbol and an optional offset.
Low12BitsSymbolRef <- ":lo12:" (LocalSymbol / SymbolName) Offset?

# ARMBaseIndexScale: a bracketed ARM memory operand -- a base register, an
# optional second component (immediate expression, :got_lo12:/:lo12: symbol or
# register), an optional extend/shift, and an optional trailing '!'.
ARMBaseIndexScale <- '[' ARMRegister (',' WS? (('#' Offset (('*' [0-9]+) / ('*' '(' [0-9]+ Operator [0-9]+ ')') / (('+' [0-9]+)*))? ) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement?
ARMGOTLow12 <- ":got_lo12:" SymbolName
ARMPostincrement <- '!'

# BaseIndexScale: a parenthesised "(base, index, scale)" operand in which every
# component is optional.
BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
Operator <- [+\-]

# Offset: an optionally signed binary (0b), hexadecimal (0x) or decimal
# integer.
Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
Section <- [[A-Z@]]+

# SegmentRegister: '%', one of c d e f g s, then "s:" (e.g. %fs:).
SegmentRegister <- '%' [c-gs] 's:'