• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2017, Google Inc.
2#
3# Permission to use, copy, modify, and/or distribute this software for any
4# purpose with or without fee is hereby granted, provided that the above
5# copyright notice and this permission notice appear in all copies.
6#
7# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15# This is a rough parser for x86-64, ppc64le and aarch64 assembly, designed to
16# work with https://github.com/pointlander/peg. delocate.go has a go:generate
17# line for rebuilding delocate.peg.go from this file.
18
# Header for the peg tool: the Go package the generated parser belongs to.
19package main
20
# Declares the parser type, named Asm.
21type Asm Peg {}
22
# AsmFile is the top-level rule: zero or more Statements followed by end of
# input (`!.` succeeds only when no character remains).
23AsmFile <- Statement* !.
# Statement, after optional leading whitespace, is either a Label (which needs
# no terminator) or one of the listed statement forms followed by optional
# whitespace and a terminator: a newline, optionally preceded by a Comment, or
# a ';'.
#
# The alternative list deliberately ends with an empty alternative after the
# final '/', so a bare terminator (for example a blank line) also matches.
# Alternatives are tried in order, so the specific directive rules are listed
# before the generic Directive rule.
24Statement <- WS? (Label / ((GlobalDirective /
25                            LocationDirective /
26                            LabelContainingDirective /
27                            Instruction /
28                            Directive /
29                            Comment / ) WS? ((Comment? '\n') / ';')))
# GlobalDirective: ".global" or ".globl", whitespace, then a SymbolName.
30GlobalDirective <- (".global" / ".globl") WS SymbolName
# Directive: the generic fallback form, a '.' followed by a DirectiveName and
# an optional whitespace-separated argument list.
31Directive <- '.' DirectiveName (WS Args)?
# DirectiveName: one or more letters, digits or underscores.
32DirectiveName <- [[A-Z0-9_]]+
# LocationDirective: either of the two location directives below.
33LocationDirective <- FileDirective / LocDirective
# FileDirective: ".file", whitespace, then everything up to (not including)
# the next '#' or newline.
34FileDirective <- ".file" WS [^#\n]+
# LocDirective: ".loc", whitespace, then everything up to (not including) the
# next '#', '/' or newline.
35LocDirective <- ".loc" WS [^#/\n]+
# Args: one or more Arg separated by commas, with optional whitespace around
# each comma.
36Args <- Arg ((WS? ',' WS?) Arg)*
# Arg: a QuotedArg, or a run of characters from the listed class. The run uses
# '*', so an Arg may match the empty string.
37Arg <- QuotedArg / [[0-9a-z%+\-*_@.]]*
# QuotedArg: QuotedText enclosed in double quotes.
38QuotedArg <- '"' QuotedText '"'
# QuotedText: any sequence of EscapedChar or characters other than '"'.
# EscapedChar is tried first so that an escaped quote does not end the text.
39QuotedText <- (EscapedChar / [^"])*
# LabelContainingDirective: one of the directive names listed in
# LabelContainingDirectiveName, whitespace, then a list of SymbolArgs.
40LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
# LabelContainingDirectiveName: the fixed set of directive names handled by
# LabelContainingDirective. Order matters for names sharing a prefix; ".xword"
# is listed before ".word" and ".8byte"/".4byte" are distinct from ".byte".
41LabelContainingDirectiveName <- ".xword" / ".word" / ".long" / ".set" / ".byte" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type" / ".uleb128" / ".sleb128"
# SymbolArgs: one or more SymbolArg separated by commas, with optional
# whitespace around each comma.
42SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
# SymbolShift: "<<" or ">>", optional whitespace, then a decimal number.
43SymbolShift <- ('<<' / '>>') WS? [0-9]+
# SymbolArg: an optional opening parenthesis, then one of the following
# (tried in order), then an optional closing parenthesis and an optional
# SymbolShift:
#   1. a bare Offset;
#   2. a SymbolType;
#   3. an Offset, LocalSymbol, SymbolName or Dot followed by zero or more
#      "Operator operand" pairs;
#   4. a LocalSymbol with an optional TCMarker;
#   5. a SymbolName followed by an Offset;
#   6. a SymbolName with an optional TCMarker.
# The parentheses are matched independently; the rule does not require them
# to be balanced.
# NOTE(review): alternative 3 succeeds on any LocalSymbol or SymbolName even
# with zero operator pairs, so under ordered choice alternatives 4-6 look
# unreachable (and the Offset inside alternative 3 is shadowed by alternative
# 1). Confirm whether inputs using TCMarker are still expected to parse.
44SymbolArg <- (OpenParen WS?)? (
45               Offset /
46               SymbolType /
47               (Offset / LocalSymbol / SymbolName / Dot) (WS? Operator WS? (Offset / LocalSymbol / SymbolName))* /
48               LocalSymbol TCMarker? /
49               SymbolName Offset /
50               SymbolName TCMarker?)
51             (WS? CloseParen)? (WS? SymbolShift)?
# OpenParen / CloseParen: the literal parentheses, as named rules.
52OpenParen <- '('
53CloseParen <- ')'
# SymbolType: '@' or '%' followed by "function" or "object".
54SymbolType <- [@%] ('function' / 'object')
# Dot: a single '.'.
55Dot <- '.'
# TCMarker: the literal text "[TC]".
56TCMarker <- '[TC]'
# EscapedChar: a backslash followed by any single character.
57EscapedChar <- '\\' .
# WS: one or more spaces or tabs. Newlines are not whitespace here.
58WS <- [ \t]+
# Comment: "//" or '#' followed by everything up to (not including) the
# newline.
59Comment <- ("//" / '#') [^\n]*
# Label: a LocalSymbol, LocalLabel or SymbolName immediately followed by ':'.
60Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
# SymbolName: a letter, '.' or '_', then any number of letters, '.', digits,
# '$' or '_'. (The double-bracket form is pointlander/peg's case-insensitive
# character class.)
61SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
# LocalSymbol: the prefix ".L" followed by one or more letters, '.', digits,
# '$' or '_'.
62LocalSymbol <- '.L' [[A-Za-z.0-9$_]]+
# LocalLabel: a digit followed by any number of digits or '$'.
63LocalLabel <- [0-9][0-9$]*
# LocalLabelRef: the same shape as LocalLabel with a trailing 'b' or 'f'.
64LocalLabelRef <- [0-9][0-9$]*[bf]
# Instruction: an InstructionName, optionally followed by whitespace and one
# or more comma-separated InstructionArgs (optional whitespace around commas).
65Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
# InstructionName: a letter, then any number of letters, '.' or digits, with
# an optional trailing '.', '+' or '-'.
66InstructionName <- [[A-Z]][[A-Z.0-9]]* [.+\-]?
# InstructionArg: an optional IndirectionIndicator, then the first matching
# operand form from the ordered list, then any number of AVX512Tokens.
# The order of the operand alternatives is significant.
67InstructionArg <- IndirectionIndicator? (ARMConstantTweak / RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / GOTLocation / GOTSymbolOffset / MemoryRef) AVX512Token*
# GOTLocation: the literal "$_GLOBAL_OFFSET_TABLE_-" followed by a LocalSymbol.
68GOTLocation <- '$_GLOBAL_OFFSET_TABLE_-' LocalSymbol
# GOTSymbolOffset: either '$' SymbolName "@GOT" with an optional "OFF" suffix,
# or ":got:" followed by a SymbolName.
69GOTSymbolOffset <- ('$' SymbolName '@GOT' 'OFF'?) / (":got:" SymbolName)
# AVX512Token: optional whitespace, then a brace-enclosed run of digits and
# lowercase letters, optionally prefixed by '%' inside the braces.
70AVX512Token <- WS? '{' '%'? [0-9a-z]* '}'
# TOCRefHigh: ".TOC.-" followed by "0b" or a ".L"-prefixed name, then "@ha".
71TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
# TOCRefLow: the same shape as TOCRefHigh but ending in "@l".
72TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
# IndirectionIndicator: a single '*'.
73IndirectionIndicator <- '*'
# RegisterOrConstant: one of, in order:
#   - '%' followed by a letter and then letters or digits;
#   - an optional '$' followed by one or two Offsets;
#   - '#' Offset, optionally followed by '*' digits and then optionally
#     '-' digits;
#   - '#', optional '~', then "(d << d)" with single digits;
#   - an ARMRegister.
# The trailing negative lookahead rejects the match when the next character
# is 'f', 'b', ':', '(', '+' or '-'.
# NOTE(review): presumably this keeps the rule from consuming the leading part
# of a LocalLabelRef, SegmentRegister or MemoryRef - confirm.
74RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) /
75                       ('$'? ((Offset Offset) / Offset)) /
76                       ('#' Offset ('*' [0-9]+ ('-' [0-9] [0-9]*)?)? ) /
77                       ('#' '~'? '(' [0-9] WS? "<<" WS? [0-9] ')' ) /
78                       ARMRegister)
79                      ![fb:(+\-]
# ARMConstantTweak: one of the listed shift/extend keywords, optionally
# followed by whitespace, '#' and an Offset.
80ARMConstantTweak <- ("lsl" / "sxtw" / "sxtb" / "uxtw" / "uxtb" / "lsr" / "ror" / "asr") (WS '#' Offset)?
# ARMRegister: "sp"; one of x/w/d/q/s followed by one or two digits; "xzr";
# "wzr"; an ARMVectorRegister; or a brace-enclosed, comma-separated list of
# ARMVectorRegisters with an optional one- or two-digit "[n]" index after the
# closing brace.
81ARMRegister <- "sp" / ([xwdqs] [0-9] [0-9]?) / "xzr" / "wzr" / ARMVectorRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? ']')? )
# ARMVectorRegister: "v" followed by one or two digits, optionally followed by
# '.', optional digits, one of b/s/d/h/q, and an optional one- or two-digit
# "[n]" index.
82ARMVectorRegister <- "v" [0-9] [0-9]? ('.' [0-9]* [bsdhq] ('[' [0-9] [0-9]? ']')? )?
83# Compilers only output a very limited number of expression forms. Rather than
84# implement a full expression parser, this enumerates those forms plus a few
85# that appear in our hand-written assembly.
# MemoryRef: the ordered list of accepted memory-operand forms. The first
# alternative that matches wins, so longer forms (for example SymbolRef
# followed by BaseIndexScale) are listed before their shorter prefixes.
86MemoryRef <- (SymbolRef BaseIndexScale /
87              SymbolRef /
88              Low12BitsSymbolRef /
89              Offset* BaseIndexScale /
90              SegmentRegister Offset BaseIndexScale /
91              SegmentRegister BaseIndexScale /
92              SegmentRegister Offset /
93              ARMBaseIndexScale /
94              BaseIndexScale)
# SymbolRef: an optional prefix of Offsets ending in '+', then a LocalSymbol
# or SymbolName, then any number of Offsets, then optionally '@' Section
# followed by any number of Offsets.
95SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?
# Low12BitsSymbolRef: ":lo12:" followed by a LocalSymbol or SymbolName and an
# optional Offset.
96Low12BitsSymbolRef <- ":lo12:" (LocalSymbol / SymbolName) Offset?
# ARMBaseIndexScale: a bracketed operand. Inside the brackets is an
# ARMRegister, optionally followed by a comma and one of:
#   - '#' Offset, optionally followed by "*digits", "*(digits Operator
#     digits)", or any number of "+digits" terms;
#   - an ARMGOTLow12;
#   - a Low12BitsSymbolRef;
#   - another ARMRegister;
# and then optionally a comma and an ARMConstantTweak. An ARMPostincrement
# may follow the closing bracket.
97ARMBaseIndexScale <- '[' ARMRegister (',' WS? (('#' Offset (('*' [0-9]+) / ('*' '(' [0-9]+ Operator [0-9]+ ')') / (('+' [0-9]+)*))? ) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? ']' ARMPostincrement?
# ARMGOTLow12: ":got_lo12:" followed by a SymbolName.
98ARMGOTLow12 <- ":got_lo12:" SymbolName
# ARMPostincrement: a single '!'.
99ARMPostincrement <- '!'
# BaseIndexScale: a parenthesised operand with an optional first
# RegisterOrConstant, then optionally a comma, a second RegisterOrConstant
# and, optionally, a comma followed by digits.
100BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
# Operator: '+' or '-'.
101Operator <- [+\-]
# Offset: an optional '+', an optional '-', then a binary ("0b"), hexadecimal
# ("0x") or decimal number. The prefixed forms are tried before plain decimal.
102Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
# Section: one or more letters or '@' characters.
103Section <- [[A-Z@]]+
# SegmentRegister: '%', one of c, d, e, f, g or s, then "s:".
104SegmentRegister <- '%' [c-gs] 's:'
105