• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"errors"
21	"flag"
22	"fmt"
23	"io/ioutil"
24	"os"
25	"sort"
26	"strconv"
27	"strings"
28
29	"boringssl.googlesource.com/boringssl/util/ar"
30	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
31)
32
// inputFile represents a textual assembly file.
type inputFile struct {
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
47
// stringWriter is the minimal sink that delocated assembly is written to. It
// is the type of delocation.output and is what establishTOC and loadFromTOC
// emit their instructions into.
type stringWriter interface {
	WriteString(string) (int, error)
}
51
// processorType selects which instruction processor processInput dispatches
// to. The values start at one, so the zero value does not name a processor
// and processInput panics with "unknown processor" if it is ever used.
type processorType int

const (
	ppc64le processorType = iota + 1
	x86_64
	aarch64
)
59
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the architecture-specific instruction handling.
	processor processorType
	// output receives the transformed assembly.
	output stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbols (with offsets) for which TOC helper
	// functions are required. Each key has the form
	// symbol + "\x00" + offset. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question. On aarch64, loadAarch64Address also
	// records here the symbols for which a GOT helper function is needed.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. It is set by processInput
	// and is what contents slices to turn an AST node back into text.
	currentInput inputFile
}
96
97func (d *delocation) contents(node *node32) string {
98	return d.currentInput.contents[node.begin:node.end]
99}
100
101// writeNode writes out an AST node.
102func (d *delocation) writeNode(node *node32) {
103	if _, err := d.output.WriteString(d.contents(node)); err != nil {
104		panic(err)
105	}
106}
107
108func (d *delocation) writeCommentedNode(node *node32) {
109	line := d.contents(node)
110	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
111		panic(err)
112	}
113}
114
115func locateError(err error, with *node32, in inputFile) error {
116	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
117	var line int
118	for _, pos := range posMap {
119		line = pos.line
120	}
121
122	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
123}
124
125func (d *delocation) processInput(input inputFile) (err error) {
126	d.currentInput = input
127
128	var origStatement *node32
129	defer func() {
130		if err := recover(); err != nil {
131			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
132		}
133	}()
134
135	for statement := input.ast.up; statement != nil; statement = statement.next {
136		assertNodeType(statement, ruleStatement)
137		origStatement = statement
138
139		node := skipWS(statement.up)
140		if node == nil {
141			d.writeNode(statement)
142			continue
143		}
144
145		switch node.pegRule {
146		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
147			d.writeNode(statement)
148		case ruleDirective:
149			statement, err = d.processDirective(statement, node.up)
150		case ruleLabelContainingDirective:
151			statement, err = d.processLabelContainingDirective(statement, node.up)
152		case ruleLabel:
153			statement, err = d.processLabel(statement, node.up)
154		case ruleInstruction:
155			switch d.processor {
156			case x86_64:
157				statement, err = d.processIntelInstruction(statement, node.up)
158			case ppc64le:
159				statement, err = d.processPPCInstruction(statement, node.up)
160			case aarch64:
161				statement, err = d.processAarch64Instruction(statement, node.up)
162			default:
163				panic("unknown processor")
164			}
165		default:
166			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
167		}
168
169		if err != nil {
170			return locateError(err, origStatement, input)
171		}
172	}
173
174	return nil
175}
176
177func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
178	assertNodeType(directive, ruleDirectiveName)
179	directiveName := d.contents(directive)
180
181	var args []string
182	forEachPath(directive, func(arg *node32) {
183		// If the argument is a quoted string, use the raw contents.
184		// (Note that this doesn't unescape the string, but that's not
185		// needed so far.
186		if arg.up != nil {
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedArg)
189			if arg.up == nil {
190				args = append(args, "")
191				return
192			}
193			arg = arg.up
194			assertNodeType(arg, ruleQuotedText)
195		}
196		args = append(args, d.contents(arg))
197	}, ruleArgs, ruleArg)
198
199	switch directiveName {
200	case "comm", "lcomm":
201		if len(args) < 1 {
202			return nil, errors.New("comm directive has no arguments")
203		}
204		d.bssAccessorsNeeded[args[0]] = args[0]
205		d.writeNode(statement)
206
207	case "data":
208		// ASAN and some versions of MSAN are adding a .data section,
209		// and adding references to symbols within it to the code. We
210		// will have to work around this in the future.
211		return nil, errors.New(".data section found in module")
212
213	case "section":
214		section := args[0]
215
216		if section == ".data.rel.ro" {
217			// In a normal build, this is an indication of a
218			// problem but any references from the module to this
219			// section will result in a relocation and thus will
220			// break the integrity check. ASAN can generate these
221			// sections and so we will likely have to work around
222			// that in the future.
223			return nil, errors.New(".data.rel.ro section found in module")
224		}
225
226		sectionType, ok := sectionType(section)
227		if !ok {
228			// Unknown sections are permitted in order to be robust
229			// to different compiler modes.
230			d.writeNode(statement)
231			break
232		}
233
234		switch sectionType {
235		case ".rodata", ".text":
236			// Move .rodata to .text so it may be accessed without
237			// a relocation. GCC with -fmerge-constants will place
238			// strings into separate sections, so we move all
239			// sections named like .rodata. Also move .text.startup
240			// so the self-test function is also in the module.
241			d.writeCommentedNode(statement)
242			d.output.WriteString(".text\n")
243
244		case ".data":
245			// See above about .data
246			return nil, errors.New(".data section found in module")
247
248		case ".init_array", ".fini_array", ".ctors", ".dtors":
249			// init_array/ctors/dtors contains function
250			// pointers to constructor/destructor
251			// functions. These contain relocations, but
252			// they're in a different section anyway.
253			d.writeNode(statement)
254			break
255
256		case ".debug", ".note", ".toc":
257			d.writeNode(statement)
258			break
259
260		case ".bss":
261			d.writeNode(statement)
262			return d.handleBSS(statement)
263		}
264
265	default:
266		d.writeNode(statement)
267	}
268
269	return statement, nil
270}
271
272func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
273	// The symbols within directives need to be mapped so that local
274	// symbols in two different .s inputs don't collide.
275	changed := false
276	assertNodeType(directive, ruleLabelContainingDirectiveName)
277	name := d.contents(directive)
278
279	node := directive.next
280	assertNodeType(node, ruleWS)
281
282	node = node.next
283	assertNodeType(node, ruleSymbolArgs)
284
285	var args []string
286	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
287		assertNodeType(node, ruleSymbolArg)
288		arg := node.up
289		var mapped string
290
291		for term := arg; term != nil; term = term.next {
292			if term.pegRule != ruleLocalSymbol {
293				mapped += d.contents(term)
294				continue
295			}
296
297			oldSymbol := d.contents(term)
298			newSymbol := d.mapLocalSymbol(oldSymbol)
299			if newSymbol != oldSymbol {
300				changed = true
301			}
302
303			mapped += newSymbol
304		}
305
306		args = append(args, mapped)
307	}
308
309	if !changed {
310		d.writeNode(statement)
311	} else {
312		d.writeCommentedNode(statement)
313		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
314	}
315
316	if name == ".localentry" {
317		d.output.WriteString(localEntryName(args[0]) + ":\n")
318	}
319
320	return statement, nil
321}
322
323func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
324	symbol := d.contents(label)
325
326	switch label.pegRule {
327	case ruleLocalLabel:
328		d.output.WriteString(symbol + ":\n")
329	case ruleLocalSymbol:
330		// symbols need to be mapped so that local symbols from two
331		// different .s inputs don't collide.
332		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
333	case ruleSymbolName:
334		d.output.WriteString(localTargetName(symbol) + ":\n")
335		d.writeNode(statement)
336	default:
337		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
338	}
339
340	return statement, nil
341}
342
343// instructionArgs collects all the arguments to an instruction.
344func instructionArgs(node *node32) (argNodes []*node32) {
345	for node = skipWS(node); node != nil; node = skipWS(node.next) {
346		assertNodeType(node, ruleInstructionArg)
347		argNodes = append(argNodes, node.up)
348	}
349
350	return argNodes
351}
352
353// Aarch64 support
354
// gotHelperName maps |symbol| to the name of the synthesised function that
// returns that symbol's address from the GOT.
func gotHelperName(symbol string) string {
	const prefix = ".Lboringssl_loadgot_"
	return prefix + symbol
}
360
361// loadAarch64Address emits instructions to put the address of |symbol|
362// (optionally adjusted by |offsetStr|) into |targetReg|.
363func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
364	// There are two paths here: either the symbol is known to be local in which
365	// case adr is used to get the address (within 1MiB), or a GOT reference is
366	// really needed in which case the code needs to jump to a helper function.
367	//
368	// A helper function is needed because using code appears to be the only way
369	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
370	// the module, but on Aarch64 that results in a "COPY" relocation and linker
371	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
372	// a function outside of the module that returns the address from the GOT in
373	// x0.
374
375	d.writeCommentedNode(statement)
376
377	_, isKnown := d.symbols[symbol]
378	isLocal := strings.HasPrefix(symbol, ".L")
379	if isKnown || isLocal || isSynthesized(symbol) {
380		if isLocal {
381			symbol = d.mapLocalSymbol(symbol)
382		} else if isKnown {
383			symbol = localTargetName(symbol)
384		}
385
386		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
387
388		return statement, nil
389	}
390
391	if len(offsetStr) != 0 {
392		panic("non-zero offset for helper-based reference")
393	}
394
395	var helperFunc string
396	if symbol == "OPENSSL_armcap_P" {
397		helperFunc = ".LOPENSSL_armcap_P_addr"
398	} else {
399		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
400		// instruction, which would normally do the dereferencing, needs to be
401		// dropped. GOT helpers have to include the dereference because the
402		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
403		// instruction.
404		d.gotExternalsNeeded[symbol] = struct{}{}
405		helperFunc = gotHelperName(symbol)
406	}
407
408	// Clear the red-zone. I can't find a definitive answer about whether Linux
409	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
410	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
411	d.output.WriteString("\tsub sp, sp, 128\n")
412
413	// Save x0 (which will be stomped by the return value) and the link register
414	// to the stack. Then save the program counter into the link register and
415	// jump to the helper function.
416	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
417	d.output.WriteString("\tbl " + helperFunc + "\n")
418
419	if targetReg == "x0" {
420		// If the target happens to be x0 then restore the link register from the
421		// stack and send the saved value of x0 to the zero register.
422		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
423	} else {
424		// Otherwise move the result into place and restore registers.
425		d.output.WriteString("\tmov " + targetReg + ", x0\n")
426		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
427	}
428
429	// Revert the red-zone adjustment.
430	d.output.WriteString("\tadd sp, sp, 128\n")
431
432	return statement, nil
433}
434
435func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
436	assertNodeType(instruction, ruleInstructionName)
437	instructionName := d.contents(instruction)
438
439	argNodes := instructionArgs(instruction.next)
440
441	switch instructionName {
442	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
443		// These functions are special because they take a condition-code name as
444		// an argument and that looks like a symbol reference.
445		d.writeNode(statement)
446		return statement, nil
447
448	case "mrs":
449		// Functions that take special register names also look like a symbol
450		// reference to the parser.
451		d.writeNode(statement)
452		return statement, nil
453
454	case "adrp":
455		// adrp always generates a relocation, even when the target symbol is in the
456		// same segment, because the page-offset of the code isn't known until link
457		// time. Thus adrp instructions are turned into either adr instructions
458		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
459		// which load the full address. Later instructions, which add the low 12 bits
460		// of offset, are tweaked to remove the offset since it's already included.
461		// Loads of GOT symbols are slightly more complex because it's not possible to
462		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
463		// instruction, which would normally do the dereferencing, is dropped
464		// completely. (Or turned into a mov if it targets a different register.)
465		assertNodeType(argNodes[0], ruleRegisterOrConstant)
466		targetReg := d.contents(argNodes[0])
467		if !strings.HasPrefix(targetReg, "x") {
468			panic("adrp targetting register " + targetReg + ", which has the wrong size")
469		}
470
471		var symbol, offset string
472		switch argNodes[1].pegRule {
473		case ruleGOTSymbolOffset:
474			symbol = d.contents(argNodes[1].up)
475		case ruleMemoryRef:
476			assertNodeType(argNodes[1].up, ruleSymbolRef)
477			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
478			if len(empty) != 0 {
479				panic("prefix offsets found for adrp")
480			}
481			symbol = d.contents(node)
482			_, offset = d.gatherOffsets(node.next, "")
483		default:
484			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
485		}
486
487		return d.loadAarch64Address(statement, targetReg, symbol, offset)
488	}
489
490	var args []string
491	changed := false
492
493	for _, arg := range argNodes {
494		fullArg := arg
495
496		switch arg.pegRule {
497		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
498			args = append(args, d.contents(fullArg))
499
500		case ruleGOTSymbolOffset:
501			// These should only be arguments to adrp and thus unreachable.
502			panic("unreachable")
503
504		case ruleMemoryRef:
505			ref := arg.up
506
507			switch ref.pegRule {
508			case ruleSymbolRef:
509				// This is a branch. Either the target needs to be written to a local
510				// version of the symbol to ensure that no relocations are emitted, or
511				// it needs to jump to a redirector function.
512				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
513				changed = didChange
514
515				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
516					symbol = localTargetName(symbol)
517					changed = true
518				} else if !symbolIsLocal && !isSynthesized(symbol) {
519					redirector := redirectorName(symbol)
520					d.redirectors[symbol] = redirector
521					symbol = redirector
522					changed = true
523				} else if didChange && symbolIsLocal && len(offset) > 0 {
524					// didChange is set when the inputFile index is not 0; which is the index of the
525					// first file copied to the output, which is the generated assembly of bcm.c.
526					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
527					// in order to ensure they don't collide. `index` gets incremented per file.
528					// If there is offset after the symbol, append the `offset`.
529					symbol = symbol + offset
530				}
531
532				args = append(args, symbol)
533
534			case ruleARMBaseIndexScale:
535				parts := ref.up
536				assertNodeType(parts, ruleARMRegister)
537				baseAddrReg := d.contents(parts)
538				parts = skipWS(parts.next)
539
540				// Only two forms need special handling. First there's memory references
541				// like "[x*, :got_lo12:foo]". The base register here will have been the
542				// target of an adrp instruction to load the page address, but the adrp
543				// will have turned into loading the full address *and dereferencing it*,
544				// above. Thus this instruction needs to be dropped otherwise we'll be
545				// dereferencing twice.
546				//
547				// Second there are forms like "[x*, :lo12:foo]" where the code has used
548				// adrp to load the page address into x*. That adrp will have been turned
549				// into loading the full address so just the offset needs to be dropped.
550
551				if parts != nil {
552					if parts.pegRule == ruleARMGOTLow12 {
553						if instructionName != "ldr" {
554							panic("Symbol reference outside of ldr instruction")
555						}
556
557						if skipWS(parts.next) != nil || parts.up.next != nil {
558							panic("can't handle tweak or post-increment with symbol references")
559						}
560
561						// The GOT helper already dereferenced the entry so, at most, just a mov
562						// is needed to put things in the right register.
563						d.writeCommentedNode(statement)
564						if baseAddrReg != args[0] {
565							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
566						}
567						return statement, nil
568					} else if parts.pegRule == ruleLow12BitsSymbolRef {
569						if instructionName != "ldr" {
570							panic("Symbol reference outside of ldr instruction")
571						}
572
573						if skipWS(parts.next) != nil || parts.up.next != nil {
574							panic("can't handle tweak or post-increment with symbol references")
575						}
576
577						// Suppress the offset; adrp loaded the full address.
578						args = append(args, "["+baseAddrReg+"]")
579						changed = true
580						continue
581					}
582				}
583
584				args = append(args, d.contents(fullArg))
585
586			case ruleLow12BitsSymbolRef:
587				// These are the second instruction in a pair:
588				//   adrp x0, symbol           // Load the page address into x0
589				//   add x1, x0, :lo12:symbol  // Adds the page offset.
590				//
591				// The adrp instruction will have been turned into a sequence that loads
592				// the full address, above, thus the offset is turned into zero. If that
593				// results in the instruction being a nop, then it is deleted.
594				if instructionName != "add" {
595					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
596				}
597
598				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
599					panic("address arithmetic with incorrectly sized register")
600				}
601
602				if args[0] == args[1] {
603					d.writeCommentedNode(statement)
604					return statement, nil
605				}
606
607				args = append(args, "#0")
608				changed = true
609
610			default:
611				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
612			}
613
614		default:
615			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
616		}
617	}
618
619	if changed {
620		d.writeCommentedNode(statement)
621		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
622		d.output.WriteString(replacement)
623	} else {
624		d.writeNode(statement)
625	}
626
627	return statement, nil
628}
629
630/* ppc64le
631
632[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
633        2017
634
635(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
636document is /not/ good as that's POWER9 specific.)
637
ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
the TOC (Table Of Contents). Within the TOC are the contents of the .rodata,
.data, .got, .plt, .bss, etc. sections [PABI;3.3].
642
643A pointer to the TOC is maintained in r2 and the following pattern is used to
644load the address of an element into a register:
645
646  addis <address register>, 2, foo@toc@ha
647  addi <address register>, <address register>, foo@toc@l
648
The “addis” instruction shifts a signed constant left 16 bits and adds the
result to its second argument, saving the result in the first argument. The
“addi” instruction does the same, but without shifting. Thus the “@toc@ha”
suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
“the bottom 16 bits of the offset”. However, note that both values are signed,
thus offsets in the top half of a 64KB chunk will have an @ha value that's one
greater than expected and a negative @l value.
656
657The TOC is specific to a “module” (basically an executable or shared object).
658This means that there's not a single TOC in a process and that r2 needs to
659change as control moves between modules. Thus functions have two entry points:
660the “global” entry point and the “local” entry point. Jumps from within the
661same module can use the local entry while jumps from other modules must use the
662global entry. The global entry establishes the correct value of r2 before
663running the function and the local entry skips that code.
664
665The global entry point for a function is defined by its label. The local entry
666is a power-of-two number of bytes from the global entry, set by the
667“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
668of 1 or 2 bytes is treated as an offset of zero.)
669
670In order to help the global entry code set r2 to point to the local TOC, r12 is
671set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
672the global entry will typically use an addis+addi pair to add a known offset to
673r12 and store it in r2. For example:
674
675foo:
676  addis 2, 12, .TOC. - foo@ha
677  addi  2, 2,  .TOC. - foo@l
678
679(It's worth noting that the '@' operator binds very loosely, so the 3rd
680arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
681
682When calling a function, the compiler doesn't know whether that function is in
683the same module or not. Thus it doesn't know whether r12 needs to be set nor
684whether r2 will be clobbered on return. Rather than always assume the worst,
685the linker fixes stuff up once it knows that a call is going out of module:
686
687Firstly, calling, say, memcpy (which we assume to be in a different module)
688won't actually jump directly to memcpy, or even a PLT resolution function.
689It'll call a synthesised function that:
690  a) saves r2 in the caller's stack frame
691  b) loads the address of memcpy@PLT into r12
692  c) jumps to r12.
693
694As this synthesised function loads memcpy@PLT, a call to memcpy from the
695compiled code just references “memcpy” directly, not “memcpy@PLT”.
696
697Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
698calls must be followed by a nop. If the call ends up going out-of-module, the
699linker will rewrite that nop to load r2 from the stack.
700
701Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
702red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
703followed as called functions will write into their parent's stack frame. For
704example, the synthesised out-of-module trampolines will save r2 24 bytes into
705the caller's frame and all non-leaf functions save the return address 16 bytes
706into the caller's frame.
707
708A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
709result in zero and all writes are discarded. POWER does something a little like
710that, but r0 is only special in certain argument positions for certain
711instructions. You just have to read the manual to know which they are.
712
713
714Delocation is easier than Intel because there's just TOC references, but it's
715also harder because there's no IP-relative addressing.
716
717Jumps are IP-relative however, and have a 24-bit immediate value. So we can
718jump to functions that set a register to the needed value. (r3 is the
719return-value register and so that's what is generally used here.) */
720
721// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
722// source to relative and writing the result to target.
723func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
724	instruction := skipWS(statement.up).up
725	assertNodeType(instruction, ruleInstructionName)
726	name1 := d.contents(instruction)
727	args1 := instructionArgs(instruction.next)
728
729	statement = statement.next
730	instruction = skipWS(statement.up).up
731	assertNodeType(instruction, ruleInstructionName)
732	name2 := d.contents(instruction)
733	args2 := instructionArgs(instruction.next)
734
735	if name1 != "addis" ||
736		len(args1) != 3 ||
737		name2 != "addi" ||
738		len(args2) != 3 {
739		return "", "", "", false
740	}
741
742	target = d.contents(args1[0])
743	relative = d.contents(args1[1])
744	source1 := d.contents(args1[2])
745	source2 := d.contents(args2[2])
746
747	if !strings.HasSuffix(source1, "@ha") ||
748		!strings.HasSuffix(source2, "@l") ||
749		source1[:len(source1)-3] != source2[:len(source2)-2] ||
750		d.contents(args2[0]) != target ||
751		d.contents(args2[1]) != target {
752		return "", "", "", false
753	}
754
755	source = source1[:len(source1)-3]
756	ok = true
757	return
758}
759
760// establishTOC writes the global entry prelude for a function. The standard
761// prelude involves relocations so this version moves the relocation outside
762// the integrity-checked area.
763func establishTOC(w stringWriter) {
764	w.WriteString("999:\n")
765	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
766	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
767	w.WriteString("\tld 12, 0(2)\n")
768	w.WriteString("\tadd 2, 2, 12\n")
769}
770
// loadTOCFuncName returns the name of a synthesized function that sets r3 to
// the value of “symbol+offset”. Characters that could not appear in the
// resulting name are spelled out: "." in |symbol| becomes "_dot_", and "+"
// and "-" in |offset| become "_plus_" and "_minus_".
func loadTOCFuncName(symbol, offset string) string {
	name := ".Lbcm_loadtoc_" + strings.NewReplacer(".", "_dot_").Replace(symbol)
	if offset == "" {
		return name
	}

	signs := strings.NewReplacer("+", "_plus_", "-", "_minus_")
	return name + "_" + signs.Replace(offset)
}
783
784func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
785	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
786
787	return func(k func()) {
788		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
789		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
790		w.WriteString("\tstd " + dest + ", -8(1)\n")
791		// The TOC loader will use r3, so stash it if necessary.
792		if dest != "3" {
793			w.WriteString("\tstd 3, -16(1)\n")
794		}
795
796		// Because loadTOCFuncName returns a “.L” name, we don't need a
797		// nop after this call.
798		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
799
800		// Cycle registers around. We need r3 -> destReg, -8(1) ->
801		// lr and, optionally, -16(1) -> r3.
802		w.WriteString("\tstd 3, -24(1)\n")
803		w.WriteString("\tld 3, -8(1)\n")
804		w.WriteString("\tmtlr 3\n")
805		w.WriteString("\tld " + dest + ", -24(1)\n")
806		if dest != "3" {
807			w.WriteString("\tld 3, -16(1)\n")
808		}
809		w.WriteString("\taddi 1, 1, 288\n")
810
811		k()
812	}
813}
814
815func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
816	for symRef != nil && symRef.pegRule == ruleOffset {
817		offset := d.contents(symRef)
818		if offset[0] != '+' && offset[0] != '-' {
819			offset = "+" + offset
820		}
821		offsets = offsets + offset
822		symRef = symRef.next
823	}
824	return symRef, offsets
825}
826
827func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
828	if memRef.pegRule != ruleSymbolRef {
829		return "", "", "", false, false, memRef
830	}
831
832	symRef := memRef.up
833	nextRef = memRef.next
834
835	// (Offset* '+')?
836	symRef, offset = d.gatherOffsets(symRef, offset)
837
838	// (LocalSymbol / SymbolName)
839	symbol = d.contents(symRef)
840	if symRef.pegRule == ruleLocalSymbol {
841		symbolIsLocal = true
842		mapped := d.mapLocalSymbol(symbol)
843		if mapped != symbol {
844			symbol = mapped
845			didChange = true
846		}
847	}
848	symRef = symRef.next
849
850	// Offset*
851	symRef, offset = d.gatherOffsets(symRef, offset)
852
853	// ('@' Section / Offset*)?
854	if symRef != nil {
855		assertNodeType(symRef, ruleSection)
856		section = d.contents(symRef)
857		symRef = symRef.next
858
859		symRef, offset = d.gatherOffsets(symRef, offset)
860	}
861
862	if symRef != nil {
863		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
864	}
865
866	return
867}
868
869func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
870	assertNodeType(instruction, ruleInstructionName)
871	instructionName := d.contents(instruction)
872	isBranch := instructionName[0] == 'b'
873
874	argNodes := instructionArgs(instruction.next)
875
876	var wrappers wrapperStack
877	var args []string
878	changed := false
879
880Args:
881	for i, arg := range argNodes {
882		fullArg := arg
883		isIndirect := false
884
885		if arg.pegRule == ruleIndirectionIndicator {
886			arg = arg.next
887			isIndirect = true
888		}
889
890		switch arg.pegRule {
891		case ruleRegisterOrConstant, ruleLocalLabelRef:
892			args = append(args, d.contents(fullArg))
893
894		case ruleTOCRefLow:
895			return nil, errors.New("Found low TOC reference outside preamble pattern")
896
897		case ruleTOCRefHigh:
898			target, _, relative, ok := d.isPPC64LEAPair(statement)
899			if !ok {
900				return nil, errors.New("Found high TOC reference outside preamble pattern")
901			}
902
903			if relative != "12" {
904				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
905			}
906
907			if target != "2" {
908				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
909			}
910
911			statement = statement.next
912			establishTOC(d.output)
913			instructionName = ""
914			changed = true
915			break Args
916
917		case ruleMemoryRef:
918			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
919			changed = didChange
920
921			if len(symbol) > 0 {
922				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
923					symbol = localEntryName(symbol)
924					changed = true
925				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
926					symbol = localTargetName(symbol)
927					changed = true
928				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
929					changed = true
930					d.redirectors[symbol] = redirectorName(symbol)
931					symbol = redirectorName(symbol)
932					// TODO(davidben): This should sanity-check the next
933					// instruction is a nop and ideally remove it.
934					wrappers = append(wrappers, func(k func()) {
935						k()
936						// Like the linker's PLT stubs, redirector functions
937						// expect callers to restore r2.
938						d.output.WriteString("\tld 2, 24(1)\n")
939					})
940				}
941			}
942
943			switch section {
944			case "":
945
946			case "tls":
947				// This section identifier just tells the
948				// assembler to use r13, the pointer to the
949				// thread-local data [PABI;3.7.3.3].
950
951			case "toc@ha":
952				// Delete toc@ha instructions. Per
953				// [PABI;3.6.3], the linker is allowed to erase
954				// toc@ha instructions. We take advantage of
955				// this by unconditionally erasing the toc@ha
956				// instructions and doing the full lookup when
957				// processing toc@l.
958				//
959				// Note that any offset here applies before @ha
960				// and @l. That is, 42+foo@toc@ha is
961				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
962				// corresponding toc@l references are required
963				// by the ABI to have the same offset. The
964				// offset will be incorporated in full when
965				// those are processed.
966				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
967					return nil, errors.New("can't process toc@ha reference")
968				}
969				changed = true
970				instructionName = ""
971				break Args
972
973			case "toc@l":
974				// Per [PAB;3.6.3], this instruction must take
975				// as input a register which was the output of
976				// a toc@ha computation and compute the actual
977				// address of some symbol. The toc@ha
978				// computation was elided, so we ignore that
979				// input register and compute the address
980				// directly.
981				changed = true
982
983				// For all supported toc@l instructions, the
984				// destination register is the first argument.
985				destReg := args[0]
986
987				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
988				switch instructionName {
989				case "addi":
990					// The original instruction was:
991					//   addi destReg, tocHaReg, offset+symbol@toc@l
992					instructionName = ""
993
994				case "ld", "lhz", "lwz":
995					// The original instruction was:
996					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
997					//
998					// We transform that into the
999					// equivalent dereference of destReg:
1000					//   l?? destReg, 0(destReg)
1001					origInstructionName := instructionName
1002					instructionName = ""
1003
1004					assertNodeType(memRef, ruleBaseIndexScale)
1005					assertNodeType(memRef.up, ruleRegisterOrConstant)
1006					if memRef.next != nil || memRef.up.next != nil {
1007						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
1008					}
1009
1010					baseReg := destReg
1011					if baseReg == "0" {
1012						// Register zero is special as the base register for a load.
1013						// Avoid it by spilling and using r3 instead.
1014						baseReg = "3"
1015						wrappers = append(wrappers, func(k func()) {
1016							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
1017							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
1018							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
1019							k()
1020							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
1021							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
1022						})
1023					}
1024
1025					wrappers = append(wrappers, func(k func()) {
1026						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
1027					})
1028				default:
1029					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
1030				}
1031
1032			default:
1033				return nil, fmt.Errorf("Unknown section type %q", section)
1034			}
1035
1036			argStr := ""
1037			if isIndirect {
1038				argStr += "*"
1039			}
1040			argStr += symbol
1041			if len(offset) > 0 {
1042				argStr += offset
1043			}
1044			if len(section) > 0 {
1045				argStr += "@"
1046				argStr += section
1047			}
1048
1049			for ; memRef != nil; memRef = memRef.next {
1050				argStr += d.contents(memRef)
1051			}
1052
1053			args = append(args, argStr)
1054
1055		default:
1056			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1057		}
1058	}
1059
1060	if changed {
1061		d.writeCommentedNode(statement)
1062
1063		var replacement string
1064		if len(instructionName) > 0 {
1065			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1066		}
1067
1068		wrappers.do(func() {
1069			d.output.WriteString(replacement)
1070		})
1071	} else {
1072		d.writeNode(statement)
1073	}
1074
1075	return statement, nil
1076}
1077
1078/* Intel */
1079
// instructionType is the coarse classification that classifyInstruction
// assigns to an x86-64 instruction. It determines how a GOTPCREL or PLT
// reference in that instruction can be rewritten.
type instructionType int

const (
	// instrPush is a one-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move that copies its source unchanged.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is anything that does not fit one of the classes above,
	// including a recognised mnemonic with an unexpected argument count.
	instrOther
)
1103
1104func classifyInstruction(instr string, args []*node32) instructionType {
1105	switch instr {
1106	case "push", "pushq":
1107		if len(args) == 1 {
1108			return instrPush
1109		}
1110
1111	case "mov", "movq", "vmovq", "movsd", "vmovsd":
1112		if len(args) == 2 {
1113			return instrMove
1114		}
1115
1116	case "cmovneq", "cmoveq":
1117		if len(args) == 2 {
1118			return instrConditionalMove
1119		}
1120
1121	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
1122		if len(args) == 1 {
1123			return instrJump
1124		}
1125
1126	case "orq", "andq", "xorq":
1127		if len(args) == 2 {
1128			return instrCombine
1129		}
1130
1131	case "cmpq":
1132		if len(args) == 2 {
1133			return instrCompare
1134		}
1135
1136	case "sarxq", "shlxq", "shrxq":
1137		if len(args) == 3 {
1138			return instrThreeArg
1139		}
1140
1141	case "vpbroadcastq":
1142		if len(args) == 2 {
1143			return instrTransformingMove
1144		}
1145
1146	case "movlps", "movhps":
1147		if len(args) == 2 {
1148			return instrMemoryVectorCombine
1149		}
1150	}
1151
1152	return instrOther
1153}
1154
1155func push(w stringWriter) wrapperFunc {
1156	return func(k func()) {
1157		w.WriteString("\tpushq %rax\n")
1158		k()
1159		w.WriteString("\txchg %rax, (%rsp)\n")
1160	}
1161}
1162
1163func compare(w stringWriter, instr, a, b string) wrapperFunc {
1164	return func(k func()) {
1165		k()
1166		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
1167	}
1168}
1169
1170func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
1171	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
1172
1173	return func(k func()) {
1174		if !redzoneCleared {
1175			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1176		}
1177		w.WriteString("\tpushf\n")
1178		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
1179		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
1180		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
1181		w.WriteString("\tpopf\n")
1182		if !redzoneCleared {
1183			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
1184		}
1185	}
1186}
1187
1188func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
1189	return func(k func()) {
1190		if !redzoneCleared {
1191			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1192			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
1193		}
1194		w.WriteString("\tpushfq\n")
1195		k()
1196		w.WriteString("\tpopfq\n")
1197	}
1198}
1199
1200func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
1201	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
1202
1203	var reg string
1204NextCandidate:
1205	for _, candidate := range candidates {
1206		for _, avoid := range avoidRegs {
1207			if candidate == avoid {
1208				continue NextCandidate
1209			}
1210		}
1211
1212		reg = candidate
1213		break
1214	}
1215
1216	if len(reg) == 0 {
1217		panic("too many excluded registers")
1218	}
1219
1220	return func(k func()) {
1221		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
1222		w.WriteString("\tpushq " + reg + "\n")
1223		k()
1224		w.WriteString("\tpopq " + reg + "\n")
1225		w.WriteString("\tleaq 128(%rsp), %rsp\n")
1226	}, reg
1227}
1228
1229func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
1230	return func(k func()) {
1231		k()
1232		prefix := ""
1233		if isAVX {
1234			prefix = "v"
1235		}
1236		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
1237	}
1238}
1239
1240func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
1241	return func(k func()) {
1242		k()
1243		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
1244	}
1245}
1246
1247func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
1248	return func(k func()) {
1249		k()
1250		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
1251	}
1252}
1253
1254func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
1255	return func(k func()) {
1256		k()
1257		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
1258	}
1259}
1260
1261func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
1262	return func(k func()) {
1263		k()
1264		// These instructions can only read from memory, so push
1265		// tempReg and read from the stack. Note we assume the red zone
1266		// was previously cleared by saveRegister().
1267		w.WriteString("\tpushq " + source + "\n")
1268		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
1269		w.WriteString("\tleaq 8(%rsp), %rsp\n")
1270	}
1271}
1272
// isValidLEATarget reports whether reg may be the destination of an LEA
// instruction, which is taken to mean that its name does not start with
// %xmm, %ymm or %zmm.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
1276
1277func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
1278	var invertedCondition string
1279
1280	switch instr {
1281	case "cmoveq":
1282		invertedCondition = "ne"
1283	case "cmovneq":
1284		invertedCondition = "e"
1285	default:
1286		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
1287	}
1288
1289	return func(k func()) {
1290		w.WriteString("\tj" + invertedCondition + " 999f\n")
1291		k()
1292		w.WriteString("999:\n")
1293	}
1294}
1295
1296func (d *delocation) isRIPRelative(node *node32) bool {
1297	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
1298}
1299
1300func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
1301	assertNodeType(instruction, ruleInstructionName)
1302	instructionName := d.contents(instruction)
1303
1304	argNodes := instructionArgs(instruction.next)
1305
1306	var wrappers wrapperStack
1307	var args []string
1308	changed := false
1309
1310Args:
1311	for i, arg := range argNodes {
1312		fullArg := arg
1313		isIndirect := false
1314
1315		if arg.pegRule == ruleIndirectionIndicator {
1316			arg = arg.next
1317			isIndirect = true
1318		}
1319
1320		switch arg.pegRule {
1321		case ruleRegisterOrConstant, ruleLocalLabelRef:
1322			args = append(args, d.contents(fullArg))
1323
1324		case ruleMemoryRef:
1325			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
1326			changed = didChange
1327
1328			if symbol == "OPENSSL_ia32cap_P" && section == "" {
1329				if instructionName != "leaq" {
1330					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
1331				}
1332
1333				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
1334					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
1335				}
1336
1337				target := argNodes[1]
1338				assertNodeType(target, ruleRegisterOrConstant)
1339				reg := d.contents(target)
1340
1341				if !strings.HasPrefix(reg, "%r") {
1342					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
1343				}
1344
1345				changed = true
1346
1347				// Flag-altering instructions (i.e. addq) are going to be used so the
1348				// flags need to be preserved.
1349				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
1350
1351				wrappers = append(wrappers, func(k func()) {
1352					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
1353					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
1354				})
1355
1356				break Args
1357			}
1358
1359			switch section {
1360			case "":
1361				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1362					symbol = localTargetName(symbol)
1363					changed = true
1364				}
1365
1366			case "PLT":
1367				if classifyInstruction(instructionName, argNodes) != instrJump {
1368					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1369				}
1370
1371				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1372					symbol = localTargetName(symbol)
1373					changed = true
1374				} else if !symbolIsLocal && !isSynthesized(symbol) {
1375					// Unknown symbol via PLT is an
1376					// out-call from the module, e.g.
1377					// memcpy.
1378					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1379					symbol = redirectorName(symbol)
1380				}
1381
1382				changed = true
1383
1384			case "GOTPCREL":
1385				if len(offset) > 0 {
1386					return nil, errors.New("loading from GOT with offset is unsupported")
1387				}
1388				if !d.isRIPRelative(memRef) {
1389					return nil, errors.New("GOT access must be IP-relative")
1390				}
1391
1392				useGOT := false
1393				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1394					symbol = localTargetName(symbol)
1395					changed = true
1396				} else if !isSynthesized(symbol) {
1397					useGOT = true
1398				}
1399
1400				classification := classifyInstruction(instructionName, argNodes)
1401				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1402					return nil, errors.New("GOT access must be source operand")
1403				}
1404
1405				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1406				var targetReg string
1407				var redzoneCleared bool
1408				switch classification {
1409				case instrPush:
1410					wrappers = append(wrappers, push(d.output))
1411					targetReg = "%rax"
1412				case instrConditionalMove:
1413					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1414					fallthrough
1415				case instrMove:
1416					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1417					targetReg = d.contents(argNodes[1])
1418				case instrCompare:
1419					otherSource := d.contents(argNodes[i^1])
1420					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1421					redzoneCleared = true
1422					wrappers = append(wrappers, saveRegWrapper)
1423					if i == 0 {
1424						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1425					} else {
1426						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1427					}
1428					targetReg = tempReg
1429				case instrTransformingMove:
1430					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1431					targetReg = d.contents(argNodes[1])
1432					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1433					if isValidLEATarget(targetReg) {
1434						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1435					}
1436				case instrCombine:
1437					targetReg = d.contents(argNodes[1])
1438					if !isValidLEATarget(targetReg) {
1439						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1440					}
1441					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1442					redzoneCleared = true
1443					wrappers = append(wrappers, saveRegWrapper)
1444
1445					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1446					targetReg = tempReg
1447				case instrMemoryVectorCombine:
1448					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1449					targetReg = d.contents(argNodes[1])
1450					if isValidLEATarget(targetReg) {
1451						return nil, errors.New("target register must be an XMM register")
1452					}
1453					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1454					wrappers = append(wrappers, saveRegWrapper)
1455					redzoneCleared = true
1456					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1457					targetReg = tempReg
1458				case instrThreeArg:
1459					if n := len(argNodes); n != 3 {
1460						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1461					}
1462					if i != 0 && i != 1 {
1463						return nil, errors.New("GOT access must be from source operand")
1464					}
1465					targetReg = d.contents(argNodes[2])
1466
1467					otherSource := d.contents(argNodes[1])
1468					if i == 1 {
1469						otherSource = d.contents(argNodes[0])
1470					}
1471
1472					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1473					redzoneCleared = true
1474					wrappers = append(wrappers, saveRegWrapper)
1475
1476					if i == 0 {
1477						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1478					} else {
1479						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1480					}
1481					targetReg = tempReg
1482				default:
1483					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1484				}
1485
1486				if !isValidLEATarget(targetReg) {
1487					// Sometimes the compiler will load from the GOT to an
1488					// XMM register, which is not a valid target of an LEA
1489					// instruction.
1490					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1491					wrappers = append(wrappers, saveRegWrapper)
1492					isAVX := strings.HasPrefix(instructionName, "v")
1493					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1494					targetReg = tempReg
1495					if redzoneCleared {
1496						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1497					}
1498					redzoneCleared = true
1499				}
1500
1501				if symbol == "OPENSSL_ia32cap_P" {
1502					// Flag-altering instructions (i.e. addq) are going to be used so the
1503					// flags need to be preserved.
1504					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1505					wrappers = append(wrappers, func(k func()) {
1506						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1507						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1508					})
1509				} else if useGOT {
1510					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1511				} else {
1512					wrappers = append(wrappers, func(k func()) {
1513						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1514					})
1515				}
1516				changed = true
1517				break Args
1518
1519			default:
1520				return nil, fmt.Errorf("Unknown section type %q", section)
1521			}
1522
1523			if !changed && len(section) > 0 {
1524				panic("section was not handled")
1525			}
1526			section = ""
1527
1528			argStr := ""
1529			if isIndirect {
1530				argStr += "*"
1531			}
1532			argStr += symbol
1533			argStr += offset
1534
1535			for ; memRef != nil; memRef = memRef.next {
1536				argStr += d.contents(memRef)
1537			}
1538
1539			args = append(args, argStr)
1540
1541		case ruleGOTLocation:
1542			if instructionName != "movabsq" {
1543				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1544			}
1545			if i != 0 || len(argNodes) != 2 {
1546				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1547			}
1548
1549			d.gotDeltaNeeded = true
1550			changed = true
1551			instructionName = "movq"
1552			assertNodeType(arg.up, ruleLocalSymbol)
1553			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1554			targetReg := d.contents(argNodes[1])
1555			args = append(args, ".Lboringssl_got_delta(%rip)")
1556			wrappers = append(wrappers, func(k func()) {
1557				k()
1558				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1559			})
1560
1561		case ruleGOTSymbolOffset:
1562			if instructionName != "movabsq" {
1563				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1564			}
1565			if i != 0 || len(argNodes) != 2 {
1566				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1567			}
1568
1569			assertNodeType(arg.up, ruleSymbolName)
1570			symbol := d.contents(arg.up)
1571			if strings.HasPrefix(symbol, ".L") {
1572				symbol = d.mapLocalSymbol(symbol)
1573			}
1574			targetReg := d.contents(argNodes[1])
1575
1576			var prefix string
1577			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1578			if isGOTOFF {
1579				prefix = "gotoff"
1580				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1581			} else {
1582				prefix = "got"
1583				d.gotOffsetsNeeded[symbol] = struct{}{}
1584			}
1585			changed = true
1586
1587			wrappers = append(wrappers, func(k func()) {
1588				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1589				// of writing) emits 64-bit relocations anyway, so the following four bytes
1590				// get stomped. Thus we use 64-bit offsets.
1591				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1592			})
1593
1594		default:
1595			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1596		}
1597	}
1598
1599	if changed {
1600		d.writeCommentedNode(statement)
1601		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1602		wrappers.do(func() {
1603			d.output.WriteString(replacement)
1604		})
1605	} else {
1606		d.writeNode(statement)
1607	}
1608
1609	return statement, nil
1610}
1611
1612func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1613	lastStatement := statement
1614	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1615		node := skipWS(statement.up)
1616		if node == nil {
1617			d.writeNode(statement)
1618			continue
1619		}
1620
1621		switch node.pegRule {
1622		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1623			d.writeNode(statement)
1624
1625		case ruleDirective:
1626			directive := node.up
1627			assertNodeType(directive, ruleDirectiveName)
1628			directiveName := d.contents(directive)
1629			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1630				return lastStatement, nil
1631			}
1632			d.writeNode(statement)
1633
1634		case ruleLabel:
1635			label := node.up
1636			d.writeNode(statement)
1637
1638			if label.pegRule != ruleLocalSymbol {
1639				symbol := d.contents(label)
1640				localSymbol := localTargetName(symbol)
1641				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1642
1643				d.bssAccessorsNeeded[symbol] = localSymbol
1644			}
1645
1646		case ruleLabelContainingDirective:
1647			var err error
1648			statement, err = d.processLabelContainingDirective(statement, node.up)
1649			if err != nil {
1650				return nil, err
1651			}
1652
1653		default:
1654			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1655		}
1656	}
1657
1658	return lastStatement, nil
1659}
1660
1661func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1662	w.WriteString(".p2align 2\n")
1663	w.WriteString(".hidden " + funcName + "\n")
1664	w.WriteString(".type " + funcName + ", @function\n")
1665	w.WriteString(funcName + ":\n")
1666	w.WriteString(".cfi_startproc\n")
1667	writeContents(w)
1668	w.WriteString(".cfi_endproc\n")
1669	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1670}
1671
1672func transform(w stringWriter, inputs []inputFile) error {
1673	// symbols contains all defined symbols.
1674	symbols := make(map[string]struct{})
1675	// localEntrySymbols contains all symbols with a .localentry directive.
1676	localEntrySymbols := make(map[string]struct{})
1677	// fileNumbers is the set of IDs seen in .file directives.
1678	fileNumbers := make(map[int]struct{})
1679	// maxObservedFileNumber contains the largest seen file number in a
1680	// .file directive. Zero is not a valid number.
1681	maxObservedFileNumber := 0
1682	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1683	// checksums in .file directives. If it does so, then this script needs
1684	// to match that behaviour otherwise warnings result.
1685	fileDirectivesContainMD5 := false
1686
1687	// OPENSSL_ia32cap_get will be synthesized by this script.
1688	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1689
1690	for _, input := range inputs {
1691		forEachPath(input.ast.up, func(node *node32) {
1692			symbol := input.contents[node.begin:node.end]
1693			if _, ok := symbols[symbol]; ok {
1694				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1695			}
1696			symbols[symbol] = struct{}{}
1697		}, ruleStatement, ruleLabel, ruleSymbolName)
1698
1699		forEachPath(input.ast.up, func(node *node32) {
1700			node = node.up
1701			assertNodeType(node, ruleLabelContainingDirectiveName)
1702			directive := input.contents[node.begin:node.end]
1703			if directive != ".localentry" {
1704				return
1705			}
1706			// Extract the first argument.
1707			node = skipWS(node.next)
1708			assertNodeType(node, ruleSymbolArgs)
1709			node = node.up
1710			assertNodeType(node, ruleSymbolArg)
1711			symbol := input.contents[node.begin:node.end]
1712			if _, ok := localEntrySymbols[symbol]; ok {
1713				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
1714			}
1715			localEntrySymbols[symbol] = struct{}{}
1716		}, ruleStatement, ruleLabelContainingDirective)
1717
1718		forEachPath(input.ast.up, func(node *node32) {
1719			assertNodeType(node, ruleLocationDirective)
1720			directive := input.contents[node.begin:node.end]
1721			if !strings.HasPrefix(directive, ".file") {
1722				return
1723			}
1724			parts := strings.Fields(directive)
1725			if len(parts) == 2 {
1726				// This is a .file directive with just a
1727				// filename. Clang appears to generate just one
1728				// of these at the beginning of the output for
1729				// the compilation unit. Ignore it.
1730				return
1731			}
1732			fileNo, err := strconv.Atoi(parts[1])
1733			if err != nil {
1734				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1735			}
1736
1737			if _, ok := fileNumbers[fileNo]; ok {
1738				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1739			}
1740			fileNumbers[fileNo] = struct{}{}
1741
1742			if fileNo > maxObservedFileNumber {
1743				maxObservedFileNumber = fileNo
1744			}
1745
1746			for _, token := range parts[2:] {
1747				if token == "md5" {
1748					fileDirectivesContainMD5 = true
1749				}
1750			}
1751		}, ruleStatement, ruleLocationDirective)
1752	}
1753
1754	processor := x86_64
1755	if len(inputs) > 0 {
1756		processor = detectProcessor(inputs[0])
1757	}
1758
1759	commentIndicator := "#"
1760	if processor == aarch64 {
1761		commentIndicator = "//"
1762	}
1763
1764	d := &delocation{
1765		symbols:             symbols,
1766		localEntrySymbols:   localEntrySymbols,
1767		processor:           processor,
1768		commentIndicator:    commentIndicator,
1769		output:              w,
1770		redirectors:         make(map[string]string),
1771		bssAccessorsNeeded:  make(map[string]string),
1772		tocLoaders:          make(map[string]struct{}),
1773		gotExternalsNeeded:  make(map[string]struct{}),
1774		gotOffsetsNeeded:    make(map[string]struct{}),
1775		gotOffOffsetsNeeded: make(map[string]struct{}),
1776	}
1777
1778	w.WriteString(".text\n")
1779	var fileTrailing string
1780	if fileDirectivesContainMD5 {
1781		fileTrailing = " md5 0x00000000000000000000000000000000"
1782	}
1783	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1784	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1785	w.WriteString("BORINGSSL_bcm_text_start:\n")
1786
1787	for _, input := range inputs {
1788		if err := d.processInput(input); err != nil {
1789			return err
1790		}
1791	}
1792
1793	w.WriteString(".text\n")
1794	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1795	w.WriteString("BORINGSSL_bcm_text_end:\n")
1796
1797	// Emit redirector functions. Each is a single jump instruction.
1798	var redirectorNames []string
1799	for name := range d.redirectors {
1800		redirectorNames = append(redirectorNames, name)
1801	}
1802	sort.Strings(redirectorNames)
1803
1804	for _, name := range redirectorNames {
1805		redirector := d.redirectors[name]
1806		switch d.processor {
1807		case ppc64le:
1808			w.WriteString(".section \".toc\", \"aw\"\n")
1809			w.WriteString(".Lredirector_toc_" + name + ":\n")
1810			w.WriteString(".quad " + name + "\n")
1811			w.WriteString(".text\n")
1812			w.WriteString(".type " + redirector + ", @function\n")
1813			w.WriteString(redirector + ":\n")
1814			// |name| will clobber r2, so save it. This is matched by a restore in
1815			// redirector calls.
1816			w.WriteString("\tstd 2, 24(1)\n")
1817			// Load and call |name|'s global entry point.
1818			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
1819			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
1820			w.WriteString("\tmtctr 12\n")
1821			w.WriteString("\tbctr\n")
1822
1823		case aarch64:
1824			writeAarch64Function(w, redirector, func(w stringWriter) {
1825				w.WriteString("\tb " + name + "\n")
1826			})
1827
1828		case x86_64:
1829			w.WriteString(".type " + redirector + ", @function\n")
1830			w.WriteString(redirector + ":\n")
1831			w.WriteString("\tjmp\t" + name + "\n")
1832		}
1833	}
1834
1835	var accessorNames []string
1836	for accessor := range d.bssAccessorsNeeded {
1837		accessorNames = append(accessorNames, accessor)
1838	}
1839	sort.Strings(accessorNames)
1840
1841	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1842	for _, name := range accessorNames {
1843		funcName := accessorName(name)
1844		target := d.bssAccessorsNeeded[name]
1845
1846		switch d.processor {
1847		case ppc64le:
1848			w.WriteString(".type " + funcName + ", @function\n")
1849			w.WriteString(funcName + ":\n")
1850			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
1851			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
1852			w.WriteString("\tblr\n")
1853
1854		case x86_64:
1855			w.WriteString(".type " + funcName + ", @function\n")
1856			w.WriteString(funcName + ":\n")
1857			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1858
1859		case aarch64:
1860			writeAarch64Function(w, funcName, func(w stringWriter) {
1861				w.WriteString("\tadrp x0, " + target + "\n")
1862				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
1863				w.WriteString("\tret\n")
1864			})
1865		}
1866	}
1867
1868	switch d.processor {
1869	case ppc64le:
1870		loadTOCNames := sortedSet(d.tocLoaders)
1871		for _, symbolAndOffset := range loadTOCNames {
1872			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
1873			symbol, offset := parts[0], parts[1]
1874
1875			funcName := loadTOCFuncName(symbol, offset)
1876			ref := symbol + offset
1877
1878			w.WriteString(".type " + funcName[2:] + ", @function\n")
1879			w.WriteString(funcName[2:] + ":\n")
1880			w.WriteString(funcName + ":\n")
1881			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
1882			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
1883			w.WriteString("\tblr\n")
1884		}
1885
1886		w.WriteString(".LBORINGSSL_external_toc:\n")
1887		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
1888
1889	case aarch64:
1890		externalNames := sortedSet(d.gotExternalsNeeded)
1891		for _, symbol := range externalNames {
1892			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
1893				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
1894				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
1895				w.WriteString("\tret\n")
1896			})
1897		}
1898
1899		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
1900			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
1901			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
1902			w.WriteString("\tret\n")
1903		})
1904
1905	case x86_64:
1906		externalNames := sortedSet(d.gotExternalsNeeded)
1907		for _, name := range externalNames {
1908			parts := strings.SplitN(name, "@", 2)
1909			symbol, section := parts[0], parts[1]
1910			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1911			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1912			w.WriteString(symbol + "_" + section + "_external:\n")
1913			// Ideally this would be .quad foo@GOTPCREL, but clang's
1914			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1915			// we manually sign-extend the value, knowing that the GOT is
1916			// always at the end, thus foo@GOTPCREL has a positive value.
1917			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1918			w.WriteString("\t.long 0\n")
1919		}
1920
1921		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1922		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1923		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1924		w.WriteString("OPENSSL_ia32cap_get:\n")
1925		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1926		w.WriteString("\tret\n")
1927
1928		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1929		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1930		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1931		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1932		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1933
1934		if d.gotDeltaNeeded {
1935			w.WriteString(".Lboringssl_got_delta:\n")
1936			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1937		}
1938
1939		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1940			w.WriteString(".Lboringssl_got_" + name + ":\n")
1941			w.WriteString("\t.quad " + name + "@GOT\n")
1942		}
1943		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1944			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1945			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1946		}
1947	}
1948
1949	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1950	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
1951	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1952	for _, b := range fipscommon.UninitHashValue {
1953		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1954	}
1955
1956	return nil
1957}
1958
1959func parseInputs(inputs []inputFile) error {
1960	for i, input := range inputs {
1961		var contents string
1962
1963		if input.isArchive {
1964			arFile, err := os.Open(input.path)
1965			if err != nil {
1966				return err
1967			}
1968			defer arFile.Close()
1969
1970			ar, err := ar.ParseAR(arFile)
1971			if err != nil {
1972				return err
1973			}
1974
1975			if len(ar) != 1 {
1976				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1977			}
1978
1979			for _, c := range ar {
1980				contents = string(c)
1981			}
1982		} else {
1983			inBytes, err := ioutil.ReadFile(input.path)
1984			if err != nil {
1985				return err
1986			}
1987
1988			contents = string(inBytes)
1989		}
1990
1991		asm := Asm{Buffer: contents, Pretty: true}
1992		asm.Init()
1993		if err := asm.Parse(); err != nil {
1994			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1995		}
1996		ast := asm.AST()
1997
1998		inputs[i].contents = contents
1999		inputs[i].ast = ast
2000	}
2001
2002	return nil
2003}
2004
2005func main() {
2006	// The .a file, if given, is expected to be an archive of textual
2007	// assembly sources. That's odd, but CMake really wants to create
2008	// archive files so it's the only way that we can make it work.
2009	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
2010	outFile := flag.String("o", "", "Path to output assembly")
2011
2012	flag.Parse()
2013
2014	if len(*outFile) == 0 {
2015		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
2016		os.Exit(1)
2017	}
2018
2019	var inputs []inputFile
2020	if len(*arInput) > 0 {
2021		inputs = append(inputs, inputFile{
2022			path:      *arInput,
2023			index:     0,
2024			isArchive: true,
2025		})
2026	}
2027
2028	for i, path := range flag.Args() {
2029		if len(path) == 0 {
2030			continue
2031		}
2032
2033		inputs = append(inputs, inputFile{
2034			path:  path,
2035			index: i + 1,
2036		})
2037	}
2038
2039	if err := parseInputs(inputs); err != nil {
2040		fmt.Fprintf(os.Stderr, "%s\n", err)
2041		os.Exit(1)
2042	}
2043
2044	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
2045	if err != nil {
2046		panic(err)
2047	}
2048	defer out.Close()
2049
2050	if err := transform(out, inputs); err != nil {
2051		fmt.Fprintf(os.Stderr, "%s\n", err)
2052		os.Exit(1)
2053	}
2054}
2055
2056func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
2057	if node == nil {
2058		return
2059	}
2060
2061	if len(rules) == 0 {
2062		cb(node)
2063		return
2064	}
2065
2066	rule := rules[0]
2067	childRules := rules[1:]
2068
2069	for ; node != nil; node = node.next {
2070		if node.pegRule != rule {
2071			continue
2072		}
2073
2074		if len(childRules) == 0 {
2075			cb(node)
2076		} else {
2077			forEachPath(node.up, cb, childRules...)
2078		}
2079	}
2080}
2081
2082func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
2083	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
2084	}
2085	return node
2086}
2087
2088func skipWS(node *node32) *node32 {
2089	return skipNodes(node, ruleWS)
2090}
2091
2092func assertNodeType(node *node32, expected pegRule) {
2093	if rule := node.pegRule; rule != expected {
2094		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
2095	}
2096}
2097
// wrapperFunc is a function that is handed a continuation and decides when
// (and whether) to run it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrappers; the first element is the
// outermost.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside every wrapper on the stack. Each wrapper is
// removed from the stack before it is invoked and receives a continuation
// that processes the remainder. With an empty stack, baseCase runs directly.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	outermost, remaining := pending[0], pending[1:]
	*w = remaining
	outermost(func() {
		w.do(baseCase)
	})
}
2112
// localTargetName builds the local target label for the global symbol name:
// the name wrapped as ".L<name>_local_target".
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
2118
// localEntryName builds the local entry label for the symbol name: the name
// wrapped as ".L<name>_local_entry".
func localEntryName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_entry"
	)
	return prefix + name + suffix
}
2122
// isSynthesized reports whether symbol matches one of three name patterns:
// it ends in "_bss_get", it is exactly "OPENSSL_ia32cap_get", or it starts
// with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
2128
// redirectorName returns symbol with the "bcm_redirector_" prefix attached.
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
2132
// sectionType reduces a section name to its type, i.e. “.text.foo” yields
// “.text”. Any name whose first component begins with “.debug_” is reported
// as “.debug”. The boolean is false (with an empty string) when section is
// empty or does not begin with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	// Cut at the second dot, if any, keeping only the leading component.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		section = section[:dot+1]
	}

	if strings.HasPrefix(section, ".debug_") {
		return ".debug", true
	}
	return section, true
}
2151
// accessorName builds the name of the accessor function for the BSS symbol
// name by appending "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
2157
2158func (d *delocation) mapLocalSymbol(symbol string) string {
2159	if d.currentInput.index == 0 {
2160		return symbol
2161	}
2162	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
2163}
2164
2165func detectProcessor(input inputFile) processorType {
2166	for statement := input.ast.up; statement != nil; statement = statement.next {
2167		node := skipNodes(statement.up, ruleWS)
2168		if node == nil || node.pegRule != ruleInstruction {
2169			continue
2170		}
2171
2172		instruction := node.up
2173		instructionName := input.contents[instruction.begin:instruction.end]
2174
2175		switch instructionName {
2176		case "movq", "call", "leaq":
2177			return x86_64
2178		case "addis", "addi", "mflr":
2179			return ppc64le
2180		case "str", "bl", "ldr", "st1":
2181			return aarch64
2182		}
2183	}
2184
2185	panic("processed entire input and didn't recognise any instructions.")
2186}
2187
// sortedSet returns the keys of m as a newly allocated slice in ascending
// string order. The result is non-nil even when m is empty or nil.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for k := range m {
		keys[next] = k
		next++
	}
	sort.Strings(keys)
	return keys
}
2196