• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"errors"
21	"flag"
22	"fmt"
23	"io/ioutil"
24	"os"
25	"sort"
26	"strconv"
27	"strings"
28
29	"boringssl.googlesource.com/boringssl/util/ar"
30	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
31)
32
// inputFile represents a textual assembly file.
type inputFile struct {
	// path names the input. NOTE(review): presumably a filesystem path
	// supplied by the caller; confirm where inputFile values are built.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes refer to it
	// by begin/end offsets.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
47
// stringWriter is the minimal output interface used when emitting assembly
// text: any value with a WriteString method of this shape satisfies it.
type stringWriter interface {
	WriteString(string) (int, error)
}
51
// processorType identifies the instruction set of the assembly being
// processed. The constants start at one, so the zero value does not match
// any known processor.
type processorType int

const (
	// ppc64le selects the PPC instruction handling.
	ppc64le processorType = iota + 1
	// x86_64 selects the Intel instruction handling.
	x86_64
)
58
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which instruction handler is used for each
	// instruction statement.
	processor processorType
	// output receives the transformed assembly text.
	output stringWriter

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}

	// currentInput is the file whose statements are currently being
	// processed; node offsets are resolved against its contents.
	currentInput inputFile
}
86
87func (d *delocation) contents(node *node32) string {
88	return d.currentInput.contents[node.begin:node.end]
89}
90
91// writeNode writes out an AST node.
92func (d *delocation) writeNode(node *node32) {
93	if _, err := d.output.WriteString(d.contents(node)); err != nil {
94		panic(err)
95	}
96}
97
98func (d *delocation) writeCommentedNode(node *node32) {
99	line := d.contents(node)
100	if _, err := d.output.WriteString("# WAS " + strings.TrimSpace(line) + "\n"); err != nil {
101		panic(err)
102	}
103}
104
105func locateError(err error, with *node32, in inputFile) error {
106	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
107	var line int
108	for _, pos := range posMap {
109		line = pos.line
110	}
111
112	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
113}
114
115func (d *delocation) processInput(input inputFile) (err error) {
116	d.currentInput = input
117
118	var origStatement *node32
119	defer func() {
120		if err := recover(); err != nil {
121			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
122		}
123	}()
124
125	for statement := input.ast.up; statement != nil; statement = statement.next {
126		assertNodeType(statement, ruleStatement)
127		origStatement = statement
128
129		node := skipWS(statement.up)
130		if node == nil {
131			d.writeNode(statement)
132			continue
133		}
134
135		switch node.pegRule {
136		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
137			d.writeNode(statement)
138		case ruleDirective:
139			statement, err = d.processDirective(statement, node.up)
140		case ruleLabelContainingDirective:
141			statement, err = d.processLabelContainingDirective(statement, node.up)
142		case ruleLabel:
143			statement, err = d.processLabel(statement, node.up)
144		case ruleInstruction:
145			switch d.processor {
146			case x86_64:
147				statement, err = d.processIntelInstruction(statement, node.up)
148			case ppc64le:
149				statement, err = d.processPPCInstruction(statement, node.up)
150			default:
151				panic("unknown processor")
152			}
153		default:
154			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
155		}
156
157		if err != nil {
158			return locateError(err, origStatement, input)
159		}
160	}
161
162	return nil
163}
164
165func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
166	assertNodeType(directive, ruleDirectiveName)
167	directiveName := d.contents(directive)
168
169	var args []string
170	forEachPath(directive, func(arg *node32) {
171		// If the argument is a quoted string, use the raw contents.
172		// (Note that this doesn't unescape the string, but that's not
173		// needed so far.
174		if arg.up != nil {
175			arg = arg.up
176			assertNodeType(arg, ruleQuotedArg)
177			if arg.up == nil {
178				args = append(args, "")
179				return
180			}
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedText)
183		}
184		args = append(args, d.contents(arg))
185	}, ruleArgs, ruleArg)
186
187	switch directiveName {
188	case "comm", "lcomm":
189		if len(args) < 1 {
190			return nil, errors.New("comm directive has no arguments")
191		}
192		d.bssAccessorsNeeded[args[0]] = args[0]
193		d.writeNode(statement)
194
195	case "data":
196		// ASAN and some versions of MSAN are adding a .data section,
197		// and adding references to symbols within it to the code. We
198		// will have to work around this in the future.
199		return nil, errors.New(".data section found in module")
200
201	case "section":
202		section := args[0]
203
204		if section == ".data.rel.ro" {
205			// In a normal build, this is an indication of a
206			// problem but any references from the module to this
207			// section will result in a relocation and thus will
208			// break the integrity check. ASAN can generate these
209			// sections and so we will likely have to work around
210			// that in the future.
211			return nil, errors.New(".data.rel.ro section found in module")
212		}
213
214		sectionType, ok := sectionType(section)
215		if !ok {
216			// Unknown sections are permitted in order to be robust
217			// to different compiler modes.
218			d.writeNode(statement)
219			break
220		}
221
222		switch sectionType {
223		case ".rodata", ".text":
224			// Move .rodata to .text so it may be accessed without
225			// a relocation. GCC with -fmerge-constants will place
226			// strings into separate sections, so we move all
227			// sections named like .rodata. Also move .text.startup
228			// so the self-test function is also in the module.
229			d.writeCommentedNode(statement)
230			d.output.WriteString(".text\n")
231
232		case ".data":
233			// See above about .data
234			return nil, errors.New(".data section found in module")
235
236		case ".init_array", ".fini_array", ".ctors", ".dtors":
237			// init_array/ctors/dtors contains function
238			// pointers to constructor/destructor
239			// functions. These contain relocations, but
240			// they're in a different section anyway.
241			d.writeNode(statement)
242			break
243
244		case ".debug", ".note", ".toc":
245			d.writeNode(statement)
246			break
247
248		case ".bss":
249			d.writeNode(statement)
250			return d.handleBSS(statement)
251		}
252
253	default:
254		d.writeNode(statement)
255	}
256
257	return statement, nil
258}
259
260func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
261	// The symbols within directives need to be mapped so that local
262	// symbols in two different .s inputs don't collide.
263	changed := false
264	assertNodeType(directive, ruleLabelContainingDirectiveName)
265	name := d.contents(directive)
266
267	node := directive.next
268	assertNodeType(node, ruleWS)
269
270	node = node.next
271	assertNodeType(node, ruleSymbolArgs)
272
273	var args []string
274	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
275		assertNodeType(node, ruleSymbolArg)
276		arg := node.up
277		var mapped string
278
279		for term := arg; term != nil; term = term.next {
280			if term.pegRule != ruleLocalSymbol {
281				mapped += d.contents(term)
282				continue
283			}
284
285			oldSymbol := d.contents(term)
286			newSymbol := d.mapLocalSymbol(oldSymbol)
287			if newSymbol != oldSymbol {
288				changed = true
289			}
290
291			mapped += newSymbol
292		}
293
294		args = append(args, mapped)
295	}
296
297	if !changed {
298		d.writeNode(statement)
299	} else {
300		d.writeCommentedNode(statement)
301		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
302	}
303
304	if name == ".localentry" {
305		d.output.WriteString(localEntryName(args[0]) + ":\n")
306	}
307
308	return statement, nil
309}
310
311func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
312	symbol := d.contents(label)
313
314	switch label.pegRule {
315	case ruleLocalLabel:
316		d.output.WriteString(symbol + ":\n")
317	case ruleLocalSymbol:
318		// symbols need to be mapped so that local symbols from two
319		// different .s inputs don't collide.
320		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
321	case ruleSymbolName:
322		d.output.WriteString(localTargetName(symbol) + ":\n")
323		d.writeNode(statement)
324	default:
325		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
326	}
327
328	return statement, nil
329}
330
331// instructionArgs collects all the arguments to an instruction.
332func instructionArgs(node *node32) (argNodes []*node32) {
333	for node = skipWS(node); node != nil; node = skipWS(node.next) {
334		assertNodeType(node, ruleInstructionArg)
335		argNodes = append(argNodes, node.up)
336	}
337
338	return argNodes
339}
340
341/* ppc64le
342
343[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
344        2017
345
346(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
347document is /not/ good as that's POWER9 specific.)
348
ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
the TOC (Table Of Contents). Within the TOC are the contents of the .rodata,
.data, .got, .plt, .bss, etc. sections [PABI;3.3].
353
354A pointer to the TOC is maintained in r2 and the following pattern is used to
355load the address of an element into a register:
356
357  addis <address register>, 2, foo@toc@ha
358  addi <address register>, <address register>, foo@toc@l
359
The “addis” instruction shifts a signed constant left 16 bits and adds the
result to its second argument, saving the result in the first argument. The
“addi” instruction does the same, but without shifting. Thus the “@toc@ha”
suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
“the bottom 16 bits of the offset”. However, note that both values are signed,
thus offsets in the top half of a 64KB chunk will have an @ha value that's one
greater than expected and a negative @l value.
367
368The TOC is specific to a “module” (basically an executable or shared object).
369This means that there's not a single TOC in a process and that r2 needs to
370change as control moves between modules. Thus functions have two entry points:
371the “global” entry point and the “local” entry point. Jumps from within the
372same module can use the local entry while jumps from other modules must use the
373global entry. The global entry establishes the correct value of r2 before
374running the function and the local entry skips that code.
375
376The global entry point for a function is defined by its label. The local entry
377is a power-of-two number of bytes from the global entry, set by the
378“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
379of 1 or 2 bytes is treated as an offset of zero.)
380
381In order to help the global entry code set r2 to point to the local TOC, r12 is
382set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
383the global entry will typically use an addis+addi pair to add a known offset to
384r12 and store it in r2. For example:
385
386foo:
387  addis 2, 12, .TOC. - foo@ha
388  addi  2, 2,  .TOC. - foo@l
389
390(It's worth noting that the '@' operator binds very loosely, so the 3rd
391arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
392
393When calling a function, the compiler doesn't know whether that function is in
394the same module or not. Thus it doesn't know whether r12 needs to be set nor
395whether r2 will be clobbered on return. Rather than always assume the worst,
396the linker fixes stuff up once it knows that a call is going out of module:
397
398Firstly, calling, say, memcpy (which we assume to be in a different module)
399won't actually jump directly to memcpy, or even a PLT resolution function.
400It'll call a synthesised function that:
401  a) saves r2 in the caller's stack frame
402  b) loads the address of memcpy@PLT into r12
403  c) jumps to r12.
404
405As this synthesised function loads memcpy@PLT, a call to memcpy from the
406compiled code just references “memcpy” directly, not “memcpy@PLT”.
407
408Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
409calls must be followed by a nop. If the call ends up going out-of-module, the
410linker will rewrite that nop to load r2 from the stack.
411
412Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
413red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
414followed as called functions will write into their parent's stack frame. For
415example, the synthesised out-of-module trampolines will save r2 24 bytes into
416the caller's frame and all non-leaf functions save the return address 16 bytes
417into the caller's frame.
418
419A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
420result in zero and all writes are discarded. POWER does something a little like
421that, but r0 is only special in certain argument positions for certain
422instructions. You just have to read the manual to know which they are.
423
424
425Delocation is easier than Intel because there's just TOC references, but it's
426also harder because there's no IP-relative addressing.
427
428Jumps are IP-relative however, and have a 24-bit immediate value. So we can
429jump to functions that set a register to the needed value. (r3 is the
430return-value register and so that's what is generally used here.) */
431
432// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
433// source to relative and writing the result to target.
434func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
435	instruction := skipWS(statement.up).up
436	assertNodeType(instruction, ruleInstructionName)
437	name1 := d.contents(instruction)
438	args1 := instructionArgs(instruction.next)
439
440	statement = statement.next
441	instruction = skipWS(statement.up).up
442	assertNodeType(instruction, ruleInstructionName)
443	name2 := d.contents(instruction)
444	args2 := instructionArgs(instruction.next)
445
446	if name1 != "addis" ||
447		len(args1) != 3 ||
448		name2 != "addi" ||
449		len(args2) != 3 {
450		return "", "", "", false
451	}
452
453	target = d.contents(args1[0])
454	relative = d.contents(args1[1])
455	source1 := d.contents(args1[2])
456	source2 := d.contents(args2[2])
457
458	if !strings.HasSuffix(source1, "@ha") ||
459		!strings.HasSuffix(source2, "@l") ||
460		source1[:len(source1)-3] != source2[:len(source2)-2] ||
461		d.contents(args2[0]) != target ||
462		d.contents(args2[1]) != target {
463		return "", "", "", false
464	}
465
466	source = source1[:len(source1)-3]
467	ok = true
468	return
469}
470
471// establishTOC writes the global entry prelude for a function. The standard
472// prelude involves relocations so this version moves the relocation outside
473// the integrity-checked area.
474func establishTOC(w stringWriter) {
475	w.WriteString("999:\n")
476	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
477	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
478	w.WriteString("\tld 12, 0(2)\n")
479	w.WriteString("\tadd 2, 2, 12\n")
480}
481
// loadTOCFuncName returns the name of a synthesized function that sets r3 to
// the value of “symbol+offset”. Characters that cannot appear in the name
// are spelled out: "." in the symbol becomes "_dot_", and "+" / "-" in the
// offset become "_plus_" / "_minus_".
func loadTOCFuncName(symbol, offset string) string {
	name := ".Lbcm_loadtoc_" + strings.Replace(symbol, ".", "_dot_", -1)
	if offset == "" {
		return name
	}

	escaped := strings.NewReplacer("+", "_plus_", "-", "_minus_").Replace(offset)
	return name + "_" + escaped
}
494
495func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
496	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
497
498	return func(k func()) {
499		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
500		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
501		w.WriteString("\tstd " + dest + ", -8(1)\n")
502		// The TOC loader will use r3, so stash it if necessary.
503		if dest != "3" {
504			w.WriteString("\tstd 3, -16(1)\n")
505		}
506
507		// Because loadTOCFuncName returns a “.L” name, we don't need a
508		// nop after this call.
509		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
510
511		// Cycle registers around. We need r3 -> destReg, -8(1) ->
512		// lr and, optionally, -16(1) -> r3.
513		w.WriteString("\tstd 3, -24(1)\n")
514		w.WriteString("\tld 3, -8(1)\n")
515		w.WriteString("\tmtlr 3\n")
516		w.WriteString("\tld " + dest + ", -24(1)\n")
517		if dest != "3" {
518			w.WriteString("\tld 3, -16(1)\n")
519		}
520		w.WriteString("\taddi 1, 1, 288\n")
521
522		k()
523	}
524}
525
526func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
527	for symRef != nil && symRef.pegRule == ruleOffset {
528		offset := d.contents(symRef)
529		if offset[0] != '+' && offset[0] != '-' {
530			offset = "+" + offset
531		}
532		offsets = offsets + offset
533		symRef = symRef.next
534	}
535	return symRef, offsets
536}
537
538func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
539	if memRef.pegRule != ruleSymbolRef {
540		return "", "", "", false, false, memRef
541	}
542
543	symRef := memRef.up
544	nextRef = memRef.next
545
546	// (Offset* '+')?
547	symRef, offset = d.gatherOffsets(symRef, offset)
548
549	// (LocalSymbol / SymbolName)
550	symbol = d.contents(symRef)
551	if symRef.pegRule == ruleLocalSymbol {
552		symbolIsLocal = true
553		mapped := d.mapLocalSymbol(symbol)
554		if mapped != symbol {
555			symbol = mapped
556			didChange = true
557		}
558	}
559	symRef = symRef.next
560
561	// Offset*
562	symRef, offset = d.gatherOffsets(symRef, offset)
563
564	// ('@' Section / Offset*)?
565	if symRef != nil {
566		assertNodeType(symRef, ruleSection)
567		section = d.contents(symRef)
568		symRef = symRef.next
569
570		symRef, offset = d.gatherOffsets(symRef, offset)
571	}
572
573	if symRef != nil {
574		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
575	}
576
577	return
578}
579
580func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
581	assertNodeType(instruction, ruleInstructionName)
582	instructionName := d.contents(instruction)
583	isBranch := instructionName[0] == 'b'
584
585	argNodes := instructionArgs(instruction.next)
586
587	var wrappers wrapperStack
588	var args []string
589	changed := false
590
591Args:
592	for i, arg := range argNodes {
593		fullArg := arg
594		isIndirect := false
595
596		if arg.pegRule == ruleIndirectionIndicator {
597			arg = arg.next
598			isIndirect = true
599		}
600
601		switch arg.pegRule {
602		case ruleRegisterOrConstant, ruleLocalLabelRef:
603			args = append(args, d.contents(fullArg))
604
605		case ruleTOCRefLow:
606			return nil, errors.New("Found low TOC reference outside preamble pattern")
607
608		case ruleTOCRefHigh:
609			target, _, relative, ok := d.isPPC64LEAPair(statement)
610			if !ok {
611				return nil, errors.New("Found high TOC reference outside preamble pattern")
612			}
613
614			if relative != "12" {
615				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
616			}
617
618			if target != "2" {
619				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
620			}
621
622			statement = statement.next
623			establishTOC(d.output)
624			instructionName = ""
625			changed = true
626			break Args
627
628		case ruleMemoryRef:
629			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
630			changed = didChange
631
632			if len(symbol) > 0 {
633				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
634					symbol = localEntryName(symbol)
635					changed = true
636				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
637					symbol = localTargetName(symbol)
638					changed = true
639				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
640					changed = true
641					d.redirectors[symbol] = redirectorName(symbol)
642					symbol = redirectorName(symbol)
643					// TODO(davidben): This should sanity-check the next
644					// instruction is a nop and ideally remove it.
645					wrappers = append(wrappers, func(k func()) {
646						k()
647						// Like the linker's PLT stubs, redirector functions
648						// expect callers to restore r2.
649						d.output.WriteString("\tld 2, 24(1)\n")
650					})
651				}
652			}
653
654			switch section {
655			case "":
656
657			case "tls":
658				// This section identifier just tells the
659				// assembler to use r13, the pointer to the
660				// thread-local data [PABI;3.7.3.3].
661
662			case "toc@ha":
663				// Delete toc@ha instructions. Per
664				// [PABI;3.6.3], the linker is allowed to erase
665				// toc@ha instructions. We take advantage of
666				// this by unconditionally erasing the toc@ha
667				// instructions and doing the full lookup when
668				// processing toc@l.
669				//
670				// Note that any offset here applies before @ha
671				// and @l. That is, 42+foo@toc@ha is
672				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
673				// corresponding toc@l references are required
674				// by the ABI to have the same offset. The
675				// offset will be incorporated in full when
676				// those are processed.
677				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
678					return nil, errors.New("can't process toc@ha reference")
679				}
680				changed = true
681				instructionName = ""
682				break Args
683
684			case "toc@l":
685				// Per [PAB;3.6.3], this instruction must take
686				// as input a register which was the output of
687				// a toc@ha computation and compute the actual
688				// address of some symbol. The toc@ha
689				// computation was elided, so we ignore that
690				// input register and compute the address
691				// directly.
692				changed = true
693
694				// For all supported toc@l instructions, the
695				// destination register is the first argument.
696				destReg := args[0]
697
698				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
699				switch instructionName {
700				case "addi":
701					// The original instruction was:
702					//   addi destReg, tocHaReg, offset+symbol@toc@l
703					instructionName = ""
704
705				case "ld", "lhz", "lwz":
706					// The original instruction was:
707					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
708					//
709					// We transform that into the
710					// equivalent dereference of destReg:
711					//   l?? destReg, 0(destReg)
712					origInstructionName := instructionName
713					instructionName = ""
714
715					assertNodeType(memRef, ruleBaseIndexScale)
716					assertNodeType(memRef.up, ruleRegisterOrConstant)
717					if memRef.next != nil || memRef.up.next != nil {
718						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
719					}
720
721					baseReg := destReg
722					if baseReg == "0" {
723						// Register zero is special as the base register for a load.
724						// Avoid it by spilling and using r3 instead.
725						baseReg = "3"
726						wrappers = append(wrappers, func(k func()) {
727							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
728							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
729							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
730							k()
731							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
732							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
733						})
734					}
735
736					wrappers = append(wrappers, func(k func()) {
737						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
738					})
739				default:
740					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
741				}
742
743			default:
744				return nil, fmt.Errorf("Unknown section type %q", section)
745			}
746
747			argStr := ""
748			if isIndirect {
749				argStr += "*"
750			}
751			argStr += symbol
752			if len(offset) > 0 {
753				argStr += offset
754			}
755			if len(section) > 0 {
756				argStr += "@"
757				argStr += section
758			}
759
760			for ; memRef != nil; memRef = memRef.next {
761				argStr += d.contents(memRef)
762			}
763
764			args = append(args, argStr)
765
766		default:
767			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
768		}
769	}
770
771	if changed {
772		d.writeCommentedNode(statement)
773
774		var replacement string
775		if len(instructionName) > 0 {
776			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
777		}
778
779		wrappers.do(func() {
780			d.output.WriteString(replacement)
781		})
782	} else {
783		d.writeNode(statement)
784	}
785
786	return statement, nil
787}
788
789/* Intel */
790
// instructionType is the coarse category assigned to an Intel instruction by
// classifyInstruction.
type instructionType int

const (
	// instrPush is a one-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrOther is every instruction that fits none of the categories
	// above.
	instrOther
)
808
809func classifyInstruction(instr string, args []*node32) instructionType {
810	switch instr {
811	case "push", "pushq":
812		if len(args) == 1 {
813			return instrPush
814		}
815
816	case "mov", "movq", "vmovq", "movsd", "vmovsd":
817		if len(args) == 2 {
818			return instrMove
819		}
820
821	case "cmovneq", "cmoveq":
822		if len(args) == 2 {
823			return instrConditionalMove
824		}
825
826	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
827		if len(args) == 1 {
828			return instrJump
829		}
830
831	case "orq", "andq", "xorq":
832		if len(args) == 2 {
833			return instrCombine
834		}
835
836	case "sarxq", "shlxq", "shrxq":
837		if len(args) == 3 {
838			return instrThreeArg
839		}
840
841	case "vpbroadcastq":
842		if len(args) == 2 {
843			return instrTransformingMove
844		}
845	}
846
847	return instrOther
848}
849
850func push(w stringWriter) wrapperFunc {
851	return func(k func()) {
852		w.WriteString("\tpushq %rax\n")
853		k()
854		w.WriteString("\txchg %rax, (%rsp)\n")
855	}
856}
857
858func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
859	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
860
861	return func(k func()) {
862		if !redzoneCleared {
863			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
864		}
865		w.WriteString("\tpushf\n")
866		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
867		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
868		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
869		w.WriteString("\tpopf\n")
870		if !redzoneCleared {
871			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
872		}
873	}
874}
875
876func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
877	return func(k func()) {
878		if !redzoneCleared {
879			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
880			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
881		}
882		w.WriteString("\tpushfq\n")
883		k()
884		w.WriteString("\tpopfq\n")
885	}
886}
887
888func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
889	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
890
891	var reg string
892NextCandidate:
893	for _, candidate := range candidates {
894		for _, avoid := range avoidRegs {
895			if candidate == avoid {
896				continue NextCandidate
897			}
898		}
899
900		reg = candidate
901		break
902	}
903
904	if len(reg) == 0 {
905		panic("too many excluded registers")
906	}
907
908	return func(k func()) {
909		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
910		w.WriteString("\tpushq " + reg + "\n")
911		k()
912		w.WriteString("\tpopq " + reg + "\n")
913		w.WriteString("\tleaq 128(%rsp), %rsp\n")
914	}, reg
915}
916
917func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
918	return func(k func()) {
919		k()
920		prefix := ""
921		if isAVX {
922			prefix = "v"
923		}
924		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
925	}
926}
927
928func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
929	return func(k func()) {
930		k()
931		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
932	}
933}
934
935func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
936	return func(k func()) {
937		k()
938		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
939	}
940}
941
942func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
943	return func(k func()) {
944		k()
945		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
946	}
947}
948
// isValidLEATarget reports whether reg may be used as the destination of an
// lea instruction, i.e. whether it is not an %xmm, %ymm or %zmm register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range [...]string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
952
953func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
954	var invertedCondition string
955
956	switch instr {
957	case "cmoveq":
958		invertedCondition = "ne"
959	case "cmovneq":
960		invertedCondition = "e"
961	default:
962		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
963	}
964
965	return func(k func()) {
966		w.WriteString("\tj" + invertedCondition + " 999f\n")
967		k()
968		w.WriteString("999:\n")
969	}
970}
971
972func (d *delocation) isRIPRelative(node *node32) bool {
973	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
974}
975
976func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
977	assertNodeType(instruction, ruleInstructionName)
978	instructionName := d.contents(instruction)
979
980	argNodes := instructionArgs(instruction.next)
981
982	var wrappers wrapperStack
983	var args []string
984	changed := false
985
986Args:
987	for i, arg := range argNodes {
988		fullArg := arg
989		isIndirect := false
990
991		if arg.pegRule == ruleIndirectionIndicator {
992			arg = arg.next
993			isIndirect = true
994		}
995
996		switch arg.pegRule {
997		case ruleRegisterOrConstant, ruleLocalLabelRef:
998			args = append(args, d.contents(fullArg))
999
1000		case ruleMemoryRef:
1001			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
1002			changed = didChange
1003
1004			if symbol == "OPENSSL_ia32cap_P" && section == "" {
1005				if instructionName != "leaq" {
1006					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
1007				}
1008
1009				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
1010					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
1011				}
1012
1013				target := argNodes[1]
1014				assertNodeType(target, ruleRegisterOrConstant)
1015				reg := d.contents(target)
1016
1017				if !strings.HasPrefix(reg, "%r") {
1018					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
1019				}
1020
1021				changed = true
1022
1023				// Flag-altering instructions (i.e. addq) are going to be used so the
1024				// flags need to be preserved.
1025				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
1026
1027				wrappers = append(wrappers, func(k func()) {
1028					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
1029					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
1030				})
1031
1032				break Args
1033			}
1034
1035			switch section {
1036			case "":
1037				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1038					symbol = localTargetName(symbol)
1039					changed = true
1040				}
1041
1042			case "PLT":
1043				if classifyInstruction(instructionName, argNodes) != instrJump {
1044					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1045				}
1046
1047				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1048					symbol = localTargetName(symbol)
1049					changed = true
1050				} else if !symbolIsLocal && !isSynthesized(symbol) {
1051					// Unknown symbol via PLT is an
1052					// out-call from the module, e.g.
1053					// memcpy.
1054					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1055					symbol = redirectorName(symbol)
1056				}
1057
1058				changed = true
1059
1060			case "GOTPCREL":
1061				if len(offset) > 0 {
1062					return nil, errors.New("loading from GOT with offset is unsupported")
1063				}
1064				if !d.isRIPRelative(memRef) {
1065					return nil, errors.New("GOT access must be IP-relative")
1066				}
1067
1068				useGOT := false
1069				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1070					symbol = localTargetName(symbol)
1071					changed = true
1072				} else if !isSynthesized(symbol) {
1073					useGOT = true
1074				}
1075
1076				classification := classifyInstruction(instructionName, argNodes)
1077				if classification != instrThreeArg && i != 0 {
1078					return nil, errors.New("GOT access must be source operand")
1079				}
1080
1081				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1082				var targetReg string
1083				var redzoneCleared bool
1084				switch classification {
1085				case instrPush:
1086					wrappers = append(wrappers, push(d.output))
1087					targetReg = "%rax"
1088				case instrConditionalMove:
1089					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1090					fallthrough
1091				case instrMove:
1092					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1093					targetReg = d.contents(argNodes[1])
1094				case instrTransformingMove:
1095					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1096					targetReg = d.contents(argNodes[1])
1097					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1098					if isValidLEATarget(targetReg) {
1099						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1100					}
1101				case instrCombine:
1102					targetReg = d.contents(argNodes[1])
1103					if !isValidLEATarget(targetReg) {
1104						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1105					}
1106					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1107					redzoneCleared = true
1108					wrappers = append(wrappers, saveRegWrapper)
1109
1110					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1111					targetReg = tempReg
1112				case instrThreeArg:
1113					if n := len(argNodes); n != 3 {
1114						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1115					}
1116					if i != 0 && i != 1 {
1117						return nil, errors.New("GOT access must be from soure operand")
1118					}
1119					targetReg = d.contents(argNodes[2])
1120
1121					otherSource := d.contents(argNodes[1])
1122					if i == 1 {
1123						otherSource = d.contents(argNodes[0])
1124					}
1125
1126					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1127					redzoneCleared = true
1128					wrappers = append(wrappers, saveRegWrapper)
1129
1130					if i == 0 {
1131						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1132					} else {
1133						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1134					}
1135					targetReg = tempReg
1136				default:
1137					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1138				}
1139
1140				if !isValidLEATarget(targetReg) {
1141					// Sometimes the compiler will load from the GOT to an
1142					// XMM register, which is not a valid target of an LEA
1143					// instruction.
1144					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1145					wrappers = append(wrappers, saveRegWrapper)
1146					isAVX := strings.HasPrefix(instructionName, "v")
1147					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1148					targetReg = tempReg
1149					if redzoneCleared {
1150						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1151					}
1152					redzoneCleared = true
1153				}
1154
1155				if symbol == "OPENSSL_ia32cap_P" {
1156					// Flag-altering instructions (i.e. addq) are going to be used so the
1157					// flags need to be preserved.
1158					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1159					wrappers = append(wrappers, func(k func()) {
1160						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1161						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1162					})
1163				} else if useGOT {
1164					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1165				} else {
1166					wrappers = append(wrappers, func(k func()) {
1167						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1168					})
1169				}
1170				changed = true
1171				break Args
1172
1173			default:
1174				return nil, fmt.Errorf("Unknown section type %q", section)
1175			}
1176
1177			if !changed && len(section) > 0 {
1178				panic("section was not handled")
1179			}
1180			section = ""
1181
1182			argStr := ""
1183			if isIndirect {
1184				argStr += "*"
1185			}
1186			argStr += symbol
1187			argStr += offset
1188
1189			for ; memRef != nil; memRef = memRef.next {
1190				argStr += d.contents(memRef)
1191			}
1192
1193			args = append(args, argStr)
1194
1195		default:
1196			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1197		}
1198	}
1199
1200	if changed {
1201		d.writeCommentedNode(statement)
1202		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1203		wrappers.do(func() {
1204			d.output.WriteString(replacement)
1205		})
1206	} else {
1207		d.writeNode(statement)
1208	}
1209
1210	return statement, nil
1211}
1212
1213func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1214	lastStatement := statement
1215	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1216		node := skipWS(statement.up)
1217		if node == nil {
1218			d.writeNode(statement)
1219			continue
1220		}
1221
1222		switch node.pegRule {
1223		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1224			d.writeNode(statement)
1225
1226		case ruleDirective:
1227			directive := node.up
1228			assertNodeType(directive, ruleDirectiveName)
1229			directiveName := d.contents(directive)
1230			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1231				return lastStatement, nil
1232			}
1233			d.writeNode(statement)
1234
1235		case ruleLabel:
1236			label := node.up
1237			d.writeNode(statement)
1238
1239			if label.pegRule != ruleLocalSymbol {
1240				symbol := d.contents(label)
1241				localSymbol := localTargetName(symbol)
1242				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1243
1244				d.bssAccessorsNeeded[symbol] = localSymbol
1245			}
1246
1247		case ruleLabelContainingDirective:
1248			var err error
1249			statement, err = d.processLabelContainingDirective(statement, node.up)
1250			if err != nil {
1251				return nil, err
1252			}
1253
1254		default:
1255			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1256		}
1257	}
1258
1259	return lastStatement, nil
1260}
1261
1262func transform(w stringWriter, inputs []inputFile) error {
1263	// symbols contains all defined symbols.
1264	symbols := make(map[string]struct{})
1265	// localEntrySymbols contains all symbols with a .localentry directive.
1266	localEntrySymbols := make(map[string]struct{})
1267	// fileNumbers is the set of IDs seen in .file directives.
1268	fileNumbers := make(map[int]struct{})
1269	// maxObservedFileNumber contains the largest seen file number in a
1270	// .file directive. Zero is not a valid number.
1271	maxObservedFileNumber := 0
1272
1273	// OPENSSL_ia32cap_get will be synthesized by this script.
1274	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1275
1276	for _, input := range inputs {
1277		forEachPath(input.ast.up, func(node *node32) {
1278			symbol := input.contents[node.begin:node.end]
1279			if _, ok := symbols[symbol]; ok {
1280				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1281			}
1282			symbols[symbol] = struct{}{}
1283		}, ruleStatement, ruleLabel, ruleSymbolName)
1284
1285		forEachPath(input.ast.up, func(node *node32) {
1286			node = node.up
1287			assertNodeType(node, ruleLabelContainingDirectiveName)
1288			directive := input.contents[node.begin:node.end]
1289			if directive != ".localentry" {
1290				return
1291			}
1292			// Extract the first argument.
1293			node = skipWS(node.next)
1294			assertNodeType(node, ruleSymbolArgs)
1295			node = node.up
1296			assertNodeType(node, ruleSymbolArg)
1297			symbol := input.contents[node.begin:node.end]
1298			if _, ok := localEntrySymbols[symbol]; ok {
1299				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
1300			}
1301			localEntrySymbols[symbol] = struct{}{}
1302		}, ruleStatement, ruleLabelContainingDirective)
1303
1304		forEachPath(input.ast.up, func(node *node32) {
1305			assertNodeType(node, ruleLocationDirective)
1306			directive := input.contents[node.begin:node.end]
1307			if !strings.HasPrefix(directive, ".file") {
1308				return
1309			}
1310			parts := strings.Fields(directive)
1311			if len(parts) == 2 {
1312				// This is a .file directive with just a
1313				// filename. Clang appears to generate just one
1314				// of these at the beginning of the output for
1315				// the compilation unit. Ignore it.
1316				return
1317			}
1318			fileNo, err := strconv.Atoi(parts[1])
1319			if err != nil {
1320				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1321			}
1322
1323			if _, ok := fileNumbers[fileNo]; ok {
1324				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1325			}
1326			fileNumbers[fileNo] = struct{}{}
1327
1328			if fileNo > maxObservedFileNumber {
1329				maxObservedFileNumber = fileNo
1330			}
1331		}, ruleStatement, ruleLocationDirective)
1332	}
1333
1334	processor := x86_64
1335	if len(inputs) > 0 {
1336		processor = detectProcessor(inputs[0])
1337	}
1338
1339	d := &delocation{
1340		symbols:            symbols,
1341		localEntrySymbols:  localEntrySymbols,
1342		processor:          processor,
1343		output:             w,
1344		redirectors:        make(map[string]string),
1345		bssAccessorsNeeded: make(map[string]string),
1346		tocLoaders:         make(map[string]struct{}),
1347		gotExternalsNeeded: make(map[string]struct{}),
1348	}
1349
1350	w.WriteString(".text\n")
1351	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"\n", maxObservedFileNumber+1))
1352	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1353	w.WriteString("BORINGSSL_bcm_text_start:\n")
1354
1355	for _, input := range inputs {
1356		if err := d.processInput(input); err != nil {
1357			return err
1358		}
1359	}
1360
1361	w.WriteString(".text\n")
1362	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1363	w.WriteString("BORINGSSL_bcm_text_end:\n")
1364
1365	// Emit redirector functions. Each is a single jump instruction.
1366	var redirectorNames []string
1367	for name := range d.redirectors {
1368		redirectorNames = append(redirectorNames, name)
1369	}
1370	sort.Strings(redirectorNames)
1371
1372	for _, name := range redirectorNames {
1373		redirector := d.redirectors[name]
1374		if d.processor == ppc64le {
1375			w.WriteString(".section \".toc\", \"aw\"\n")
1376			w.WriteString(".Lredirector_toc_" + name + ":\n")
1377			w.WriteString(".quad " + name + "\n")
1378			w.WriteString(".text\n")
1379			w.WriteString(".type " + redirector + ", @function\n")
1380			w.WriteString(redirector + ":\n")
1381			// |name| will clobber r2, so save it. This is matched by a restore in
1382			// redirector calls.
1383			w.WriteString("\tstd 2, 24(1)\n")
1384			// Load and call |name|'s global entry point.
1385			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
1386			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
1387			w.WriteString("\tmtctr 12\n")
1388			w.WriteString("\tbctr\n")
1389		} else {
1390			w.WriteString(".type " + redirector + ", @function\n")
1391			w.WriteString(redirector + ":\n")
1392			w.WriteString("\tjmp\t" + name + "\n")
1393		}
1394	}
1395
1396	var accessorNames []string
1397	for accessor := range d.bssAccessorsNeeded {
1398		accessorNames = append(accessorNames, accessor)
1399	}
1400	sort.Strings(accessorNames)
1401
1402	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1403	for _, name := range accessorNames {
1404		funcName := accessorName(name)
1405		w.WriteString(".type " + funcName + ", @function\n")
1406		w.WriteString(funcName + ":\n")
1407		target := d.bssAccessorsNeeded[name]
1408
1409		if d.processor == ppc64le {
1410			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
1411			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
1412			w.WriteString("\tblr\n")
1413		} else {
1414			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1415		}
1416	}
1417
1418	if d.processor == ppc64le {
1419		loadTOCNames := sortedSet(d.tocLoaders)
1420		for _, symbolAndOffset := range loadTOCNames {
1421			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
1422			symbol, offset := parts[0], parts[1]
1423
1424			funcName := loadTOCFuncName(symbol, offset)
1425			ref := symbol + offset
1426
1427			w.WriteString(".type " + funcName[2:] + ", @function\n")
1428			w.WriteString(funcName[2:] + ":\n")
1429			w.WriteString(funcName + ":\n")
1430			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
1431			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
1432			w.WriteString("\tblr\n")
1433		}
1434
1435		w.WriteString(".LBORINGSSL_external_toc:\n")
1436		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
1437	} else {
1438		externalNames := sortedSet(d.gotExternalsNeeded)
1439		for _, name := range externalNames {
1440			parts := strings.SplitN(name, "@", 2)
1441			symbol, section := parts[0], parts[1]
1442			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1443			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1444			w.WriteString(symbol + "_" + section + "_external:\n")
1445			// Ideally this would be .quad foo@GOTPCREL, but clang's
1446			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1447			// we manually sign-extend the value, knowing that the GOT is
1448			// always at the end, thus foo@GOTPCREL has a positive value.
1449			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1450			w.WriteString("\t.long 0\n")
1451		}
1452
1453		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1454		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1455		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1456		w.WriteString("OPENSSL_ia32cap_get:\n")
1457		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1458		w.WriteString("\tret\n")
1459
1460		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1461		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1462		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1463		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1464		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1465	}
1466
1467	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1468	w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n")
1469	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1470	for _, b := range fipscommon.UninitHashValue {
1471		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1472	}
1473
1474	return nil
1475}
1476
1477func parseInputs(inputs []inputFile) error {
1478	for i, input := range inputs {
1479		var contents string
1480
1481		if input.isArchive {
1482			arFile, err := os.Open(input.path)
1483			if err != nil {
1484				return err
1485			}
1486			defer arFile.Close()
1487
1488			ar, err := ar.ParseAR(arFile)
1489			if err != nil {
1490				return err
1491			}
1492
1493			if len(ar) != 1 {
1494				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1495			}
1496
1497			for _, c := range ar {
1498				contents = string(c)
1499			}
1500		} else {
1501			inBytes, err := ioutil.ReadFile(input.path)
1502			if err != nil {
1503				return err
1504			}
1505
1506			contents = string(inBytes)
1507		}
1508
1509		asm := Asm{Buffer: contents, Pretty: true}
1510		asm.Init()
1511		if err := asm.Parse(); err != nil {
1512			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1513		}
1514		ast := asm.AST()
1515
1516		inputs[i].contents = contents
1517		inputs[i].ast = ast
1518	}
1519
1520	return nil
1521}
1522
1523func main() {
1524	// The .a file, if given, is expected to be an archive of textual
1525	// assembly sources. That's odd, but CMake really wants to create
1526	// archive files so it's the only way that we can make it work.
1527	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1528	outFile := flag.String("o", "", "Path to output assembly")
1529
1530	flag.Parse()
1531
1532	if len(*outFile) == 0 {
1533		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1534		os.Exit(1)
1535	}
1536
1537	var inputs []inputFile
1538	if len(*arInput) > 0 {
1539		inputs = append(inputs, inputFile{
1540			path:      *arInput,
1541			index:     0,
1542			isArchive: true,
1543		})
1544	}
1545
1546	for i, path := range flag.Args() {
1547		if len(path) == 0 {
1548			continue
1549		}
1550
1551		inputs = append(inputs, inputFile{
1552			path:  path,
1553			index: i + 1,
1554		})
1555	}
1556
1557	if err := parseInputs(inputs); err != nil {
1558		fmt.Fprintf(os.Stderr, "%s\n", err)
1559		os.Exit(1)
1560	}
1561
1562	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1563	if err != nil {
1564		panic(err)
1565	}
1566	defer out.Close()
1567
1568	if err := transform(out, inputs); err != nil {
1569		fmt.Fprintf(os.Stderr, "%s\n", err)
1570		os.Exit(1)
1571	}
1572}
1573
1574func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1575	if node == nil {
1576		return
1577	}
1578
1579	if len(rules) == 0 {
1580		cb(node)
1581		return
1582	}
1583
1584	rule := rules[0]
1585	childRules := rules[1:]
1586
1587	for ; node != nil; node = node.next {
1588		if node.pegRule != rule {
1589			continue
1590		}
1591
1592		if len(childRules) == 0 {
1593			cb(node)
1594		} else {
1595			forEachPath(node.up, cb, childRules...)
1596		}
1597	}
1598}
1599
1600func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1601	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1602	}
1603	return node
1604}
1605
1606func skipWS(node *node32) *node32 {
1607	return skipNodes(node, ruleWS)
1608}
1609
1610func assertNodeType(node *node32, expected pegRule) {
1611	if rule := node.pegRule; rule != expected {
1612		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1613	}
1614}
1615
// wrapperFunc surrounds an action: it receives the action as k and decides
// whether and when to run it.
type wrapperFunc func(func())

// wrapperStack is a list of wrappers, outermost first.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside every wrapper on the stack, the first
// element being the outermost. Wrappers are removed from the stack as they
// are entered; if a wrapper never calls its argument, neither the remaining
// wrappers nor baseCase run.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	outermost := pending[0]
	*w = pending[1:]

	inner := func() {
		w.do(baseCase)
	}
	outermost(inner)
}
1630
// localTargetName returns the local target label that corresponds to the
// global symbol name: name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	const prefix, suffix = ".L", "_local_target"
	return prefix + name + suffix
}
1636
// localEntryName returns the local entry label for the symbol name: name
// wrapped in ".L" and "_local_entry".
func localEntryName(name string) string {
	const prefix, suffix = ".L", "_local_entry"
	return prefix + name + suffix
}
1640
// isSynthesized reports whether symbol has one of the names that this tool
// generates itself: a "_bss_get" accessor, OPENSSL_ia32cap_get, or a symbol
// starting with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	default:
		return strings.HasPrefix(symbol, "BORINGSSL_bcm_text_")
	}
}
1646
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1650
// sectionType returns the type of a section, i.e. its first dot-separated
// component: a section called “.text.foo” is a “.text” section. Every section
// whose first component starts with “.debug_” is reported as “.debug”. The
// boolean result is false when section is empty or does not start with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	// Look for a second dot, ignoring the leading one, and cut there.
	if secondDot := strings.Index(section[1:], "."); secondDot >= 0 {
		section = section[:secondDot+1]
	}

	kind := section
	if strings.HasPrefix(kind, ".debug_") {
		kind = ".debug"
	}
	return kind, true
}
1669
// accessorName returns the name of the accessor function for the BSS symbol
// name, which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1675
1676func (d *delocation) mapLocalSymbol(symbol string) string {
1677	if d.currentInput.index == 0 {
1678		return symbol
1679	}
1680	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1681}
1682
1683func detectProcessor(input inputFile) processorType {
1684	for statement := input.ast.up; statement != nil; statement = statement.next {
1685		node := skipNodes(statement.up, ruleWS)
1686		if node == nil || node.pegRule != ruleInstruction {
1687			continue
1688		}
1689
1690		instruction := node.up
1691		instructionName := input.contents[instruction.begin:instruction.end]
1692
1693		switch instructionName {
1694		case "movq", "call", "leaq":
1695			return x86_64
1696		case "addis", "addi", "mflr":
1697			return ppc64le
1698		}
1699	}
1700
1701	panic("processed entire input and didn't recognise any instructions.")
1702}
1703
// sortedSet returns the keys of m as a newly allocated slice in ascending
// order. The result is empty, but not nil, when m has no keys.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1712