• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"errors"
21	"flag"
22	"fmt"
23	"io/ioutil"
24	"os"
25	"sort"
26	"strconv"
27	"strings"
28
29	"boringssl.googlesource.com/boringssl/util/ar"
30	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
31)
32
// inputFile represents a textual assembly file.
type inputFile struct {
	// path is the file's path. NOTE(review): nothing in this part of the
	// file reads it; presumably it is where the input was loaded from.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
47
// stringWriter is the output sink that transformed assembly is written to.
// Any value with a matching WriteString method satisfies it.
type stringWriter interface {
	WriteString(string) (int, error)
}
51
// processorType identifies the instruction set of the assembly being
// processed; processInput uses it to pick the instruction handler.
type processorType int

const (
	// The enumeration starts at one, so the zero value of processorType
	// matches neither constant.
	ppc64le processorType = iota + 1
	x86_64
)
58
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the instruction handler used for instruction
	// statements; output receives the emitted assembly text.
	processor processorType
	output    stringWriter

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}

	// currentInput is the file being processed. processInput sets it and
	// contents slices node text out of it.
	currentInput inputFile
}
86
87func (d *delocation) contents(node *node32) string {
88	return d.currentInput.contents[node.begin:node.end]
89}
90
91// writeNode writes out an AST node.
92func (d *delocation) writeNode(node *node32) {
93	if _, err := d.output.WriteString(d.contents(node)); err != nil {
94		panic(err)
95	}
96}
97
98func (d *delocation) writeCommentedNode(node *node32) {
99	line := d.contents(node)
100	if _, err := d.output.WriteString("# WAS " + strings.TrimSpace(line) + "\n"); err != nil {
101		panic(err)
102	}
103}
104
105func locateError(err error, with *node32, in inputFile) error {
106	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
107	var line int
108	for _, pos := range posMap {
109		line = pos.line
110	}
111
112	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
113}
114
115func (d *delocation) processInput(input inputFile) (err error) {
116	d.currentInput = input
117
118	var origStatement *node32
119	defer func() {
120		if err := recover(); err != nil {
121			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
122		}
123	}()
124
125	for statement := input.ast.up; statement != nil; statement = statement.next {
126		assertNodeType(statement, ruleStatement)
127		origStatement = statement
128
129		node := skipWS(statement.up)
130		if node == nil {
131			d.writeNode(statement)
132			continue
133		}
134
135		switch node.pegRule {
136		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
137			d.writeNode(statement)
138		case ruleDirective:
139			statement, err = d.processDirective(statement, node.up)
140		case ruleLabelContainingDirective:
141			statement, err = d.processLabelContainingDirective(statement, node.up)
142		case ruleLabel:
143			statement, err = d.processLabel(statement, node.up)
144		case ruleInstruction:
145			switch d.processor {
146			case x86_64:
147				statement, err = d.processIntelInstruction(statement, node.up)
148			case ppc64le:
149				statement, err = d.processPPCInstruction(statement, node.up)
150			default:
151				panic("unknown processor")
152			}
153		default:
154			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
155		}
156
157		if err != nil {
158			return locateError(err, origStatement, input)
159		}
160	}
161
162	return nil
163}
164
165func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
166	assertNodeType(directive, ruleDirectiveName)
167	directiveName := d.contents(directive)
168
169	var args []string
170	forEachPath(directive, func(arg *node32) {
171		// If the argument is a quoted string, use the raw contents.
172		// (Note that this doesn't unescape the string, but that's not
173		// needed so far.
174		if arg.up != nil {
175			arg = arg.up
176			assertNodeType(arg, ruleQuotedArg)
177			if arg.up == nil {
178				args = append(args, "")
179				return
180			}
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedText)
183		}
184		args = append(args, d.contents(arg))
185	}, ruleArgs, ruleArg)
186
187	switch directiveName {
188	case "comm", "lcomm":
189		if len(args) < 1 {
190			return nil, errors.New("comm directive has no arguments")
191		}
192		d.bssAccessorsNeeded[args[0]] = args[0]
193		d.writeNode(statement)
194
195	case "data":
196		// ASAN and some versions of MSAN are adding a .data section,
197		// and adding references to symbols within it to the code. We
198		// will have to work around this in the future.
199		return nil, errors.New(".data section found in module")
200
201	case "section":
202		section := args[0]
203
204		if section == ".data.rel.ro" {
205			// In a normal build, this is an indication of a
206			// problem but any references from the module to this
207			// section will result in a relocation and thus will
208			// break the integrity check. ASAN can generate these
209			// sections and so we will likely have to work around
210			// that in the future.
211			return nil, errors.New(".data.rel.ro section found in module")
212		}
213
214		sectionType, ok := sectionType(section)
215		if !ok {
216			// Unknown sections are permitted in order to be robust
217			// to different compiler modes.
218			d.writeNode(statement)
219			break
220		}
221
222		switch sectionType {
223		case ".rodata", ".text":
224			// Move .rodata to .text so it may be accessed without
225			// a relocation. GCC with -fmerge-constants will place
226			// strings into separate sections, so we move all
227			// sections named like .rodata. Also move .text.startup
228			// so the self-test function is also in the module.
229			d.writeCommentedNode(statement)
230			d.output.WriteString(".text\n")
231
232		case ".data":
233			// See above about .data
234			return nil, errors.New(".data section found in module")
235
236		case ".init_array", ".fini_array", ".ctors", ".dtors":
237			// init_array/ctors/dtors contains function
238			// pointers to constructor/destructor
239			// functions. These contain relocations, but
240			// they're in a different section anyway.
241			d.writeNode(statement)
242			break
243
244		case ".debug", ".note", ".toc":
245			d.writeNode(statement)
246			break
247
248		case ".bss":
249			d.writeNode(statement)
250			return d.handleBSS(statement)
251		}
252
253	default:
254		d.writeNode(statement)
255	}
256
257	return statement, nil
258}
259
260func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
261	// The symbols within directives need to be mapped so that local
262	// symbols in two different .s inputs don't collide.
263	changed := false
264	assertNodeType(directive, ruleLabelContainingDirectiveName)
265	name := d.contents(directive)
266
267	node := directive.next
268	assertNodeType(node, ruleWS)
269
270	node = node.next
271	assertNodeType(node, ruleSymbolArgs)
272
273	var args []string
274	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
275		assertNodeType(node, ruleSymbolArg)
276		arg := node.up
277		var mapped string
278
279		for term := arg; term != nil; term = term.next {
280			if term.pegRule != ruleLocalSymbol {
281				mapped += d.contents(term)
282				continue
283			}
284
285			oldSymbol := d.contents(term)
286			newSymbol := d.mapLocalSymbol(oldSymbol)
287			if newSymbol != oldSymbol {
288				changed = true
289			}
290
291			mapped += newSymbol
292		}
293
294		args = append(args, mapped)
295	}
296
297	if !changed {
298		d.writeNode(statement)
299	} else {
300		d.writeCommentedNode(statement)
301		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
302	}
303
304	if name == ".localentry" {
305		d.output.WriteString(localEntryName(args[0]) + ":\n")
306	}
307
308	return statement, nil
309}
310
311func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
312	symbol := d.contents(label)
313
314	switch label.pegRule {
315	case ruleLocalLabel:
316		d.output.WriteString(symbol + ":\n")
317	case ruleLocalSymbol:
318		// symbols need to be mapped so that local symbols from two
319		// different .s inputs don't collide.
320		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
321	case ruleSymbolName:
322		d.output.WriteString(localTargetName(symbol) + ":\n")
323		d.writeNode(statement)
324	default:
325		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
326	}
327
328	return statement, nil
329}
330
331// instructionArgs collects all the arguments to an instruction.
332func instructionArgs(node *node32) (argNodes []*node32) {
333	for node = skipWS(node); node != nil; node = skipWS(node.next) {
334		assertNodeType(node, ruleInstructionArg)
335		argNodes = append(argNodes, node.up)
336	}
337
338	return argNodes
339}
340
341/* ppc64le
342
343[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
344        2017
345
346(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
347document is /not/ good as that's POWER9 specific.)
348
349ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
351the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
352.got, .plt, .bss, etc sections [PABI;3.3].
353
354A pointer to the TOC is maintained in r2 and the following pattern is used to
355load the address of an element into a register:
356
357  addis <address register>, 2, foo@toc@ha
358  addi <address register>, <address register>, foo@toc@l
359
360The “addis” instruction shifts a signed constant left 16 bits and adds the
361result to its second argument, saving the result in the first argument. The
“addi” instruction does the same, but without shifting. Thus the “@toc@ha”
363suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
364“the bottom 16 bits of the offset”. However, note that both values are signed,
365thus offsets in the top half of a 64KB chunk will have an @ha value that's one
366greater than expected and a negative @l value.
367
368The TOC is specific to a “module” (basically an executable or shared object).
369This means that there's not a single TOC in a process and that r2 needs to
370change as control moves between modules. Thus functions have two entry points:
371the “global” entry point and the “local” entry point. Jumps from within the
372same module can use the local entry while jumps from other modules must use the
373global entry. The global entry establishes the correct value of r2 before
374running the function and the local entry skips that code.
375
376The global entry point for a function is defined by its label. The local entry
377is a power-of-two number of bytes from the global entry, set by the
378“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
379of 1 or 2 bytes is treated as an offset of zero.)
380
381In order to help the global entry code set r2 to point to the local TOC, r12 is
382set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
383the global entry will typically use an addis+addi pair to add a known offset to
384r12 and store it in r2. For example:
385
386foo:
387  addis 2, 12, .TOC. - foo@ha
388  addi  2, 2,  .TOC. - foo@l
389
390(It's worth noting that the '@' operator binds very loosely, so the 3rd
391arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
392
393When calling a function, the compiler doesn't know whether that function is in
394the same module or not. Thus it doesn't know whether r12 needs to be set nor
395whether r2 will be clobbered on return. Rather than always assume the worst,
396the linker fixes stuff up once it knows that a call is going out of module:
397
398Firstly, calling, say, memcpy (which we assume to be in a different module)
399won't actually jump directly to memcpy, or even a PLT resolution function.
400It'll call a synthesised function that:
401  a) saves r2 in the caller's stack frame
402  b) loads the address of memcpy@PLT into r12
403  c) jumps to r12.
404
405As this synthesised function loads memcpy@PLT, a call to memcpy from the
406compiled code just references “memcpy” directly, not “memcpy@PLT”.
407
408Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
409calls must be followed by a nop. If the call ends up going out-of-module, the
410linker will rewrite that nop to load r2 from the stack.
411
412Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
413red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
414followed as called functions will write into their parent's stack frame. For
415example, the synthesised out-of-module trampolines will save r2 24 bytes into
416the caller's frame and all non-leaf functions save the return address 16 bytes
417into the caller's frame.
418
419A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
420result in zero and all writes are discarded. POWER does something a little like
421that, but r0 is only special in certain argument positions for certain
422instructions. You just have to read the manual to know which they are.
423
424
425Delocation is easier than Intel because there's just TOC references, but it's
426also harder because there's no IP-relative addressing.
427
428Jumps are IP-relative however, and have a 24-bit immediate value. So we can
429jump to functions that set a register to the needed value. (r3 is the
430return-value register and so that's what is generally used here.) */
431
432// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
433// source to relative and writing the result to target.
434func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
435	instruction := skipWS(statement.up).up
436	assertNodeType(instruction, ruleInstructionName)
437	name1 := d.contents(instruction)
438	args1 := instructionArgs(instruction.next)
439
440	statement = statement.next
441	instruction = skipWS(statement.up).up
442	assertNodeType(instruction, ruleInstructionName)
443	name2 := d.contents(instruction)
444	args2 := instructionArgs(instruction.next)
445
446	if name1 != "addis" ||
447		len(args1) != 3 ||
448		name2 != "addi" ||
449		len(args2) != 3 {
450		return "", "", "", false
451	}
452
453	target = d.contents(args1[0])
454	relative = d.contents(args1[1])
455	source1 := d.contents(args1[2])
456	source2 := d.contents(args2[2])
457
458	if !strings.HasSuffix(source1, "@ha") ||
459		!strings.HasSuffix(source2, "@l") ||
460		source1[:len(source1)-3] != source2[:len(source2)-2] ||
461		d.contents(args2[0]) != target ||
462		d.contents(args2[1]) != target {
463		return "", "", "", false
464	}
465
466	source = source1[:len(source1)-3]
467	ok = true
468	return
469}
470
471// establishTOC writes the global entry prelude for a function. The standard
472// prelude involves relocations so this version moves the relocation outside
473// the integrity-checked area.
474func establishTOC(w stringWriter) {
475	w.WriteString("999:\n")
476	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
477	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
478	w.WriteString("\tld 12, 0(2)\n")
479	w.WriteString("\tadd 2, 2, 12\n")
480}
481
// loadTOCFuncName returns the name of a synthesized function that sets r3 to
// the value of “symbol+offset”.
//
// The name is ".Lbcm_loadtoc_" followed by symbol with every "." spelled
// "_dot_". A non-empty offset is appended after a "_", with "+" and "-"
// spelled "_plus_" and "_minus_".
func loadTOCFuncName(symbol, offset string) string {
	name := ".Lbcm_loadtoc_" + strings.Replace(symbol, ".", "_dot_", -1)
	if offset == "" {
		return name
	}

	// Neither replacement text contains a sign character, so a single
	// pass gives the same result as replacing "+" and then "-".
	signs := strings.NewReplacer("+", "_plus_", "-", "_minus_")
	return name + "_" + signs.Replace(offset)
}
494
495func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
496	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
497
498	return func(k func()) {
499		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
500		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
501		w.WriteString("\tstd " + dest + ", -8(1)\n")
502		// The TOC loader will use r3, so stash it if necessary.
503		if dest != "3" {
504			w.WriteString("\tstd 3, -16(1)\n")
505		}
506
507		// Because loadTOCFuncName returns a “.L” name, we don't need a
508		// nop after this call.
509		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
510
511		// Cycle registers around. We need r3 -> destReg, -8(1) ->
512		// lr and, optionally, -16(1) -> r3.
513		w.WriteString("\tstd 3, -24(1)\n")
514		w.WriteString("\tld 3, -8(1)\n")
515		w.WriteString("\tmtlr 3\n")
516		w.WriteString("\tld " + dest + ", -24(1)\n")
517		if dest != "3" {
518			w.WriteString("\tld 3, -16(1)\n")
519		}
520		w.WriteString("\taddi 1, 1, 288\n")
521
522		k()
523	}
524}
525
526func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
527	for symRef != nil && symRef.pegRule == ruleOffset {
528		offset := d.contents(symRef)
529		if offset[0] != '+' && offset[0] != '-' {
530			offset = "+" + offset
531		}
532		offsets = offsets + offset
533		symRef = symRef.next
534	}
535	return symRef, offsets
536}
537
538func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
539	if memRef.pegRule != ruleSymbolRef {
540		return "", "", "", false, false, memRef
541	}
542
543	symRef := memRef.up
544	nextRef = memRef.next
545
546	// (Offset* '+')?
547	symRef, offset = d.gatherOffsets(symRef, offset)
548
549	// (LocalSymbol / SymbolName)
550	symbol = d.contents(symRef)
551	if symRef.pegRule == ruleLocalSymbol {
552		symbolIsLocal = true
553		mapped := d.mapLocalSymbol(symbol)
554		if mapped != symbol {
555			symbol = mapped
556			didChange = true
557		}
558	}
559	symRef = symRef.next
560
561	// Offset*
562	symRef, offset = d.gatherOffsets(symRef, offset)
563
564	// ('@' Section / Offset*)?
565	if symRef != nil {
566		assertNodeType(symRef, ruleSection)
567		section = d.contents(symRef)
568		symRef = symRef.next
569
570		symRef, offset = d.gatherOffsets(symRef, offset)
571	}
572
573	if symRef != nil {
574		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
575	}
576
577	return
578}
579
580func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
581	assertNodeType(instruction, ruleInstructionName)
582	instructionName := d.contents(instruction)
583	isBranch := instructionName[0] == 'b'
584
585	argNodes := instructionArgs(instruction.next)
586
587	var wrappers wrapperStack
588	var args []string
589	changed := false
590
591Args:
592	for i, arg := range argNodes {
593		fullArg := arg
594		isIndirect := false
595
596		if arg.pegRule == ruleIndirectionIndicator {
597			arg = arg.next
598			isIndirect = true
599		}
600
601		switch arg.pegRule {
602		case ruleRegisterOrConstant, ruleLocalLabelRef:
603			args = append(args, d.contents(fullArg))
604
605		case ruleTOCRefLow:
606			return nil, errors.New("Found low TOC reference outside preamble pattern")
607
608		case ruleTOCRefHigh:
609			target, _, relative, ok := d.isPPC64LEAPair(statement)
610			if !ok {
611				return nil, errors.New("Found high TOC reference outside preamble pattern")
612			}
613
614			if relative != "12" {
615				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
616			}
617
618			if target != "2" {
619				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
620			}
621
622			statement = statement.next
623			establishTOC(d.output)
624			instructionName = ""
625			changed = true
626			break Args
627
628		case ruleMemoryRef:
629			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
630			changed = didChange
631
632			if len(symbol) > 0 {
633				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
634					symbol = localEntryName(symbol)
635					changed = true
636				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
637					symbol = localTargetName(symbol)
638					changed = true
639				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
640					changed = true
641					d.redirectors[symbol] = redirectorName(symbol)
642					symbol = redirectorName(symbol)
643					// TODO(davidben): This should sanity-check the next
644					// instruction is a nop and ideally remove it.
645					wrappers = append(wrappers, func(k func()) {
646						k()
647						// Like the linker's PLT stubs, redirector functions
648						// expect callers to restore r2.
649						d.output.WriteString("\tld 2, 24(1)\n")
650					})
651				}
652			}
653
654			switch section {
655			case "":
656
657			case "tls":
658				// This section identifier just tells the
659				// assembler to use r13, the pointer to the
660				// thread-local data [PABI;3.7.3.3].
661
662			case "toc@ha":
663				// Delete toc@ha instructions. Per
664				// [PABI;3.6.3], the linker is allowed to erase
665				// toc@ha instructions. We take advantage of
666				// this by unconditionally erasing the toc@ha
667				// instructions and doing the full lookup when
668				// processing toc@l.
669				//
670				// Note that any offset here applies before @ha
671				// and @l. That is, 42+foo@toc@ha is
672				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
673				// corresponding toc@l references are required
674				// by the ABI to have the same offset. The
675				// offset will be incorporated in full when
676				// those are processed.
677				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
678					return nil, errors.New("can't process toc@ha reference")
679				}
680				changed = true
681				instructionName = ""
682				break Args
683
684			case "toc@l":
685				// Per [PAB;3.6.3], this instruction must take
686				// as input a register which was the output of
687				// a toc@ha computation and compute the actual
688				// address of some symbol. The toc@ha
689				// computation was elided, so we ignore that
690				// input register and compute the address
691				// directly.
692				changed = true
693
694				// For all supported toc@l instructions, the
695				// destination register is the first argument.
696				destReg := args[0]
697
698				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
699				switch instructionName {
700				case "addi":
701					// The original instruction was:
702					//   addi destReg, tocHaReg, offset+symbol@toc@l
703					instructionName = ""
704
705				case "ld", "lhz", "lwz":
706					// The original instruction was:
707					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
708					//
709					// We transform that into the
710					// equivalent dereference of destReg:
711					//   l?? destReg, 0(destReg)
712					origInstructionName := instructionName
713					instructionName = ""
714
715					assertNodeType(memRef, ruleBaseIndexScale)
716					assertNodeType(memRef.up, ruleRegisterOrConstant)
717					if memRef.next != nil || memRef.up.next != nil {
718						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
719					}
720
721					baseReg := destReg
722					if baseReg == "0" {
723						// Register zero is special as the base register for a load.
724						// Avoid it by spilling and using r3 instead.
725						baseReg = "3"
726						wrappers = append(wrappers, func(k func()) {
727							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
728							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
729							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
730							k()
731							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
732							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
733						})
734					}
735
736					wrappers = append(wrappers, func(k func()) {
737						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
738					})
739				default:
740					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
741				}
742
743			default:
744				return nil, fmt.Errorf("Unknown section type %q", section)
745			}
746
747			argStr := ""
748			if isIndirect {
749				argStr += "*"
750			}
751			argStr += symbol
752			if len(offset) > 0 {
753				argStr += offset
754			}
755			if len(section) > 0 {
756				argStr += "@"
757				argStr += section
758			}
759
760			for ; memRef != nil; memRef = memRef.next {
761				argStr += d.contents(memRef)
762			}
763
764			args = append(args, argStr)
765
766		default:
767			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
768		}
769	}
770
771	if changed {
772		d.writeCommentedNode(statement)
773
774		var replacement string
775		if len(instructionName) > 0 {
776			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
777		}
778
779		wrappers.do(func() {
780			d.output.WriteString(replacement)
781		})
782	} else {
783		d.writeNode(statement)
784	}
785
786	return statement, nil
787}
788
789/* Intel */
790
// instructionType is the coarse category that classifyInstruction assigns to
// an Intel instruction, based on its mnemonic and operand count.
type instructionType int

const (
	// instrPush is a one-operand push.
	instrPush instructionType = iota
	// instrMove is a two-operand move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-operand call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-operand conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is everything classifyInstruction does not recognise.
	instrOther
)
810
811func classifyInstruction(instr string, args []*node32) instructionType {
812	switch instr {
813	case "push", "pushq":
814		if len(args) == 1 {
815			return instrPush
816		}
817
818	case "mov", "movq", "vmovq", "movsd", "vmovsd":
819		if len(args) == 2 {
820			return instrMove
821		}
822
823	case "cmovneq", "cmoveq":
824		if len(args) == 2 {
825			return instrConditionalMove
826		}
827
828	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
829		if len(args) == 1 {
830			return instrJump
831		}
832
833	case "orq", "andq", "xorq":
834		if len(args) == 2 {
835			return instrCombine
836		}
837
838	case "cmpq":
839		if len(args) == 2 {
840			return instrCompare
841		}
842
843	case "sarxq", "shlxq", "shrxq":
844		if len(args) == 3 {
845			return instrThreeArg
846		}
847
848	case "vpbroadcastq":
849		if len(args) == 2 {
850			return instrTransformingMove
851		}
852	}
853
854	return instrOther
855}
856
857func push(w stringWriter) wrapperFunc {
858	return func(k func()) {
859		w.WriteString("\tpushq %rax\n")
860		k()
861		w.WriteString("\txchg %rax, (%rsp)\n")
862	}
863}
864
865func compare(w stringWriter, instr, a, b string) wrapperFunc {
866	return func(k func()) {
867		k()
868		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
869	}
870}
871
872func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
873	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
874
875	return func(k func()) {
876		if !redzoneCleared {
877			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
878		}
879		w.WriteString("\tpushf\n")
880		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
881		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
882		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
883		w.WriteString("\tpopf\n")
884		if !redzoneCleared {
885			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
886		}
887	}
888}
889
890func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
891	return func(k func()) {
892		if !redzoneCleared {
893			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
894			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
895		}
896		w.WriteString("\tpushfq\n")
897		k()
898		w.WriteString("\tpopfq\n")
899	}
900}
901
902func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
903	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
904
905	var reg string
906NextCandidate:
907	for _, candidate := range candidates {
908		for _, avoid := range avoidRegs {
909			if candidate == avoid {
910				continue NextCandidate
911			}
912		}
913
914		reg = candidate
915		break
916	}
917
918	if len(reg) == 0 {
919		panic("too many excluded registers")
920	}
921
922	return func(k func()) {
923		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
924		w.WriteString("\tpushq " + reg + "\n")
925		k()
926		w.WriteString("\tpopq " + reg + "\n")
927		w.WriteString("\tleaq 128(%rsp), %rsp\n")
928	}, reg
929}
930
931func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
932	return func(k func()) {
933		k()
934		prefix := ""
935		if isAVX {
936			prefix = "v"
937		}
938		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
939	}
940}
941
942func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
943	return func(k func()) {
944		k()
945		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
946	}
947}
948
949func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
950	return func(k func()) {
951		k()
952		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
953	}
954}
955
956func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
957	return func(k func()) {
958		k()
959		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
960	}
961}
962
// isValidLEATarget reports whether reg may be used as the destination of an
// lea, which it takes to be any register whose name does not start with
// %xmm, %ymm or %zmm.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
966
967func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
968	var invertedCondition string
969
970	switch instr {
971	case "cmoveq":
972		invertedCondition = "ne"
973	case "cmovneq":
974		invertedCondition = "e"
975	default:
976		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
977	}
978
979	return func(k func()) {
980		w.WriteString("\tj" + invertedCondition + " 999f\n")
981		k()
982		w.WriteString("999:\n")
983	}
984}
985
986func (d *delocation) isRIPRelative(node *node32) bool {
987	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
988}
989
// processIntelInstruction rewrites a single instruction statement whose
// instruction-name node is instruction. Register, constant and local-label
// arguments are copied through; memory references are parsed with parseMemRef
// and redirected where needed:
//
//   - a section-less reference to OPENSSL_ia32cap_P is only accepted as the
//     RIP-relative first operand of a two-operand leaq, and is replaced by a
//     computation based on OPENSSL_ia32cap_addr_delta;
//   - references to symbols present in d.symbols are pointed at the name
//     returned by localTargetName;
//   - PLT references to unknown, non-local, non-synthesized symbols are sent
//     through a redirector that is recorded in d.redirectors;
//   - GOTPCREL references are reduced to loading the address into a register,
//     followed by whatever operation the original instruction performed.
//
// If anything changed, the original statement is emitted with
// writeCommentedNode followed by the replacement (run through the accumulated
// wrappers); otherwise the statement is emitted unchanged with writeNode. It
// returns statement on success, or an error for a reference it cannot rewrite.
func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	// wrappers collects the code to emit around the replacement instruction,
	// outermost first. args collects the text of the rebuilt operands.
	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		// Step over an indirection indicator, but remember it so that a "*"
		// can be put back when a memory operand is rebuilt below.
		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			// Nothing to rewrite: reuse the text of the original node.
			args = append(args, d.contents(fullArg))

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			// NOTE(review): this assignment overwrites, rather than ORs into,
			// any value of changed set while handling an earlier argument —
			// confirm that this is intended.
			changed = didChange

			if symbol == "OPENSSL_ia32cap_P" && section == "" {
				if instructionName != "leaq" {
					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
				}

				// Only "leaq OPENSSL_ia32cap_P(%rip), <reg>" without an offset
				// is supported.
				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
				}

				target := argNodes[1]
				assertNodeType(target, ruleRegisterOrConstant)
				reg := d.contents(target)

				if !strings.HasPrefix(reg, "%r") {
					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
				}

				changed = true

				// Flag-altering instructions (i.e. addq) are going to be used so the
				// flags need to be preserved.
				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))

				// This wrapper never calls k, so the replacement instruction
				// built at the end of this function is not emitted; the two
				// instructions below take its place.
				wrappers = append(wrappers, func(k func()) {
					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
				})

				break Args
			}

			switch section {
			case "":
				// Plain reference: redirect symbols defined in the module to
				// their local target names.
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				}

			case "PLT":
				if classifyInstruction(instructionName, argNodes) != instrJump {
					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
				}

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					// Unknown symbol via PLT is an
					// out-call from the module, e.g.
					// memcpy.
					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
					symbol = redirectorName(symbol)
				}

				// The section suffix is dropped when the operand is rebuilt
				// below, so the instruction always counts as changed.
				changed = true

			case "GOTPCREL":
				if len(offset) > 0 {
					return nil, errors.New("loading from GOT with offset is unsupported")
				}
				if !d.isRIPRelative(memRef) {
					return nil, errors.New("GOT access must be IP-relative")
				}

				// useGOT is set for symbols that are neither defined in the
				// module nor synthesized; those are loaded via loadFromGOT
				// below.
				useGOT := false
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !isSynthesized(symbol) {
					useGOT = true
				}

				classification := classifyInstruction(instructionName, argNodes)
				if classification != instrThreeArg && classification != instrCompare && i != 0 {
					return nil, errors.New("GOT access must be source operand")
				}

				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
				// targetReg is the register that receives the address.
				// redzoneCleared records whether a wrapper added so far
				// already moves %rsp past the red zone.
				var targetReg string
				var redzoneCleared bool
				switch classification {
				case instrPush:
					wrappers = append(wrappers, push(d.output))
					targetReg = "%rax"
				case instrConditionalMove:
					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
					fallthrough
				case instrMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
				case instrCompare:
					// i^1 selects the operand that is not the GOT reference
					// (the check above does not restrict i for compares).
					otherSource := d.contents(argNodes[i^1])
					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)
					// Keep the operands in their original order, with tempReg
					// standing in for the GOT reference.
					if i == 0 {
						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
					} else {
						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
					}
					targetReg = tempReg
				case instrTransformingMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
					// Only targets that are not valid LEA targets are
					// accepted here; see the error text for the reason.
					if isValidLEATarget(targetReg) {
						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
					}
				case instrCombine:
					targetReg = d.contents(argNodes[1])
					if !isValidLEATarget(targetReg) {
						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
					}
					// Load the address into a scratch register and apply the
					// original operation from there onto the real target.
					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
					targetReg = tempReg
				case instrThreeArg:
					if n := len(argNodes); n != 3 {
						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
					}
					if i != 0 && i != 1 {
						return nil, errors.New("GOT access must be from source operand")
					}
					targetReg = d.contents(argNodes[2])

					// otherSource is whichever of the first two operands is
					// not the GOT reference.
					otherSource := d.contents(argNodes[1])
					if i == 1 {
						otherSource = d.contents(argNodes[0])
					}

					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					if i == 0 {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
					} else {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
					}
					targetReg = tempReg
				default:
					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
				}

				if !isValidLEATarget(targetReg) {
					// Sometimes the compiler will load from the GOT to an
					// XMM register, which is not a valid target of an LEA
					// instruction.
					saveRegWrapper, tempReg := saveRegister(d.output, nil)
					wrappers = append(wrappers, saveRegWrapper)
					isAVX := strings.HasPrefix(instructionName, "v")
					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
					targetReg = tempReg
					// saveRegister's wrapper adjusts %rsp itself, so a second
					// one on top of an earlier one is treated as a bug.
					if redzoneCleared {
						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
					}
					redzoneCleared = true
				}

				// The innermost wrapper added below does not call k: it
				// emits the address computation in place of the original
				// instruction.
				if symbol == "OPENSSL_ia32cap_P" {
					// Flag-altering instructions (i.e. addq) are going to be used so the
					// flags need to be preserved.
					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
					})
				} else if useGOT {
					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
				} else {
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
					})
				}
				changed = true
				break Args

			default:
				return nil, fmt.Errorf("Unknown section type %q", section)
			}

			if !changed && len(section) > 0 {
				panic("section was not handled")
			}
			section = ""

			// Rebuild the operand text: optional "*", the (possibly renamed)
			// symbol, the offset, and then the remaining memRef nodes.
			argStr := ""
			if isIndirect {
				argStr += "*"
			}
			argStr += symbol
			argStr += offset

			for ; memRef != nil; memRef = memRef.next {
				argStr += d.contents(memRef)
			}

			args = append(args, argStr)

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		// The replacement is only written if every wrapper calls its
		// continuation.
		wrappers.do(func() {
			d.output.WriteString(replacement)
		})
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}
1237
1238func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1239	lastStatement := statement
1240	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1241		node := skipWS(statement.up)
1242		if node == nil {
1243			d.writeNode(statement)
1244			continue
1245		}
1246
1247		switch node.pegRule {
1248		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1249			d.writeNode(statement)
1250
1251		case ruleDirective:
1252			directive := node.up
1253			assertNodeType(directive, ruleDirectiveName)
1254			directiveName := d.contents(directive)
1255			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1256				return lastStatement, nil
1257			}
1258			d.writeNode(statement)
1259
1260		case ruleLabel:
1261			label := node.up
1262			d.writeNode(statement)
1263
1264			if label.pegRule != ruleLocalSymbol {
1265				symbol := d.contents(label)
1266				localSymbol := localTargetName(symbol)
1267				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1268
1269				d.bssAccessorsNeeded[symbol] = localSymbol
1270			}
1271
1272		case ruleLabelContainingDirective:
1273			var err error
1274			statement, err = d.processLabelContainingDirective(statement, node.up)
1275			if err != nil {
1276				return nil, err
1277			}
1278
1279		default:
1280			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1281		}
1282	}
1283
1284	return lastStatement, nil
1285}
1286
// transform delocates the parsed inputs and writes the resulting assembly to
// w. It first scans every input to collect label symbols, .localentry symbols
// and .file numbers, panicking on duplicates. It then processes each input
// between the BORINGSSL_bcm_text_start and BORINGSSL_bcm_text_end labels.
// Finally it emits the supporting definitions: redirector functions, BSS
// accessor functions, then either the TOC loaders (ppc64le) or the GOT
// externals plus the OPENSSL_ia32cap_get / OPENSSL_ia32cap_addr_delta
// definitions (otherwise), and last the BORINGSSL_bcm_text_hash placeholder.
//
// It returns the first error reported by processInput. The results of the
// individual WriteString calls are not checked.
func transform(w stringWriter, inputs []inputFile) error {
	// symbols contains all defined symbols.
	symbols := make(map[string]struct{})
	// localEntrySymbols contains all symbols with a .localentry directive.
	localEntrySymbols := make(map[string]struct{})
	// fileNumbers is the set of IDs seen in .file directives.
	fileNumbers := make(map[int]struct{})
	// maxObservedFileNumber contains the largest seen file number in a
	// .file directive. Zero is not a valid number.
	maxObservedFileNumber := 0
	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
	// checksums in .file directives. If it does so, then this script needs
	// to match that behaviour otherwise warnings result.
	fileDirectivesContainMD5 := false

	// OPENSSL_ia32cap_get will be synthesized by this script.
	symbols["OPENSSL_ia32cap_get"] = struct{}{}

	for _, input := range inputs {
		// Record every symbol name that appears as a label. A name defined
		// twice (across all inputs) is fatal.
		forEachPath(input.ast.up, func(node *node32) {
			symbol := input.contents[node.begin:node.end]
			if _, ok := symbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
			}
			symbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabel, ruleSymbolName)

		// Record the first argument of every .localentry directive.
		forEachPath(input.ast.up, func(node *node32) {
			node = node.up
			assertNodeType(node, ruleLabelContainingDirectiveName)
			directive := input.contents[node.begin:node.end]
			if directive != ".localentry" {
				return
			}
			// Extract the first argument.
			node = skipWS(node.next)
			assertNodeType(node, ruleSymbolArgs)
			node = node.up
			assertNodeType(node, ruleSymbolArg)
			symbol := input.contents[node.begin:node.end]
			if _, ok := localEntrySymbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
			}
			localEntrySymbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabelContainingDirective)

		// Collect the numbers used by .file directives so that a fresh one
		// can be picked for the directive inserted below.
		forEachPath(input.ast.up, func(node *node32) {
			assertNodeType(node, ruleLocationDirective)
			directive := input.contents[node.begin:node.end]
			if !strings.HasPrefix(directive, ".file") {
				return
			}
			parts := strings.Fields(directive)
			if len(parts) == 2 {
				// This is a .file directive with just a
				// filename. Clang appears to generate just one
				// of these at the beginning of the output for
				// the compilation unit. Ignore it.
				return
			}
			// NOTE(review): a directive consisting of ".file" alone would make
			// parts[1] index out of range — presumably never produced by the
			// compiler; confirm.
			fileNo, err := strconv.Atoi(parts[1])
			if err != nil {
				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
			}

			if _, ok := fileNumbers[fileNo]; ok {
				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
			}
			fileNumbers[fileNo] = struct{}{}

			if fileNo > maxObservedFileNumber {
				maxObservedFileNumber = fileNo
			}

			for _, token := range parts[2:] {
				if token == "md5" {
					fileDirectivesContainMD5 = true
				}
			}
		}, ruleStatement, ruleLocationDirective)
	}

	// The processor is detected from the first input only; with no inputs it
	// stays at x86_64.
	processor := x86_64
	if len(inputs) > 0 {
		processor = detectProcessor(inputs[0])
	}

	d := &delocation{
		symbols:            symbols,
		localEntrySymbols:  localEntrySymbols,
		processor:          processor,
		output:             w,
		redirectors:        make(map[string]string),
		bssAccessorsNeeded: make(map[string]string),
		tocLoaders:         make(map[string]struct{}),
		gotExternalsNeeded: make(map[string]struct{}),
	}

	// Open the module: a .file/.loc pair using an unused file number,
	// followed by the start-of-text marker.
	w.WriteString(".text\n")
	var fileTrailing string
	if fileDirectivesContainMD5 {
		fileTrailing = " md5 0x00000000000000000000000000000000"
	}
	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_start:\n")

	for _, input := range inputs {
		if err := d.processInput(input); err != nil {
			return err
		}
	}

	w.WriteString(".text\n")
	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_end:\n")

	// Emit redirector functions. Each is a single jump instruction.
	// The names are sorted so that the output does not depend on map
	// iteration order.
	var redirectorNames []string
	for name := range d.redirectors {
		redirectorNames = append(redirectorNames, name)
	}
	sort.Strings(redirectorNames)

	for _, name := range redirectorNames {
		redirector := d.redirectors[name]
		if d.processor == ppc64le {
			w.WriteString(".section \".toc\", \"aw\"\n")
			w.WriteString(".Lredirector_toc_" + name + ":\n")
			w.WriteString(".quad " + name + "\n")
			w.WriteString(".text\n")
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			// |name| will clobber r2, so save it. This is matched by a restore in
			// redirector calls.
			w.WriteString("\tstd 2, 24(1)\n")
			// Load and call |name|'s global entry point.
			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
			w.WriteString("\tmtctr 12\n")
			w.WriteString("\tbctr\n")
		} else {
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			w.WriteString("\tjmp\t" + name + "\n")
		}
	}

	var accessorNames []string
	for accessor := range d.bssAccessorsNeeded {
		accessorNames = append(accessorNames, accessor)
	}
	sort.Strings(accessorNames)

	// Emit BSS accessor functions. Each is a single LEA followed by RET.
	// (On ppc64le the equivalent is an addis/addi pair followed by blr.)
	for _, name := range accessorNames {
		funcName := accessorName(name)
		w.WriteString(".type " + funcName + ", @function\n")
		w.WriteString(funcName + ":\n")
		target := d.bssAccessorsNeeded[name]

		if d.processor == ppc64le {
			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
			w.WriteString("\tblr\n")
		} else {
			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
		}
	}

	if d.processor == ppc64le {
		// Each entry of d.tocLoaders is a symbol and an offset separated by
		// a NUL byte.
		loadTOCNames := sortedSet(d.tocLoaders)
		for _, symbolAndOffset := range loadTOCNames {
			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
			symbol, offset := parts[0], parts[1]

			funcName := loadTOCFuncName(symbol, offset)
			ref := symbol + offset

			// The function is emitted under two labels: funcName itself and
			// funcName without its first two characters (presumably a ".L"
			// prefix — TODO confirm against loadTOCFuncName).
			w.WriteString(".type " + funcName[2:] + ", @function\n")
			w.WriteString(funcName[2:] + ":\n")
			w.WriteString(funcName + ":\n")
			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
			w.WriteString("\tblr\n")
		}

		w.WriteString(".LBORINGSSL_external_toc:\n")
		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
	} else {
		// Each entry of d.gotExternalsNeeded has the form "symbol@section".
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, name := range externalNames {
			parts := strings.SplitN(name, "@", 2)
			symbol, section := parts[0], parts[1]
			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
			w.WriteString(symbol + "_" + section + "_external:\n")
			// Ideally this would be .quad foo@GOTPCREL, but clang's
			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
			// we manually sign-extend the value, knowing that the GOT is
			// always at the end, thus foo@GOTPCREL has a positive value.
			w.WriteString("\t.long " + symbol + "@" + section + "\n")
			w.WriteString("\t.long 0\n")
		}

		// OPENSSL_ia32cap_get returns the address of OPENSSL_ia32cap_P in
		// %rax. It is emitted under both its global name and its local
		// target name.
		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
		w.WriteString(".globl OPENSSL_ia32cap_get\n")
		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
		w.WriteString("OPENSSL_ia32cap_get:\n")
		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
		w.WriteString("\tret\n")

		// OPENSSL_ia32cap_addr_delta holds the distance from itself to
		// OPENSSL_ia32cap_P.
		w.WriteString(".extern OPENSSL_ia32cap_P\n")
		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
	}

	// Emit the hash object, one .byte per element of
	// fipscommon.UninitHashValue.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range fipscommon.UninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}
1515
1516func parseInputs(inputs []inputFile) error {
1517	for i, input := range inputs {
1518		var contents string
1519
1520		if input.isArchive {
1521			arFile, err := os.Open(input.path)
1522			if err != nil {
1523				return err
1524			}
1525			defer arFile.Close()
1526
1527			ar, err := ar.ParseAR(arFile)
1528			if err != nil {
1529				return err
1530			}
1531
1532			if len(ar) != 1 {
1533				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1534			}
1535
1536			for _, c := range ar {
1537				contents = string(c)
1538			}
1539		} else {
1540			inBytes, err := ioutil.ReadFile(input.path)
1541			if err != nil {
1542				return err
1543			}
1544
1545			contents = string(inBytes)
1546		}
1547
1548		asm := Asm{Buffer: contents, Pretty: true}
1549		asm.Init()
1550		if err := asm.Parse(); err != nil {
1551			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1552		}
1553		ast := asm.AST()
1554
1555		inputs[i].contents = contents
1556		inputs[i].ast = ast
1557	}
1558
1559	return nil
1560}
1561
1562func main() {
1563	// The .a file, if given, is expected to be an archive of textual
1564	// assembly sources. That's odd, but CMake really wants to create
1565	// archive files so it's the only way that we can make it work.
1566	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1567	outFile := flag.String("o", "", "Path to output assembly")
1568
1569	flag.Parse()
1570
1571	if len(*outFile) == 0 {
1572		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1573		os.Exit(1)
1574	}
1575
1576	var inputs []inputFile
1577	if len(*arInput) > 0 {
1578		inputs = append(inputs, inputFile{
1579			path:      *arInput,
1580			index:     0,
1581			isArchive: true,
1582		})
1583	}
1584
1585	for i, path := range flag.Args() {
1586		if len(path) == 0 {
1587			continue
1588		}
1589
1590		inputs = append(inputs, inputFile{
1591			path:  path,
1592			index: i + 1,
1593		})
1594	}
1595
1596	if err := parseInputs(inputs); err != nil {
1597		fmt.Fprintf(os.Stderr, "%s\n", err)
1598		os.Exit(1)
1599	}
1600
1601	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1602	if err != nil {
1603		panic(err)
1604	}
1605	defer out.Close()
1606
1607	if err := transform(out, inputs); err != nil {
1608		fmt.Fprintf(os.Stderr, "%s\n", err)
1609		os.Exit(1)
1610	}
1611}
1612
1613func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1614	if node == nil {
1615		return
1616	}
1617
1618	if len(rules) == 0 {
1619		cb(node)
1620		return
1621	}
1622
1623	rule := rules[0]
1624	childRules := rules[1:]
1625
1626	for ; node != nil; node = node.next {
1627		if node.pegRule != rule {
1628			continue
1629		}
1630
1631		if len(childRules) == 0 {
1632			cb(node)
1633		} else {
1634			forEachPath(node.up, cb, childRules...)
1635		}
1636	}
1637}
1638
1639func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1640	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1641	}
1642	return node
1643}
1644
1645func skipWS(node *node32) *node32 {
1646	return skipNodes(node, ruleWS)
1647}
1648
1649func assertNodeType(node *node32, expected pegRule) {
1650	if rule := node.pegRule; rule != expected {
1651		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1652	}
1653}
1654
// wrapperFunc emits code around a continuation: it is handed a function that
// emits whatever belongs inside, and decides when (or whether) to call it.
type wrapperFunc func(func())

// wrapperStack is a list of wrappers, outermost first.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside all wrappers of the stack, the first element
// being the outermost. Wrappers are removed from the stack as they are
// entered, so the stack is consumed by the call. If a wrapper does not call
// its continuation, neither the remaining wrappers nor baseCase run.
func (w *wrapperStack) do(baseCase func()) {
	stack := *w
	if len(stack) == 0 {
		baseCase()
		return
	}

	outermost := stack[0]
	*w = stack[1:]
	outermost(func() {
		w.do(baseCase)
	})
}
1669
// localTargetName returns the local target label that stands in for the
// global symbol name: name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	return fmt.Sprintf(".L%s_local_target", name)
}
1675
// localEntryName returns the local entry label derived from name: name wrapped
// in ".L" and "_local_entry".
func localEntryName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_entry"
	)
	return prefix + name + suffix
}
1679
// isSynthesized reports whether symbol is one of the names this tool generates
// itself: OPENSSL_ia32cap_get, anything ending in "_bss_get", or anything
// starting with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
1685
// redirectorName returns the name of the redirector function for symbol:
// symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1689
// sectionType returns the type of a section, i.e. the section name cut off
// before its second dot: a section called “.text.foo” is a “.text” section.
// Any type starting with “.debug_” is reported as “.debug”. The second result
// is false, and the type empty, if section does not start with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	// Look for a dot after the leading one and drop it and what follows.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		section = section[:dot+1]
	}

	if strings.HasPrefix(section, ".debug_") {
		return ".debug", true
	}
	return section, true
}
1708
// accessorName returns the name of the accessor function for the BSS symbol
// name: name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1714
1715func (d *delocation) mapLocalSymbol(symbol string) string {
1716	if d.currentInput.index == 0 {
1717		return symbol
1718	}
1719	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1720}
1721
1722func detectProcessor(input inputFile) processorType {
1723	for statement := input.ast.up; statement != nil; statement = statement.next {
1724		node := skipNodes(statement.up, ruleWS)
1725		if node == nil || node.pegRule != ruleInstruction {
1726			continue
1727		}
1728
1729		instruction := node.up
1730		instructionName := input.contents[instruction.begin:instruction.end]
1731
1732		switch instructionName {
1733		case "movq", "call", "leaq":
1734			return x86_64
1735		case "addis", "addi", "mflr":
1736			return ppc64le
1737		}
1738	}
1739
1740	panic("processed entire input and didn't recognise any instructions.")
1741}
1742
// sortedSet returns the keys of m in ascending order. The result is never nil,
// even when m is empty.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1751