• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2017 The BoringSSL Authors
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"bytes"
21	"errors"
22	"flag"
23	"fmt"
24	"os"
25	"os/exec"
26	"path/filepath"
27	"sort"
28	"strconv"
29	"strings"
30
31	"boringssl.googlesource.com/boringssl/util/ar"
32	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
33)
34
// inputFile represents a textual assembly file together with its parsed
// syntax tree.
type inputFile struct {
	// path identifies the input (presumably a filesystem path; confirm
	// against the code that populates it).
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. The begin/end offsets of
	// AST nodes are used to slice this string.
	contents string
	// ast points to the head of the syntax tree. Its children are the
	// top-level statements of the file.
	ast *node32
}
49
// stringWriter is the only capability the delocation code needs from its
// output: something that can accept strings. The method set matches
// io.StringWriter.
type stringWriter interface {
	WriteString(string) (int, error)
}
53
// processorType identifies the architecture of the assembly being processed.
type processorType int

const (
	// The values start at one, so the zero value of processorType does not
	// name any supported processor.
	x86_64 processorType = iota + 1
	aarch64
)
60
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the architecture-specific instruction handler and
	// output receives the transformed assembly text.
	processor processorType
	output    stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. It is set by processInput
	// and its text is what contents() slices.
	currentInput inputFile
}
92
93func (d *delocation) contents(node *node32) string {
94	return d.currentInput.contents[node.begin:node.end]
95}
96
97// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) {
99	if _, err := d.output.WriteString(d.contents(node)); err != nil {
100		panic(err)
101	}
102}
103
104func (d *delocation) writeCommentedNode(node *node32) {
105	line := d.contents(node)
106	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
107		panic(err)
108	}
109}
110
111func locateError(err error, with *node32, in inputFile) error {
112	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
113	var line int
114	for _, pos := range posMap {
115		line = pos.line
116	}
117
118	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
119}
120
121func (d *delocation) processInput(input inputFile) (err error) {
122	d.currentInput = input
123
124	var origStatement *node32
125	defer func() {
126		if err := recover(); err != nil {
127			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
128		}
129	}()
130
131	for statement := input.ast.up; statement != nil; statement = statement.next {
132		assertNodeType(statement, ruleStatement)
133		origStatement = statement
134
135		node := skipWS(statement.up)
136		if node == nil {
137			d.writeNode(statement)
138			continue
139		}
140
141		switch node.pegRule {
142		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
143			d.writeNode(statement)
144		case ruleDirective:
145			statement, err = d.processDirective(statement, node.up)
146		case ruleLabelContainingDirective:
147			statement, err = d.processLabelContainingDirective(statement, node.up)
148		case ruleLabel:
149			statement, err = d.processLabel(statement, node.up)
150		case ruleInstruction:
151			switch d.processor {
152			case x86_64:
153				statement, err = d.processIntelInstruction(statement, node.up)
154			case aarch64:
155				statement, err = d.processAarch64Instruction(statement, node.up)
156			default:
157				panic("unknown processor")
158			}
159		default:
160			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
161		}
162
163		if err != nil {
164			return locateError(err, origStatement, input)
165		}
166	}
167
168	return nil
169}
170
171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
172	assertNodeType(directive, ruleDirectiveName)
173	directiveName := d.contents(directive)
174
175	var args []string
176	forEachPath(directive, func(arg *node32) {
177		// If the argument is a quoted string, use the raw contents.
178		// (Note that this doesn't unescape the string, but that's not
179		// needed so far.
180		if arg.up != nil {
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedArg)
183			if arg.up == nil {
184				args = append(args, "")
185				return
186			}
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedText)
189		}
190		args = append(args, d.contents(arg))
191	}, ruleArgs, ruleArg)
192
193	switch directiveName {
194	case "comm", "lcomm":
195		if len(args) < 1 {
196			return nil, errors.New("comm directive has no arguments")
197		}
198		d.bssAccessorsNeeded[args[0]] = args[0]
199		d.writeNode(statement)
200
201	case "data":
202		// ASAN and some versions of MSAN are adding a .data section,
203		// and adding references to symbols within it to the code. We
204		// will have to work around this in the future.
205		return nil, errors.New(".data section found in module")
206
207	case "bss":
208		d.writeNode(statement)
209		return d.handleBSS(statement)
210
211	case "section":
212		section := args[0]
213
214		if section == ".data.rel.ro" {
215			// In a normal build, this is an indication of a
216			// problem but any references from the module to this
217			// section will result in a relocation and thus will
218			// break the integrity check. ASAN can generate these
219			// sections and so we will likely have to work around
220			// that in the future.
221			return nil, errors.New(".data.rel.ro section found in module")
222		}
223
224		sectionType, ok := sectionType(section)
225		if !ok {
226			// Unknown sections are permitted in order to be robust
227			// to different compiler modes.
228			d.writeNode(statement)
229			break
230		}
231
232		switch sectionType {
233		case ".rodata", ".text":
234			// Move .rodata to .text so it may be accessed without
235			// a relocation. GCC with -fmerge-constants will place
236			// strings into separate sections, so we move all
237			// sections named like .rodata. Also move .text.startup
238			// so the self-test function is also in the module.
239			d.writeCommentedNode(statement)
240			d.output.WriteString(".text\n")
241
242		case ".data":
243			// See above about .data
244			return nil, errors.New(".data section found in module")
245
246		case ".init_array", ".fini_array", ".ctors", ".dtors":
247			// init_array/ctors/dtors contains function
248			// pointers to constructor/destructor
249			// functions. These contain relocations, but
250			// they're in a different section anyway.
251			d.writeNode(statement)
252			break
253
254		case ".debug", ".note":
255			d.writeNode(statement)
256			break
257
258		case ".bss":
259			d.writeNode(statement)
260			return d.handleBSS(statement)
261		}
262
263	default:
264		d.writeNode(statement)
265	}
266
267	return statement, nil
268}
269
270func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool {
271	changed := false
272	assertNodeType(expr, ruleSymbolExpr)
273
274	for expr != nil {
275		atom := expr.up
276		assertNodeType(atom, ruleSymbolAtom)
277
278		for term := atom.up; term != nil; term = skipWS(term.next) {
279			if term.pegRule == ruleSymbolExpr {
280				changed = d.processSymbolExpr(term, b) || changed
281				continue
282			}
283
284			if term.pegRule != ruleLocalSymbol {
285				b.WriteString(d.contents(term))
286				continue
287			}
288
289			oldSymbol := d.contents(term)
290			newSymbol := d.mapLocalSymbol(oldSymbol)
291			if newSymbol != oldSymbol {
292				changed = true
293			}
294
295			b.WriteString(newSymbol)
296		}
297
298		next := skipWS(atom.next)
299		if next == nil {
300			break
301		}
302		assertNodeType(next, ruleSymbolOperator)
303		b.WriteString(d.contents(next))
304		next = skipWS(next.next)
305		assertNodeType(next, ruleSymbolExpr)
306		expr = next
307	}
308	return changed
309}
310
311func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
312	// The symbols within directives need to be mapped so that local
313	// symbols in two different .s inputs don't collide.
314	changed := false
315	assertNodeType(directive, ruleLabelContainingDirectiveName)
316	name := d.contents(directive)
317
318	node := directive.next
319	assertNodeType(node, ruleWS)
320
321	node = node.next
322	assertNodeType(node, ruleSymbolArgs)
323
324	var args []string
325	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
326		assertNodeType(node, ruleSymbolArg)
327		arg := node.up
328		assertNodeType(arg, ruleSymbolExpr)
329
330		var b strings.Builder
331		changed = d.processSymbolExpr(arg, &b) || changed
332
333		args = append(args, b.String())
334	}
335
336	if !changed {
337		d.writeNode(statement)
338	} else {
339		d.writeCommentedNode(statement)
340		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
341	}
342
343	return statement, nil
344}
345
346func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
347	symbol := d.contents(label)
348
349	switch label.pegRule {
350	case ruleLocalLabel:
351		d.output.WriteString(symbol + ":\n")
352	case ruleLocalSymbol:
353		// symbols need to be mapped so that local symbols from two
354		// different .s inputs don't collide.
355		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
356	case ruleSymbolName:
357		d.output.WriteString(localTargetName(symbol) + ":\n")
358		d.writeNode(statement)
359	default:
360		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
361	}
362
363	return statement, nil
364}
365
366// instructionArgs collects all the arguments to an instruction.
367func instructionArgs(node *node32) (argNodes []*node32) {
368	for node = skipWS(node); node != nil; node = skipWS(node.next) {
369		assertNodeType(node, ruleInstructionArg)
370		argNodes = append(argNodes, node.up)
371	}
372
373	return argNodes
374}
375
376// Aarch64 support
377
// gotHelperName returns the name of the synthesised helper function that
// returns the address of |symbol| from the GOT. The name is |symbol| with a
// fixed local-label prefix.
func gotHelperName(symbol string) string {
	const helperPrefix = ".Lboringssl_loadgot_"
	return helperPrefix + symbol
}
383
384// loadAarch64Address emits instructions to put the address of |symbol|
385// (optionally adjusted by |offsetStr|) into |targetReg|.
386func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
387	// There are two paths here: either the symbol is known to be local in which
388	// case adr is used to get the address (within 1MiB), or a GOT reference is
389	// really needed in which case the code needs to jump to a helper function.
390	//
391	// A helper function is needed because using code appears to be the only way
392	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
393	// the module, but on Aarch64 that results in a "COPY" relocation and linker
394	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
395	// a function outside of the module that returns the address from the GOT in
396	// x0.
397
398	d.writeCommentedNode(statement)
399
400	_, isKnown := d.symbols[symbol]
401	isLocal := strings.HasPrefix(symbol, ".L")
402	if isKnown || isLocal || isSynthesized(symbol) {
403		if isLocal {
404			symbol = d.mapLocalSymbol(symbol)
405		} else if isKnown {
406			symbol = localTargetName(symbol)
407		}
408
409		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
410
411		return statement, nil
412	}
413
414	if len(offsetStr) != 0 {
415		panic("non-zero offset for helper-based reference")
416	}
417
418	var helperFunc string
419	if symbol == "OPENSSL_armcap_P" {
420		helperFunc = ".LOPENSSL_armcap_P_addr"
421	} else {
422		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
423		// instruction, which would normally do the dereferencing, needs to be
424		// dropped. GOT helpers have to include the dereference because the
425		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
426		// instruction.
427		d.gotExternalsNeeded[symbol] = struct{}{}
428		helperFunc = gotHelperName(symbol)
429	}
430
431	// Clear the red-zone. I can't find a definitive answer about whether Linux
432	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
433	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
434	d.output.WriteString("\tsub sp, sp, 128\n")
435
436	// Save x0 (which will be stomped by the return value) and the link register
437	// to the stack. Then save the program counter into the link register and
438	// jump to the helper function.
439	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
440	d.output.WriteString("\tbl " + helperFunc + "\n")
441
442	if targetReg == "x0" {
443		// If the target happens to be x0 then restore the link register from the
444		// stack and send the saved value of x0 to the zero register.
445		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
446	} else {
447		// Otherwise move the result into place and restore registers.
448		d.output.WriteString("\tmov " + targetReg + ", x0\n")
449		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
450	}
451
452	// Revert the red-zone adjustment.
453	d.output.WriteString("\tadd sp, sp, 128\n")
454
455	return statement, nil
456}
457
458func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
459	assertNodeType(instruction, ruleInstructionName)
460	instructionName := d.contents(instruction)
461
462	argNodes := instructionArgs(instruction.next)
463
464	switch instructionName {
465	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
466		// These functions are special because they take a condition-code name as
467		// an argument and that looks like a symbol reference.
468		d.writeNode(statement)
469		return statement, nil
470
471	case "mrs":
472		// Functions that take special register names also look like a symbol
473		// reference to the parser.
474		d.writeNode(statement)
475		return statement, nil
476
477	case "adrp":
478		// adrp always generates a relocation, even when the target symbol is in the
479		// same segment, because the page-offset of the code isn't known until link
480		// time. Thus adrp instructions are turned into either adr instructions
481		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
482		// which load the full address. Later instructions, which add the low 12 bits
483		// of offset, are tweaked to remove the offset since it's already included.
484		// Loads of GOT symbols are slightly more complex because it's not possible to
485		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
486		// instruction, which would normally do the dereferencing, is dropped
487		// completely. (Or turned into a mov if it targets a different register.)
488		assertNodeType(argNodes[0], ruleRegisterOrConstant)
489		targetReg := d.contents(argNodes[0])
490		if !strings.HasPrefix(targetReg, "x") {
491			panic("adrp targetting register " + targetReg + ", which has the wrong size")
492		}
493
494		var symbol, offset string
495		switch argNodes[1].pegRule {
496		case ruleGOTSymbolOffset:
497			symbol = d.contents(argNodes[1].up)
498		case ruleMemoryRef:
499			assertNodeType(argNodes[1].up, ruleSymbolRef)
500			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
501			if len(empty) != 0 {
502				panic("prefix offsets found for adrp")
503			}
504			symbol = d.contents(node)
505			_, offset = d.gatherOffsets(node.next, "")
506		default:
507			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
508		}
509
510		return d.loadAarch64Address(statement, targetReg, symbol, offset)
511	}
512
513	var args []string
514	changed := false
515
516	for _, arg := range argNodes {
517		fullArg := arg
518
519		switch arg.pegRule {
520		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
521			args = append(args, d.contents(fullArg))
522
523		case ruleGOTSymbolOffset:
524			// These should only be arguments to adrp and thus unreachable.
525			panic("unreachable")
526
527		case ruleMemoryRef:
528			ref := arg.up
529
530			switch ref.pegRule {
531			case ruleSymbolRef:
532				// This is a branch. Either the target needs to be written to a local
533				// version of the symbol to ensure that no relocations are emitted, or
534				// it needs to jump to a redirector function.
535				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
536				changed = didChange
537
538				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
539					symbol = localTargetName(symbol)
540					changed = true
541				} else if !symbolIsLocal && !isSynthesized(symbol) {
542					redirector := redirectorName(symbol)
543					d.redirectors[symbol] = redirector
544					symbol = redirector
545					changed = true
546				} else if didChange && symbolIsLocal && len(offset) > 0 {
547					// didChange is set when the inputFile index is not 0; which is the index of the
548					// first file copied to the output, which is the generated assembly of bcm.c.
549					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
550					// in order to ensure they don't collide. `index` gets incremented per file.
551					// If there is offset after the symbol, append the `offset`.
552					symbol = symbol + offset
553				}
554
555				args = append(args, symbol)
556
557			case ruleARMBaseIndexScale:
558				parts := ref.up
559				assertNodeType(parts, ruleARMRegister)
560				baseAddrReg := d.contents(parts)
561				parts = skipWS(parts.next)
562
563				// Only two forms need special handling. First there's memory references
564				// like "[x*, :got_lo12:foo]". The base register here will have been the
565				// target of an adrp instruction to load the page address, but the adrp
566				// will have turned into loading the full address *and dereferencing it*,
567				// above. Thus this instruction needs to be dropped otherwise we'll be
568				// dereferencing twice.
569				//
570				// Second there are forms like "[x*, :lo12:foo]" where the code has used
571				// adrp to load the page address into x*. That adrp will have been turned
572				// into loading the full address so just the offset needs to be dropped.
573
574				if parts != nil {
575					if parts.pegRule == ruleARMGOTLow12 {
576						if instructionName != "ldr" {
577							panic("Symbol reference outside of ldr instruction")
578						}
579
580						if skipWS(parts.next) != nil || parts.up.next != nil {
581							panic("can't handle tweak or post-increment with symbol references")
582						}
583
584						// The GOT helper already dereferenced the entry so, at most, just a mov
585						// is needed to put things in the right register.
586						d.writeCommentedNode(statement)
587						if baseAddrReg != args[0] {
588							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
589						}
590						return statement, nil
591					} else if parts.pegRule == ruleLow12BitsSymbolRef {
592						if instructionName != "ldr" {
593							panic("Symbol reference outside of ldr instruction")
594						}
595
596						// Suppress the offset; adrp loaded the full address. This assumes the
597						// the compiler does not emit code like the following:
598						//
599						//   adrp x0, symbol
600						//   ldr x1, [x0, :lo12:symbol]
601						//   ldr x2, [x0, :lo12:symbol+4]
602						//
603						// Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but
604						// this is true when symbol is sufficiently aligned.
605						args = append(args, "["+baseAddrReg+"]")
606						changed = true
607						continue
608					}
609				}
610
611				args = append(args, d.contents(fullArg))
612
613			case ruleLow12BitsSymbolRef:
614				// These are the second instruction in a pair:
615				//   adrp x0, symbol           // Load the page address into x0
616				//   add x1, x0, :lo12:symbol  // Adds the page offset.
617				//
618				// The adrp instruction will have been turned into a sequence that loads
619				// the full address, above, thus the offset is turned into zero. If that
620				// results in the instruction being a nop, then it is deleted.
621				//
622				// This assumes the compiler does not emit code like the following:
623				//
624				//   adrp x0, symbol
625				//   add x1, x0, :lo12:symbol
626				//   add x2, x0, :lo12:symbol+4
627				//
628				// Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but
629				// this is true when symbol is sufficiently aligned.
630				if instructionName != "add" {
631					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
632				}
633
634				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
635					panic("address arithmetic with incorrectly sized register")
636				}
637
638				if args[0] == args[1] {
639					d.writeCommentedNode(statement)
640					return statement, nil
641				}
642
643				args = append(args, "#0")
644				changed = true
645
646			default:
647				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
648			}
649
650		default:
651			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
652		}
653	}
654
655	if changed {
656		d.writeCommentedNode(statement)
657		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
658		d.output.WriteString(replacement)
659	} else {
660		d.writeNode(statement)
661	}
662
663	return statement, nil
664}
665
666func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
667	for symRef != nil && symRef.pegRule == ruleOffset {
668		offset := d.contents(symRef)
669		if offset[0] != '+' && offset[0] != '-' {
670			offset = "+" + offset
671		}
672		offsets = offsets + offset
673		symRef = symRef.next
674	}
675	return symRef, offsets
676}
677
678func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
679	if memRef.pegRule != ruleSymbolRef {
680		return "", "", "", false, false, memRef
681	}
682
683	symRef := memRef.up
684	nextRef = memRef.next
685
686	// (Offset* '+')?
687	symRef, offset = d.gatherOffsets(symRef, offset)
688
689	// (LocalSymbol / SymbolName)
690	symbol = d.contents(symRef)
691	if symRef.pegRule == ruleLocalSymbol {
692		symbolIsLocal = true
693		mapped := d.mapLocalSymbol(symbol)
694		if mapped != symbol {
695			symbol = mapped
696			didChange = true
697		}
698	}
699	symRef = symRef.next
700
701	// Offset*
702	symRef, offset = d.gatherOffsets(symRef, offset)
703
704	// ('@' Section / Offset*)?
705	if symRef != nil {
706		assertNodeType(symRef, ruleSection)
707		section = d.contents(symRef)
708		symRef = symRef.next
709
710		symRef, offset = d.gatherOffsets(symRef, offset)
711	}
712
713	if symRef != nil {
714		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
715	}
716
717	return
718}
719
720/* Intel */
721
// instructionType is the coarse category that classifyInstruction assigns to
// an Intel instruction.
type instructionType int

const (
	// instrPush is a push with a single argument.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a call or jump with a single argument.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is everything that classifyInstruction does not recognise.
	instrOther
)
745
746func classifyInstruction(instr string, args []*node32) instructionType {
747	switch instr {
748	case "push", "pushq":
749		if len(args) == 1 {
750			return instrPush
751		}
752
753	case "mov", "movq", "vmovq", "movsd", "vmovsd":
754		if len(args) == 2 {
755			return instrMove
756		}
757
758	case "cmovneq", "cmoveq":
759		if len(args) == 2 {
760			return instrConditionalMove
761		}
762
763	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
764		if len(args) == 1 {
765			return instrJump
766		}
767
768	case "orq", "andq", "xorq":
769		if len(args) == 2 {
770			return instrCombine
771		}
772
773	case "cmpq":
774		if len(args) == 2 {
775			return instrCompare
776		}
777
778	case "sarxq", "shlxq", "shrxq":
779		if len(args) == 3 {
780			return instrThreeArg
781		}
782
783	case "vpbroadcastq":
784		if len(args) == 2 {
785			return instrTransformingMove
786		}
787
788	case "movlps", "movhps":
789		if len(args) == 2 {
790			return instrMemoryVectorCombine
791		}
792	}
793
794	return instrOther
795}
796
797func push(w stringWriter) wrapperFunc {
798	return func(k func()) {
799		w.WriteString("\tpushq %rax\n")
800		k()
801		w.WriteString("\txchg %rax, (%rsp)\n")
802	}
803}
804
805func compare(w stringWriter, instr, a, b string) wrapperFunc {
806	return func(k func()) {
807		k()
808		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
809	}
810}
811
812func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
813	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
814
815	return func(k func()) {
816		if !redzoneCleared {
817			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
818		}
819		w.WriteString("\tpushf\n")
820		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
821		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
822		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
823		w.WriteString("\tpopf\n")
824		if !redzoneCleared {
825			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
826		}
827	}
828}
829
830func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
831	return func(k func()) {
832		if !redzoneCleared {
833			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
834			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
835		}
836		w.WriteString("\tpushfq\n")
837		k()
838		w.WriteString("\tpopfq\n")
839	}
840}
841
842func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
843	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
844
845	var reg string
846NextCandidate:
847	for _, candidate := range candidates {
848		for _, avoid := range avoidRegs {
849			if candidate == avoid {
850				continue NextCandidate
851			}
852		}
853
854		reg = candidate
855		break
856	}
857
858	if len(reg) == 0 {
859		panic("too many excluded registers")
860	}
861
862	return func(k func()) {
863		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
864		w.WriteString("\tpushq " + reg + "\n")
865		k()
866		w.WriteString("\tpopq " + reg + "\n")
867		w.WriteString("\tleaq 128(%rsp), %rsp\n")
868	}, reg
869}
870
871func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
872	return func(k func()) {
873		k()
874		prefix := ""
875		if isAVX {
876			prefix = "v"
877		}
878		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
879	}
880}
881
882func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
883	return func(k func()) {
884		k()
885		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
886	}
887}
888
889func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
890	return func(k func()) {
891		k()
892		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
893	}
894}
895
896func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
897	return func(k func()) {
898		k()
899		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
900	}
901}
902
903func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
904	return func(k func()) {
905		k()
906		// These instructions can only read from memory, so push
907		// tempReg and read from the stack. Note we assume the red zone
908		// was previously cleared by saveRegister().
909		w.WriteString("\tpushq " + source + "\n")
910		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
911		w.WriteString("\tleaq 8(%rsp), %rsp\n")
912	}
913}
914
// isValidLEATarget reports whether |reg| is acceptable as the destination of
// an lea instruction, which is the case for anything that is not an %xmm,
// %ymm or %zmm register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
918
919func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
920	var invertedCondition string
921
922	switch instr {
923	case "cmoveq":
924		invertedCondition = "ne"
925	case "cmovneq":
926		invertedCondition = "e"
927	default:
928		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
929	}
930
931	return func(k func()) {
932		w.WriteString("\tj" + invertedCondition + " 999f\n")
933		k()
934		w.WriteString("999:\n")
935	}
936}
937
938func (d *delocation) isRIPRelative(node *node32) bool {
939	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
940}
941
942func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
943	var prefix string
944	if instruction.pegRule == ruleInstructionPrefix {
945		prefix = d.contents(instruction)
946		instruction = skipWS(instruction.next)
947	}
948
949	assertNodeType(instruction, ruleInstructionName)
950	instructionName := d.contents(instruction)
951
952	argNodes := instructionArgs(instruction.next)
953
954	var wrappers wrapperStack
955	var args []string
956	changed := false
957
958Args:
959	for i, arg := range argNodes {
960		fullArg := arg
961		isIndirect := false
962
963		if arg.pegRule == ruleIndirectionIndicator {
964			arg = arg.next
965			isIndirect = true
966		}
967
968		switch arg.pegRule {
969		case ruleRegisterOrConstant, ruleLocalLabelRef:
970			args = append(args, d.contents(fullArg))
971
972		case ruleMemoryRef:
973			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
974			changed = didChange
975
976			if symbol == "OPENSSL_ia32cap_P" && section == "" {
977				if instructionName != "leaq" {
978					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
979				}
980
981				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
982					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
983				}
984
985				target := argNodes[1]
986				assertNodeType(target, ruleRegisterOrConstant)
987				reg := d.contents(target)
988
989				if !strings.HasPrefix(reg, "%r") {
990					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
991				}
992
993				changed = true
994
995				// Flag-altering instructions (i.e. addq) are going to be used so the
996				// flags need to be preserved.
997				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
998
999				wrappers = append(wrappers, func(k func()) {
1000					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
1001					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
1002				})
1003
1004				break Args
1005			}
1006
1007			switch section {
1008			case "":
1009				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1010					symbol = localTargetName(symbol)
1011					changed = true
1012				}
1013
1014			case "PLT":
1015				if classifyInstruction(instructionName, argNodes) != instrJump {
1016					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
1017				}
1018
1019				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1020					symbol = localTargetName(symbol)
1021					changed = true
1022				} else if !symbolIsLocal && !isSynthesized(symbol) {
1023					// Unknown symbol via PLT is an
1024					// out-call from the module, e.g.
1025					// memcpy.
1026					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1027					symbol = redirectorName(symbol)
1028				}
1029
1030				changed = true
1031
1032			case "GOTPCREL":
1033				if len(offset) > 0 {
1034					return nil, errors.New("loading from GOT with offset is unsupported")
1035				}
1036				if !d.isRIPRelative(memRef) {
1037					return nil, errors.New("GOT access must be IP-relative")
1038				}
1039
1040				useGOT := false
1041				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1042					symbol = localTargetName(symbol)
1043					changed = true
1044				} else if !isSynthesized(symbol) {
1045					useGOT = true
1046				}
1047
1048				classification := classifyInstruction(instructionName, argNodes)
1049				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1050					return nil, errors.New("GOT access must be source operand")
1051				}
1052
1053				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1054				var targetReg string
1055				var redzoneCleared bool
1056				switch classification {
1057				case instrPush:
1058					wrappers = append(wrappers, push(d.output))
1059					targetReg = "%rax"
1060				case instrConditionalMove:
1061					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1062					fallthrough
1063				case instrMove:
1064					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1065					targetReg = d.contents(argNodes[1])
1066				case instrCompare:
1067					otherSource := d.contents(argNodes[i^1])
1068					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1069					redzoneCleared = true
1070					wrappers = append(wrappers, saveRegWrapper)
1071					if i == 0 {
1072						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1073					} else {
1074						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1075					}
1076					targetReg = tempReg
1077				case instrTransformingMove:
1078					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1079					targetReg = d.contents(argNodes[1])
1080					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1081					if isValidLEATarget(targetReg) {
1082						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1083					}
1084				case instrCombine:
1085					targetReg = d.contents(argNodes[1])
1086					if !isValidLEATarget(targetReg) {
1087						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1088					}
1089					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1090					redzoneCleared = true
1091					wrappers = append(wrappers, saveRegWrapper)
1092
1093					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1094					targetReg = tempReg
1095				case instrMemoryVectorCombine:
1096					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1097					targetReg = d.contents(argNodes[1])
1098					if isValidLEATarget(targetReg) {
1099						return nil, errors.New("target register must be an XMM register")
1100					}
1101					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1102					wrappers = append(wrappers, saveRegWrapper)
1103					redzoneCleared = true
1104					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1105					targetReg = tempReg
1106				case instrThreeArg:
1107					if n := len(argNodes); n != 3 {
1108						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1109					}
1110					if i != 0 && i != 1 {
1111						return nil, errors.New("GOT access must be from source operand")
1112					}
1113					targetReg = d.contents(argNodes[2])
1114
1115					otherSource := d.contents(argNodes[1])
1116					if i == 1 {
1117						otherSource = d.contents(argNodes[0])
1118					}
1119
1120					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1121					redzoneCleared = true
1122					wrappers = append(wrappers, saveRegWrapper)
1123
1124					if i == 0 {
1125						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1126					} else {
1127						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1128					}
1129					targetReg = tempReg
1130				default:
1131					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1132				}
1133
1134				if !isValidLEATarget(targetReg) {
1135					// Sometimes the compiler will load from the GOT to an
1136					// XMM register, which is not a valid target of an LEA
1137					// instruction.
1138					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1139					wrappers = append(wrappers, saveRegWrapper)
1140					isAVX := strings.HasPrefix(instructionName, "v")
1141					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1142					targetReg = tempReg
1143					if redzoneCleared {
1144						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1145					}
1146					redzoneCleared = true
1147				}
1148
1149				if symbol == "OPENSSL_ia32cap_P" {
1150					// Flag-altering instructions (i.e. addq) are going to be used so the
1151					// flags need to be preserved.
1152					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1153					wrappers = append(wrappers, func(k func()) {
1154						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1155						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1156					})
1157				} else if useGOT {
1158					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1159				} else {
1160					wrappers = append(wrappers, func(k func()) {
1161						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1162					})
1163				}
1164				changed = true
1165				break Args
1166
1167			default:
1168				return nil, fmt.Errorf("Unknown section type %q", section)
1169			}
1170
1171			if !changed && len(section) > 0 {
1172				panic("section was not handled")
1173			}
1174			section = ""
1175
1176			argStr := ""
1177			if isIndirect {
1178				argStr += "*"
1179			}
1180			argStr += symbol
1181			argStr += offset
1182
1183			for ; memRef != nil; memRef = memRef.next {
1184				argStr += d.contents(memRef)
1185			}
1186
1187			args = append(args, argStr)
1188
1189		case ruleGOTAddress:
1190			if instructionName != "leaq" {
1191				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea")
1192			}
1193			if i != 0 || len(argNodes) != 2 {
1194				return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form")
1195			}
1196			d.gotDeltaNeeded = true
1197			changed = true
1198			targetReg := d.contents(argNodes[1])
1199			args = append(args, ".Lboringssl_got_delta(%rip)")
1200			wrappers = append(wrappers, func(k func()) {
1201				k()
1202				d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg))
1203			})
1204
1205		case ruleGOTLocation:
1206			if instructionName != "movabsq" {
1207				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1208			}
1209			if i != 0 || len(argNodes) != 2 {
1210				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1211			}
1212
1213			d.gotDeltaNeeded = true
1214			changed = true
1215			instructionName = "movq"
1216			assertNodeType(arg.up, ruleLocalSymbol)
1217			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1218			targetReg := d.contents(argNodes[1])
1219			args = append(args, ".Lboringssl_got_delta(%rip)")
1220			wrappers = append(wrappers, func(k func()) {
1221				k()
1222				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1223			})
1224
1225		case ruleGOTSymbolOffset:
1226			if instructionName != "movabsq" {
1227				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1228			}
1229			if i != 0 || len(argNodes) != 2 {
1230				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1231			}
1232
1233			assertNodeType(arg.up, ruleSymbolName)
1234			symbol := d.contents(arg.up)
1235			if strings.HasPrefix(symbol, ".L") {
1236				symbol = d.mapLocalSymbol(symbol)
1237			}
1238			targetReg := d.contents(argNodes[1])
1239
1240			var prefix string
1241			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1242			if isGOTOFF {
1243				prefix = "gotoff"
1244				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1245			} else {
1246				prefix = "got"
1247				d.gotOffsetsNeeded[symbol] = struct{}{}
1248			}
1249			changed = true
1250
1251			wrappers = append(wrappers, func(k func()) {
1252				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1253				// of writing) emits 64-bit relocations anyway, so the following four bytes
1254				// get stomped. Thus we use 64-bit offsets.
1255				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1256			})
1257
1258		default:
1259			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1260		}
1261	}
1262
1263	if changed {
1264		d.writeCommentedNode(statement)
1265		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1266		if len(prefix) != 0 {
1267			replacement = "\t" + prefix + replacement
1268		}
1269		wrappers.do(func() {
1270			d.output.WriteString(replacement)
1271		})
1272	} else {
1273		d.writeNode(statement)
1274	}
1275
1276	return statement, nil
1277}
1278
1279func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1280	lastStatement := statement
1281	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1282		node := skipWS(statement.up)
1283		if node == nil {
1284			d.writeNode(statement)
1285			continue
1286		}
1287
1288		switch node.pegRule {
1289		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1290			d.writeNode(statement)
1291
1292		case ruleDirective:
1293			directive := node.up
1294			assertNodeType(directive, ruleDirectiveName)
1295			directiveName := d.contents(directive)
1296			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1297				return lastStatement, nil
1298			}
1299			d.writeNode(statement)
1300
1301		case ruleLabel:
1302			label := node.up
1303			d.writeNode(statement)
1304
1305			if label.pegRule != ruleLocalSymbol {
1306				symbol := d.contents(label)
1307				localSymbol := localTargetName(symbol)
1308				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1309
1310				d.bssAccessorsNeeded[symbol] = localSymbol
1311			}
1312
1313		case ruleLabelContainingDirective:
1314			var err error
1315			statement, err = d.processLabelContainingDirective(statement, node.up)
1316			if err != nil {
1317				return nil, err
1318			}
1319
1320		default:
1321			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1322		}
1323	}
1324
1325	return lastStatement, nil
1326}
1327
1328func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1329	w.WriteString(".p2align 2\n")
1330	w.WriteString(".hidden " + funcName + "\n")
1331	w.WriteString(".type " + funcName + ", @function\n")
1332	w.WriteString(funcName + ":\n")
1333	w.WriteString(".cfi_startproc\n")
1334	// We insert a landing pad (`bti c` instruction) unconditionally at the beginning of
1335	// every generated function so that they can be called indirectly (with `blr` or
1336	// `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is
1337	// a no-op on machines or program states not supporting BTI (Branch Target Identification).
1338	// None of the generated function bodies call other functions (with bl or blr), so we only
1339	// insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`.
1340	// Normally we would also generate a .note.gnu.property section to annotate the assembly
1341	// file as BTI-compatible, but if the input assembly files are BTI-compatible, they should
1342	// already have those sections so there is no need to add an extra one ourselves.
1343	w.WriteString("\thint #34 // bti c\n")
1344	writeContents(w)
1345	w.WriteString(".cfi_endproc\n")
1346	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1347}
1348
1349func transform(w stringWriter, inputs []inputFile) error {
1350	// symbols contains all defined symbols.
1351	symbols := make(map[string]struct{})
1352	// fileNumbers is the set of IDs seen in .file directives.
1353	fileNumbers := make(map[int]struct{})
1354	// maxObservedFileNumber contains the largest seen file number in a
1355	// .file directive. Zero is not a valid number.
1356	maxObservedFileNumber := 0
1357	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1358	// checksums in .file directives. If it does so, then this script needs
1359	// to match that behaviour otherwise warnings result.
1360	fileDirectivesContainMD5 := false
1361
1362	// OPENSSL_ia32cap_get will be synthesized by this script.
1363	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1364
1365	for _, input := range inputs {
1366		forEachPath(input.ast.up, func(node *node32) {
1367			symbol := input.contents[node.begin:node.end]
1368			if _, ok := symbols[symbol]; ok {
1369				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1370			}
1371			symbols[symbol] = struct{}{}
1372		}, ruleStatement, ruleLabel, ruleSymbolName)
1373
1374		forEachPath(input.ast.up, func(node *node32) {
1375			assertNodeType(node, ruleLocationDirective)
1376			directive := input.contents[node.begin:node.end]
1377			if !strings.HasPrefix(directive, ".file") {
1378				return
1379			}
1380			parts := strings.Fields(directive)
1381			if len(parts) == 2 {
1382				// This is a .file directive with just a
1383				// filename. Clang appears to generate just one
1384				// of these at the beginning of the output for
1385				// the compilation unit. Ignore it.
1386				return
1387			}
1388			fileNo, err := strconv.Atoi(parts[1])
1389			if err != nil {
1390				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1391			}
1392
1393			if _, ok := fileNumbers[fileNo]; ok {
1394				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1395			}
1396			fileNumbers[fileNo] = struct{}{}
1397
1398			if fileNo > maxObservedFileNumber {
1399				maxObservedFileNumber = fileNo
1400			}
1401
1402			for _, token := range parts[2:] {
1403				if token == "md5" {
1404					fileDirectivesContainMD5 = true
1405				}
1406			}
1407		}, ruleStatement, ruleLocationDirective)
1408	}
1409
1410	processor := x86_64
1411	if len(inputs) > 0 {
1412		processor = detectProcessor(inputs[0])
1413	}
1414
1415	commentIndicator := "#"
1416	if processor == aarch64 {
1417		commentIndicator = "//"
1418	}
1419
1420	d := &delocation{
1421		symbols:             symbols,
1422		processor:           processor,
1423		commentIndicator:    commentIndicator,
1424		output:              w,
1425		redirectors:         make(map[string]string),
1426		bssAccessorsNeeded:  make(map[string]string),
1427		gotExternalsNeeded:  make(map[string]struct{}),
1428		gotOffsetsNeeded:    make(map[string]struct{}),
1429		gotOffOffsetsNeeded: make(map[string]struct{}),
1430	}
1431
1432	w.WriteString(".text\n")
1433	var fileTrailing string
1434	if fileDirectivesContainMD5 {
1435		fileTrailing = " md5 0x00000000000000000000000000000000"
1436	}
1437	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1438	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1439	w.WriteString("BORINGSSL_bcm_text_start:\n")
1440
1441	for _, input := range inputs {
1442		if err := d.processInput(input); err != nil {
1443			return err
1444		}
1445	}
1446
1447	w.WriteString(".text\n")
1448	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1449	w.WriteString("BORINGSSL_bcm_text_end:\n")
1450
1451	// Emit redirector functions. Each is a single jump instruction.
1452	var redirectorNames []string
1453	for name := range d.redirectors {
1454		redirectorNames = append(redirectorNames, name)
1455	}
1456	sort.Strings(redirectorNames)
1457
1458	for _, name := range redirectorNames {
1459		redirector := d.redirectors[name]
1460		switch d.processor {
1461		case aarch64:
1462			writeAarch64Function(w, redirector, func(w stringWriter) {
1463				w.WriteString("\tb " + name + "\n")
1464			})
1465
1466		case x86_64:
1467			w.WriteString(".type " + redirector + ", @function\n")
1468			w.WriteString(redirector + ":\n")
1469			w.WriteString("\tjmp\t" + name + "\n")
1470		}
1471	}
1472
1473	var accessorNames []string
1474	for accessor := range d.bssAccessorsNeeded {
1475		accessorNames = append(accessorNames, accessor)
1476	}
1477	sort.Strings(accessorNames)
1478
1479	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1480	for _, name := range accessorNames {
1481		funcName := accessorName(name)
1482		target := d.bssAccessorsNeeded[name]
1483
1484		switch d.processor {
1485		case x86_64:
1486			w.WriteString(".type " + funcName + ", @function\n")
1487			w.WriteString(funcName + ":\n")
1488			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1489
1490		case aarch64:
1491			writeAarch64Function(w, funcName, func(w stringWriter) {
1492				w.WriteString("\tadrp x0, " + target + "\n")
1493				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
1494				w.WriteString("\tret\n")
1495			})
1496		}
1497	}
1498
1499	switch d.processor {
1500	case aarch64:
1501		externalNames := sortedSet(d.gotExternalsNeeded)
1502		for _, symbol := range externalNames {
1503			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
1504				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
1505				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
1506				w.WriteString("\tret\n")
1507			})
1508		}
1509
1510		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
1511			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
1512			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
1513			w.WriteString("\tret\n")
1514		})
1515
1516	case x86_64:
1517		externalNames := sortedSet(d.gotExternalsNeeded)
1518		for _, name := range externalNames {
1519			parts := strings.SplitN(name, "@", 2)
1520			symbol, section := parts[0], parts[1]
1521			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1522			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1523			w.WriteString(symbol + "_" + section + "_external:\n")
1524			// Ideally this would be .quad foo@GOTPCREL, but clang's
1525			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1526			// we manually sign-extend the value, knowing that the GOT is
1527			// always at the end, thus foo@GOTPCREL has a positive value.
1528			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1529			w.WriteString("\t.long 0\n")
1530		}
1531
1532		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1533		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1534		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1535		w.WriteString("OPENSSL_ia32cap_get:\n")
1536		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1537		w.WriteString("\tret\n")
1538
1539		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1540		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1541		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1542		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1543		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1544
1545		if d.gotDeltaNeeded {
1546			w.WriteString(".Lboringssl_got_delta:\n")
1547			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1548		}
1549
1550		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1551			w.WriteString(".Lboringssl_got_" + name + ":\n")
1552			w.WriteString("\t.quad " + name + "@GOT\n")
1553		}
1554		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1555			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1556			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1557		}
1558	}
1559
1560	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1561	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
1562	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1563	for _, b := range fipscommon.UninitHashValue {
1564		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1565	}
1566
1567	return nil
1568}
1569
// preprocess runs the command given by cppCommand with path appended as the
// final argument and returns what the command wrote to standard output. The
// command's standard error is passed through to this process's standard
// error. If the command fails, nil and the error are returned.
func preprocess(cppCommand []string, path string) ([]byte, error) {
	argv := make([]string, 0, len(cppCommand)+1)
	argv = append(argv, cppCommand...)
	argv = append(argv, path)

	var stdout bytes.Buffer
	cpp := exec.Command(argv[0], argv[1:]...)
	cpp.Stdout = &stdout
	cpp.Stderr = os.Stderr

	err := cpp.Run()
	if err != nil {
		return nil, err
	}
	return stdout.Bytes(), nil
}
1587
1588func parseInputs(inputs []inputFile, cppCommand []string) error {
1589	for i, input := range inputs {
1590		var contents string
1591
1592		if input.isArchive {
1593			arFile, err := os.Open(input.path)
1594			if err != nil {
1595				return err
1596			}
1597			defer arFile.Close()
1598
1599			ar, err := ar.ParseAR(arFile)
1600			if err != nil {
1601				return err
1602			}
1603
1604			if len(ar) != 1 {
1605				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1606			}
1607
1608			for _, c := range ar {
1609				contents = string(c)
1610			}
1611		} else {
1612			var inBytes []byte
1613			var err error
1614
1615			if len(cppCommand) > 0 {
1616				inBytes, err = preprocess(cppCommand, input.path)
1617			} else {
1618				inBytes, err = os.ReadFile(input.path)
1619			}
1620			if err != nil {
1621				return err
1622			}
1623
1624			contents = string(inBytes)
1625		}
1626
1627		asm := Asm{Buffer: contents, Pretty: true}
1628		asm.Init()
1629		if err := asm.Parse(); err != nil {
1630			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1631		}
1632		ast := asm.AST()
1633
1634		inputs[i].contents = contents
1635		inputs[i].ast = ast
1636	}
1637
1638	return nil
1639}
1640
// includePathFromHeaderFilePath derives an include directory from the path of
// a header file. It strips path elements from the end until it finds one
// named "openssl" and returns everything before that element. An error is
// returned if no element of the path is called "openssl".
func includePathFromHeaderFilePath(path string) (string, error) {
	for remaining := path; ; {
		parent, leaf := filepath.Split(remaining)
		if leaf == "openssl" {
			return parent, nil
		}
		if parent == "" {
			break
		}
		// Drop the trailing separator so that the next Split yields the
		// preceding path element.
		remaining = parent[:len(parent)-1]
	}

	return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
}
1662
1663func main() {
1664	// The .a file, if given, is expected to be an archive of textual
1665	// assembly sources. That's odd, but CMake really wants to create
1666	// archive files so it's the only way that we can make it work.
1667	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1668	outFile := flag.String("o", "", "Path to output assembly")
1669	ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs")
1670	ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing")
1671
1672	flag.Parse()
1673
1674	if len(*outFile) == 0 {
1675		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1676		os.Exit(1)
1677	}
1678
1679	var inputs []inputFile
1680	if len(*arInput) > 0 {
1681		inputs = append(inputs, inputFile{
1682			path:      *arInput,
1683			index:     0,
1684			isArchive: true,
1685		})
1686	}
1687
1688	includePaths := make(map[string]struct{})
1689
1690	for i, path := range flag.Args() {
1691		if len(path) == 0 {
1692			continue
1693		}
1694
1695		// Header files are not processed but their path is remembered
1696		// and passed as -I arguments when invoking the preprocessor.
1697		if strings.HasSuffix(path, ".h") {
1698			dir, err := includePathFromHeaderFilePath(path)
1699			if err != nil {
1700				fmt.Fprintf(os.Stderr, "%s\n", err)
1701				os.Exit(1)
1702			}
1703			includePaths[dir] = struct{}{}
1704			continue
1705		}
1706
1707		inputs = append(inputs, inputFile{
1708			path:  path,
1709			index: i + 1,
1710		})
1711	}
1712
1713	var cppCommand []string
1714	if len(*ccPath) > 0 {
1715		cppCommand = append(cppCommand, *ccPath)
1716		cppCommand = append(cppCommand, strings.Fields(*ccFlags)...)
1717		// Some of ccFlags might be superfluous when running the
1718		// preprocessor, but we don't want the compiler complaining that
1719		// "argument unused during compilation".
1720		cppCommand = append(cppCommand, "-Wno-unused-command-line-argument")
1721
1722		for includePath := range includePaths {
1723			cppCommand = append(cppCommand, "-I"+includePath)
1724		}
1725
1726		// -E requests only preprocessing.
1727		cppCommand = append(cppCommand, "-E")
1728	}
1729
1730	if err := parseInputs(inputs, cppCommand); err != nil {
1731		fmt.Fprintf(os.Stderr, "%s\n", err)
1732		os.Exit(1)
1733	}
1734
1735	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1736	if err != nil {
1737		panic(err)
1738	}
1739	defer out.Close()
1740
1741	if err := transform(out, inputs); err != nil {
1742		fmt.Fprintf(os.Stderr, "%s\n", err)
1743		os.Exit(1)
1744	}
1745}
1746
1747func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1748	if node == nil {
1749		return
1750	}
1751
1752	if len(rules) == 0 {
1753		cb(node)
1754		return
1755	}
1756
1757	rule := rules[0]
1758	childRules := rules[1:]
1759
1760	for ; node != nil; node = node.next {
1761		if node.pegRule != rule {
1762			continue
1763		}
1764
1765		if len(childRules) == 0 {
1766			cb(node)
1767		} else {
1768			forEachPath(node.up, cb, childRules...)
1769		}
1770	}
1771}
1772
1773func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1774	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1775	}
1776	return node
1777}
1778
1779func skipWS(node *node32) *node32 {
1780	return skipNodes(node, ruleWS)
1781}
1782
1783func assertNodeType(node *node32, expected pegRule) {
1784	if rule := node.pegRule; rule != expected {
1785		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1786	}
1787}
1788
// wrapperFunc wraps a continuation: it is handed a function k and decides
// whether and when to call it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrappers. The first element is the
// outermost wrapper.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside all of the wrappers in the stack. Each
// wrapper is removed from the stack before it is invoked, and the function
// it receives runs whatever is left of the stack around baseCase. With an
// empty stack, baseCase is simply called.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) > 0 {
		outermost := pending[0]
		*w = pending[1:]
		outermost(func() {
			w.do(baseCase)
		})
		return
	}

	baseCase()
}
1803
// localTargetName returns the name of the local label that stands in for the
// global symbol called name.
func localTargetName(name string) string {
	const (
		localPrefix  = ".L"
		targetSuffix = "_local_target"
	)
	return localPrefix + name + targetSuffix
}
1809
// isSynthesized reports whether symbol matches one of the name patterns
// recognised here: a "_bss_get" suffix, the exact name OPENSSL_ia32cap_get,
// or a "BORINGSSL_bcm_text_" prefix.
func isSynthesized(symbol string) bool {
	switch {
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case symbol == "OPENSSL_ia32cap_get":
		return true
	default:
		return strings.HasPrefix(symbol, "BORINGSSL_bcm_text_")
	}
}
1815
// redirectorName returns the name of the redirector function for symbol.
func redirectorName(symbol string) string {
	const redirectorPrefix = "bcm_redirector_"
	return redirectorPrefix + symbol
}
1819
// sectionType returns the type of a section, i.e. its name up to the second
// dot: a section called ".text.foo" is a ".text" section. Every section whose
// type starts with ".debug_" is reported as ".debug". The boolean is false
// when section is empty or does not start with a dot.
func sectionType(section string) (string, bool) {
	if section == "" || section[0] != '.' {
		return "", false
	}

	// Look for a second dot, past the leading one, and cut the name there.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		section = section[:dot+1]
	}

	if !strings.HasPrefix(section, ".debug_") {
		return section, true
	}
	return ".debug", true
}
1838
// accessorName returns the name of the accessor function for the BSS symbol
// called name.
func accessorName(name string) string {
	const accessorSuffix = "_bss_get"
	return name + accessorSuffix
}
1844
1845func (d *delocation) mapLocalSymbol(symbol string) string {
1846	if d.currentInput.index == 0 {
1847		return symbol
1848	}
1849	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1850}
1851
1852func detectProcessor(input inputFile) processorType {
1853	for statement := input.ast.up; statement != nil; statement = statement.next {
1854		node := skipNodes(statement.up, ruleWS)
1855		if node == nil || node.pegRule != ruleInstruction {
1856			continue
1857		}
1858
1859		instruction := node.up
1860		instructionName := input.contents[instruction.begin:instruction.end]
1861
1862		switch instructionName {
1863		case "movq", "call", "leaq":
1864			return x86_64
1865		case "str", "bl", "ldr", "st1":
1866			return aarch64
1867		}
1868	}
1869
1870	panic("processed entire input and didn't recognise any instructions.")
1871}
1872
// sortedSet returns the keys of m as a new slice in ascending order. The
// result is never nil, even when m is empty.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1881