• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"bytes"
21	"errors"
22	"flag"
23	"fmt"
24	"os"
25	"os/exec"
26	"path/filepath"
27	"sort"
28	"strconv"
29	"strings"
30
31	"boringssl.googlesource.com/boringssl/util/ar"
32	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
33)
34
// inputFile represents a textual assembly file.
type inputFile struct {
	// path names the file. NOTE(review): presumably the filesystem path the
	// input was read from; nothing in this part of the file reads it — confirm.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes refer to ranges
	// of this string by their begin/end offsets.
	contents string
	// ast points to the head of the syntax tree. Its children (reached via
	// |up|) are the top-level statements of the file.
	ast *node32
}
49
// stringWriter is the minimal output sink used when emitting assembly: any
// value with a WriteString method of this signature satisfies it.
type stringWriter interface {
	// WriteString writes the given string and reports the number of bytes
	// written along with any error.
	WriteString(string) (int, error)
}
53
// processorType identifies the architecture of the assembly being processed.
type processorType int

const (
	// x86_64 selects the Intel instruction handling. Numbering starts at one,
	// so the zero value of processorType does not name any processor.
	x86_64 processorType = iota + 1
	// aarch64 selects the Aarch64 instruction handling.
	aarch64
)
60
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which per-architecture instruction handler is used.
	processor processorType
	// output receives all of the rewritten assembly text.
	output stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. It is set at the start of
	// processInput and is what node offsets are resolved against.
	currentInput inputFile
}
92
93func (d *delocation) contents(node *node32) string {
94	return d.currentInput.contents[node.begin:node.end]
95}
96
97// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) {
99	if _, err := d.output.WriteString(d.contents(node)); err != nil {
100		panic(err)
101	}
102}
103
104func (d *delocation) writeCommentedNode(node *node32) {
105	line := d.contents(node)
106	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
107		panic(err)
108	}
109}
110
111func locateError(err error, with *node32, in inputFile) error {
112	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
113	var line int
114	for _, pos := range posMap {
115		line = pos.line
116	}
117
118	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
119}
120
121func (d *delocation) processInput(input inputFile) (err error) {
122	d.currentInput = input
123
124	var origStatement *node32
125	defer func() {
126		if err := recover(); err != nil {
127			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
128		}
129	}()
130
131	for statement := input.ast.up; statement != nil; statement = statement.next {
132		assertNodeType(statement, ruleStatement)
133		origStatement = statement
134
135		node := skipWS(statement.up)
136		if node == nil {
137			d.writeNode(statement)
138			continue
139		}
140
141		switch node.pegRule {
142		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
143			d.writeNode(statement)
144		case ruleDirective:
145			statement, err = d.processDirective(statement, node.up)
146		case ruleLabelContainingDirective:
147			statement, err = d.processLabelContainingDirective(statement, node.up)
148		case ruleLabel:
149			statement, err = d.processLabel(statement, node.up)
150		case ruleInstruction:
151			switch d.processor {
152			case x86_64:
153				statement, err = d.processIntelInstruction(statement, node.up)
154			case aarch64:
155				statement, err = d.processAarch64Instruction(statement, node.up)
156			default:
157				panic("unknown processor")
158			}
159		default:
160			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
161		}
162
163		if err != nil {
164			return locateError(err, origStatement, input)
165		}
166	}
167
168	return nil
169}
170
171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
172	assertNodeType(directive, ruleDirectiveName)
173	directiveName := d.contents(directive)
174
175	var args []string
176	forEachPath(directive, func(arg *node32) {
177		// If the argument is a quoted string, use the raw contents.
178		// (Note that this doesn't unescape the string, but that's not
179		// needed so far.
180		if arg.up != nil {
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedArg)
183			if arg.up == nil {
184				args = append(args, "")
185				return
186			}
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedText)
189		}
190		args = append(args, d.contents(arg))
191	}, ruleArgs, ruleArg)
192
193	switch directiveName {
194	case "comm", "lcomm":
195		if len(args) < 1 {
196			return nil, errors.New("comm directive has no arguments")
197		}
198		d.bssAccessorsNeeded[args[0]] = args[0]
199		d.writeNode(statement)
200
201	case "data":
202		// ASAN and some versions of MSAN are adding a .data section,
203		// and adding references to symbols within it to the code. We
204		// will have to work around this in the future.
205		return nil, errors.New(".data section found in module")
206
207	case "section":
208		section := args[0]
209
210		if section == ".data.rel.ro" {
211			// In a normal build, this is an indication of a
212			// problem but any references from the module to this
213			// section will result in a relocation and thus will
214			// break the integrity check. ASAN can generate these
215			// sections and so we will likely have to work around
216			// that in the future.
217			return nil, errors.New(".data.rel.ro section found in module")
218		}
219
220		sectionType, ok := sectionType(section)
221		if !ok {
222			// Unknown sections are permitted in order to be robust
223			// to different compiler modes.
224			d.writeNode(statement)
225			break
226		}
227
228		switch sectionType {
229		case ".rodata", ".text":
230			// Move .rodata to .text so it may be accessed without
231			// a relocation. GCC with -fmerge-constants will place
232			// strings into separate sections, so we move all
233			// sections named like .rodata. Also move .text.startup
234			// so the self-test function is also in the module.
235			d.writeCommentedNode(statement)
236			d.output.WriteString(".text\n")
237
238		case ".data":
239			// See above about .data
240			return nil, errors.New(".data section found in module")
241
242		case ".init_array", ".fini_array", ".ctors", ".dtors":
243			// init_array/ctors/dtors contains function
244			// pointers to constructor/destructor
245			// functions. These contain relocations, but
246			// they're in a different section anyway.
247			d.writeNode(statement)
248			break
249
250		case ".debug", ".note":
251			d.writeNode(statement)
252			break
253
254		case ".bss":
255			d.writeNode(statement)
256			return d.handleBSS(statement)
257		}
258
259	default:
260		d.writeNode(statement)
261	}
262
263	return statement, nil
264}
265
266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool {
267	changed := false
268	assertNodeType(expr, ruleSymbolExpr)
269
270	for expr != nil {
271		atom := expr.up
272		assertNodeType(atom, ruleSymbolAtom)
273
274		for term := atom.up; term != nil; term = skipWS(term.next) {
275			if term.pegRule == ruleSymbolExpr {
276				changed = d.processSymbolExpr(term, b) || changed
277				continue
278			}
279
280			if term.pegRule != ruleLocalSymbol {
281				b.WriteString(d.contents(term))
282				continue
283			}
284
285			oldSymbol := d.contents(term)
286			newSymbol := d.mapLocalSymbol(oldSymbol)
287			if newSymbol != oldSymbol {
288				changed = true
289			}
290
291			b.WriteString(newSymbol)
292		}
293
294		next := skipWS(atom.next)
295		if next == nil {
296			break
297		}
298		assertNodeType(next, ruleSymbolOperator)
299		b.WriteString(d.contents(next))
300		next = skipWS(next.next)
301		assertNodeType(next, ruleSymbolExpr)
302		expr = next
303	}
304	return changed
305}
306
307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
308	// The symbols within directives need to be mapped so that local
309	// symbols in two different .s inputs don't collide.
310	changed := false
311	assertNodeType(directive, ruleLabelContainingDirectiveName)
312	name := d.contents(directive)
313
314	node := directive.next
315	assertNodeType(node, ruleWS)
316
317	node = node.next
318	assertNodeType(node, ruleSymbolArgs)
319
320	var args []string
321	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
322		assertNodeType(node, ruleSymbolArg)
323		arg := node.up
324		assertNodeType(arg, ruleSymbolExpr)
325
326		var b strings.Builder
327		changed = d.processSymbolExpr(arg, &b) || changed
328
329		args = append(args, b.String())
330	}
331
332	if !changed {
333		d.writeNode(statement)
334	} else {
335		d.writeCommentedNode(statement)
336		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
337	}
338
339	return statement, nil
340}
341
342func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
343	symbol := d.contents(label)
344
345	switch label.pegRule {
346	case ruleLocalLabel:
347		d.output.WriteString(symbol + ":\n")
348	case ruleLocalSymbol:
349		// symbols need to be mapped so that local symbols from two
350		// different .s inputs don't collide.
351		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
352	case ruleSymbolName:
353		d.output.WriteString(localTargetName(symbol) + ":\n")
354		d.writeNode(statement)
355	default:
356		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
357	}
358
359	return statement, nil
360}
361
362// instructionArgs collects all the arguments to an instruction.
363func instructionArgs(node *node32) (argNodes []*node32) {
364	for node = skipWS(node); node != nil; node = skipWS(node.next) {
365		assertNodeType(node, ruleInstructionArg)
366		argNodes = append(argNodes, node.up)
367	}
368
369	return argNodes
370}
371
372// Aarch64 support
373
// gotHelperName gives the name of the synthesised function that returns the
// address of |symbol| from the GOT: the symbol name with a fixed local prefix.
func gotHelperName(symbol string) string {
	const helperPrefix = ".Lboringssl_loadgot_"
	return helperPrefix + symbol
}
379
380// loadAarch64Address emits instructions to put the address of |symbol|
381// (optionally adjusted by |offsetStr|) into |targetReg|.
382func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
383	// There are two paths here: either the symbol is known to be local in which
384	// case adr is used to get the address (within 1MiB), or a GOT reference is
385	// really needed in which case the code needs to jump to a helper function.
386	//
387	// A helper function is needed because using code appears to be the only way
388	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
389	// the module, but on Aarch64 that results in a "COPY" relocation and linker
390	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
391	// a function outside of the module that returns the address from the GOT in
392	// x0.
393
394	d.writeCommentedNode(statement)
395
396	_, isKnown := d.symbols[symbol]
397	isLocal := strings.HasPrefix(symbol, ".L")
398	if isKnown || isLocal || isSynthesized(symbol) {
399		if isLocal {
400			symbol = d.mapLocalSymbol(symbol)
401		} else if isKnown {
402			symbol = localTargetName(symbol)
403		}
404
405		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
406
407		return statement, nil
408	}
409
410	if len(offsetStr) != 0 {
411		panic("non-zero offset for helper-based reference")
412	}
413
414	var helperFunc string
415	if symbol == "OPENSSL_armcap_P" {
416		helperFunc = ".LOPENSSL_armcap_P_addr"
417	} else {
418		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
419		// instruction, which would normally do the dereferencing, needs to be
420		// dropped. GOT helpers have to include the dereference because the
421		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
422		// instruction.
423		d.gotExternalsNeeded[symbol] = struct{}{}
424		helperFunc = gotHelperName(symbol)
425	}
426
427	// Clear the red-zone. I can't find a definitive answer about whether Linux
428	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
429	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
430	d.output.WriteString("\tsub sp, sp, 128\n")
431
432	// Save x0 (which will be stomped by the return value) and the link register
433	// to the stack. Then save the program counter into the link register and
434	// jump to the helper function.
435	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
436	d.output.WriteString("\tbl " + helperFunc + "\n")
437
438	if targetReg == "x0" {
439		// If the target happens to be x0 then restore the link register from the
440		// stack and send the saved value of x0 to the zero register.
441		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
442	} else {
443		// Otherwise move the result into place and restore registers.
444		d.output.WriteString("\tmov " + targetReg + ", x0\n")
445		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
446	}
447
448	// Revert the red-zone adjustment.
449	d.output.WriteString("\tadd sp, sp, 128\n")
450
451	return statement, nil
452}
453
// processAarch64Instruction rewrites one Aarch64 instruction statement.
//
// |statement| is the enclosing statement node and |instruction| its
// instruction-name node. Instructions taking a condition code, and mrs, are
// copied through untouched; adrp is replaced via loadAarch64Address; every
// other instruction has its arguments inspected one by one. If any argument
// had to be rewritten, the original statement is emitted as a comment followed
// by the replacement instruction; otherwise the statement is written out
// unchanged.
//
// Every return in this function yields |statement| with a nil error (or the
// result of loadAarch64Address); unsupported or malformed input results in a
// panic instead of an error.
func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	switch instructionName {
	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
		// These functions are special because they take a condition-code name as
		// an argument and that looks like a symbol reference.
		d.writeNode(statement)
		return statement, nil

	case "mrs":
		// Functions that take special register names also look like a symbol
		// reference to the parser.
		d.writeNode(statement)
		return statement, nil

	case "adrp":
		// adrp always generates a relocation, even when the target symbol is in the
		// same segment, because the page-offset of the code isn't known until link
		// time. Thus adrp instructions are turned into either adr instructions
		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
		// which load the full address. Later instructions, which add the low 12 bits
		// of offset, are tweaked to remove the offset since it's already included.
		// Loads of GOT symbols are slightly more complex because it's not possible to
		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
		// instruction, which would normally do the dereferencing, is dropped
		// completely. (Or turned into a mov if it targets a different register.)
		assertNodeType(argNodes[0], ruleRegisterOrConstant)
		targetReg := d.contents(argNodes[0])
		if !strings.HasPrefix(targetReg, "x") {
			panic("adrp targetting register " + targetReg + ", which has the wrong size")
		}

		// Extract the symbol, and any trailing offsets, from the second
		// argument.
		var symbol, offset string
		switch argNodes[1].pegRule {
		case ruleGOTSymbolOffset:
			symbol = d.contents(argNodes[1].up)
		case ruleMemoryRef:
			assertNodeType(argNodes[1].up, ruleSymbolRef)
			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
			if len(empty) != 0 {
				panic("prefix offsets found for adrp")
			}
			symbol = d.contents(node)
			_, offset = d.gatherOffsets(node.next, "")
		default:
			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
		}

		return d.loadAarch64Address(statement, targetReg, symbol, offset)
	}

	// args accumulates the (possibly rewritten) argument strings; changed
	// records whether the instruction needs to be re-emitted from them.
	var args []string
	changed := false

	for _, arg := range argNodes {
		fullArg := arg

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
			args = append(args, d.contents(fullArg))

		case ruleGOTSymbolOffset:
			// These should only be arguments to adrp and thus unreachable.
			panic("unreachable")

		case ruleMemoryRef:
			ref := arg.up

			switch ref.pegRule {
			case ruleSymbolRef:
				// This is a branch. Either the target needs to be written to a local
				// version of the symbol to ensure that no relocations are emitted, or
				// it needs to jump to a redirector function.
				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
				// NOTE(review): this assignment overwrites |changed| rather than
				// OR-ing into it, so a rewrite flagged by an earlier argument
				// would be forgotten — confirm that no instruction can reach
				// here with |changed| already set.
				changed = didChange

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					redirector := redirectorName(symbol)
					d.redirectors[symbol] = redirector
					symbol = redirector
					changed = true
				} else if didChange && symbolIsLocal && len(offset) > 0 {
					// didChange is set when the inputFile index is not 0; which is the index of the
					// first file copied to the output, which is the generated assembly of bcm.c.
					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
					// in order to ensure they don't collide. `index` gets incremented per file.
					// If there is offset after the symbol, append the `offset`.
					symbol = symbol + offset
				}

				args = append(args, symbol)

			case ruleARMBaseIndexScale:
				parts := ref.up
				assertNodeType(parts, ruleARMRegister)
				baseAddrReg := d.contents(parts)
				parts = skipWS(parts.next)

				// Only two forms need special handling. First there's memory references
				// like "[x*, :got_lo12:foo]". The base register here will have been the
				// target of an adrp instruction to load the page address, but the adrp
				// will have turned into loading the full address *and dereferencing it*,
				// above. Thus this instruction needs to be dropped otherwise we'll be
				// dereferencing twice.
				//
				// Second there are forms like "[x*, :lo12:foo]" where the code has used
				// adrp to load the page address into x*. That adrp will have been turned
				// into loading the full address so just the offset needs to be dropped.

				if parts != nil {
					if parts.pegRule == ruleARMGOTLow12 {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// The GOT helper already dereferenced the entry so, at most, just a mov
						// is needed to put things in the right register.
						d.writeCommentedNode(statement)
						if baseAddrReg != args[0] {
							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
						}
						return statement, nil
					} else if parts.pegRule == ruleLow12BitsSymbolRef {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// Suppress the offset; adrp loaded the full address.
						args = append(args, "["+baseAddrReg+"]")
						changed = true
						continue
					}
				}

				// Any other base/index form is kept verbatim.
				args = append(args, d.contents(fullArg))

			case ruleLow12BitsSymbolRef:
				// These are the second instruction in a pair:
				//   adrp x0, symbol           // Load the page address into x0
				//   add x1, x0, :lo12:symbol  // Adds the page offset.
				//
				// The adrp instruction will have been turned into a sequence that loads
				// the full address, above, thus the offset is turned into zero. If that
				// results in the instruction being a nop, then it is deleted.
				if instructionName != "add" {
					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
				}

				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
					panic("address arithmetic with incorrectly sized register")
				}

				if args[0] == args[1] {
					// Adding zero to a register in place is a nop: emit only
					// the commented-out original.
					d.writeCommentedNode(statement)
					return statement, nil
				}

				args = append(args, "#0")
				changed = true

			default:
				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
			}

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		d.output.WriteString(replacement)
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}
648
649func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
650	for symRef != nil && symRef.pegRule == ruleOffset {
651		offset := d.contents(symRef)
652		if offset[0] != '+' && offset[0] != '-' {
653			offset = "+" + offset
654		}
655		offsets = offsets + offset
656		symRef = symRef.next
657	}
658	return symRef, offsets
659}
660
661func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
662	if memRef.pegRule != ruleSymbolRef {
663		return "", "", "", false, false, memRef
664	}
665
666	symRef := memRef.up
667	nextRef = memRef.next
668
669	// (Offset* '+')?
670	symRef, offset = d.gatherOffsets(symRef, offset)
671
672	// (LocalSymbol / SymbolName)
673	symbol = d.contents(symRef)
674	if symRef.pegRule == ruleLocalSymbol {
675		symbolIsLocal = true
676		mapped := d.mapLocalSymbol(symbol)
677		if mapped != symbol {
678			symbol = mapped
679			didChange = true
680		}
681	}
682	symRef = symRef.next
683
684	// Offset*
685	symRef, offset = d.gatherOffsets(symRef, offset)
686
687	// ('@' Section / Offset*)?
688	if symRef != nil {
689		assertNodeType(symRef, ruleSection)
690		section = d.contents(symRef)
691		symRef = symRef.next
692
693		symRef, offset = d.gatherOffsets(symRef, offset)
694	}
695
696	if symRef != nil {
697		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
698	}
699
700	return
701}
702
703/* Intel */
704
// instructionType is the coarse category that classifyInstruction assigns to
// an Intel instruction.
type instructionType int

const (
	// instrPush is a single-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a single-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is every instruction not covered by the categories above.
	instrOther
)
728
729func classifyInstruction(instr string, args []*node32) instructionType {
730	switch instr {
731	case "push", "pushq":
732		if len(args) == 1 {
733			return instrPush
734		}
735
736	case "mov", "movq", "vmovq", "movsd", "vmovsd":
737		if len(args) == 2 {
738			return instrMove
739		}
740
741	case "cmovneq", "cmoveq":
742		if len(args) == 2 {
743			return instrConditionalMove
744		}
745
746	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
747		if len(args) == 1 {
748			return instrJump
749		}
750
751	case "orq", "andq", "xorq":
752		if len(args) == 2 {
753			return instrCombine
754		}
755
756	case "cmpq":
757		if len(args) == 2 {
758			return instrCompare
759		}
760
761	case "sarxq", "shlxq", "shrxq":
762		if len(args) == 3 {
763			return instrThreeArg
764		}
765
766	case "vpbroadcastq":
767		if len(args) == 2 {
768			return instrTransformingMove
769		}
770
771	case "movlps", "movhps":
772		if len(args) == 2 {
773			return instrMemoryVectorCombine
774		}
775	}
776
777	return instrOther
778}
779
780func push(w stringWriter) wrapperFunc {
781	return func(k func()) {
782		w.WriteString("\tpushq %rax\n")
783		k()
784		w.WriteString("\txchg %rax, (%rsp)\n")
785	}
786}
787
788func compare(w stringWriter, instr, a, b string) wrapperFunc {
789	return func(k func()) {
790		k()
791		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
792	}
793}
794
795func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
796	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
797
798	return func(k func()) {
799		if !redzoneCleared {
800			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
801		}
802		w.WriteString("\tpushf\n")
803		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
804		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
805		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
806		w.WriteString("\tpopf\n")
807		if !redzoneCleared {
808			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
809		}
810	}
811}
812
813func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
814	return func(k func()) {
815		if !redzoneCleared {
816			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
817			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
818		}
819		w.WriteString("\tpushfq\n")
820		k()
821		w.WriteString("\tpopfq\n")
822	}
823}
824
825func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
826	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
827
828	var reg string
829NextCandidate:
830	for _, candidate := range candidates {
831		for _, avoid := range avoidRegs {
832			if candidate == avoid {
833				continue NextCandidate
834			}
835		}
836
837		reg = candidate
838		break
839	}
840
841	if len(reg) == 0 {
842		panic("too many excluded registers")
843	}
844
845	return func(k func()) {
846		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
847		w.WriteString("\tpushq " + reg + "\n")
848		k()
849		w.WriteString("\tpopq " + reg + "\n")
850		w.WriteString("\tleaq 128(%rsp), %rsp\n")
851	}, reg
852}
853
854func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
855	return func(k func()) {
856		k()
857		prefix := ""
858		if isAVX {
859			prefix = "v"
860		}
861		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
862	}
863}
864
865func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
866	return func(k func()) {
867		k()
868		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
869	}
870}
871
872func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
873	return func(k func()) {
874		k()
875		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
876	}
877}
878
879func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
880	return func(k func()) {
881		k()
882		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
883	}
884}
885
886func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
887	return func(k func()) {
888		k()
889		// These instructions can only read from memory, so push
890		// tempReg and read from the stack. Note we assume the red zone
891		// was previously cleared by saveRegister().
892		w.WriteString("\tpushq " + source + "\n")
893		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
894		w.WriteString("\tleaq 8(%rsp), %rsp\n")
895	}
896}
897
// isValidLEATarget reports whether |reg| may be the destination of an lea,
// which is the case for anything that is not an %xmm, %ymm or %zmm register.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
901
902func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
903	var invertedCondition string
904
905	switch instr {
906	case "cmoveq":
907		invertedCondition = "ne"
908	case "cmovneq":
909		invertedCondition = "e"
910	default:
911		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
912	}
913
914	return func(k func()) {
915		w.WriteString("\tj" + invertedCondition + " 999f\n")
916		k()
917		w.WriteString("999:\n")
918	}
919}
920
921func (d *delocation) isRIPRelative(node *node32) bool {
922	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
923}
924
925func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
926	assertNodeType(instruction, ruleInstructionName)
927	instructionName := d.contents(instruction)
928
929	argNodes := instructionArgs(instruction.next)
930
931	var wrappers wrapperStack
932	var args []string
933	changed := false
934
935Args:
936	for i, arg := range argNodes {
937		fullArg := arg
938		isIndirect := false
939
940		if arg.pegRule == ruleIndirectionIndicator {
941			arg = arg.next
942			isIndirect = true
943		}
944
945		switch arg.pegRule {
946		case ruleRegisterOrConstant, ruleLocalLabelRef:
947			args = append(args, d.contents(fullArg))
948
949		case ruleMemoryRef:
950			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
951			changed = didChange
952
953			if symbol == "OPENSSL_ia32cap_P" && section == "" {
954				if instructionName != "leaq" {
955					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
956				}
957
958				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
959					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
960				}
961
962				target := argNodes[1]
963				assertNodeType(target, ruleRegisterOrConstant)
964				reg := d.contents(target)
965
966				if !strings.HasPrefix(reg, "%r") {
967					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
968				}
969
970				changed = true
971
972				// Flag-altering instructions (i.e. addq) are going to be used so the
973				// flags need to be preserved.
974				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
975
976				wrappers = append(wrappers, func(k func()) {
977					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
978					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
979				})
980
981				break Args
982			}
983
984			switch section {
985			case "":
986				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
987					symbol = localTargetName(symbol)
988					changed = true
989				}
990
991			case "PLT":
992				if classifyInstruction(instructionName, argNodes) != instrJump {
993					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
994				}
995
996				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
997					symbol = localTargetName(symbol)
998					changed = true
999				} else if !symbolIsLocal && !isSynthesized(symbol) {
1000					// Unknown symbol via PLT is an
1001					// out-call from the module, e.g.
1002					// memcpy.
1003					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
1004					symbol = redirectorName(symbol)
1005				}
1006
1007				changed = true
1008
1009			case "GOTPCREL":
1010				if len(offset) > 0 {
1011					return nil, errors.New("loading from GOT with offset is unsupported")
1012				}
1013				if !d.isRIPRelative(memRef) {
1014					return nil, errors.New("GOT access must be IP-relative")
1015				}
1016
1017				useGOT := false
1018				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
1019					symbol = localTargetName(symbol)
1020					changed = true
1021				} else if !isSynthesized(symbol) {
1022					useGOT = true
1023				}
1024
1025				classification := classifyInstruction(instructionName, argNodes)
1026				if classification != instrThreeArg && classification != instrCompare && i != 0 {
1027					return nil, errors.New("GOT access must be source operand")
1028				}
1029
1030				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1031				var targetReg string
1032				var redzoneCleared bool
1033				switch classification {
1034				case instrPush:
1035					wrappers = append(wrappers, push(d.output))
1036					targetReg = "%rax"
1037				case instrConditionalMove:
1038					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1039					fallthrough
1040				case instrMove:
1041					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1042					targetReg = d.contents(argNodes[1])
1043				case instrCompare:
1044					otherSource := d.contents(argNodes[i^1])
1045					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1046					redzoneCleared = true
1047					wrappers = append(wrappers, saveRegWrapper)
1048					if i == 0 {
1049						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1050					} else {
1051						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1052					}
1053					targetReg = tempReg
1054				case instrTransformingMove:
1055					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1056					targetReg = d.contents(argNodes[1])
1057					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1058					if isValidLEATarget(targetReg) {
1059						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1060					}
1061				case instrCombine:
1062					targetReg = d.contents(argNodes[1])
1063					if !isValidLEATarget(targetReg) {
1064						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1065					}
1066					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1067					redzoneCleared = true
1068					wrappers = append(wrappers, saveRegWrapper)
1069
1070					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1071					targetReg = tempReg
1072				case instrMemoryVectorCombine:
1073					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1074					targetReg = d.contents(argNodes[1])
1075					if isValidLEATarget(targetReg) {
1076						return nil, errors.New("target register must be an XMM register")
1077					}
1078					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1079					wrappers = append(wrappers, saveRegWrapper)
1080					redzoneCleared = true
1081					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1082					targetReg = tempReg
1083				case instrThreeArg:
1084					if n := len(argNodes); n != 3 {
1085						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1086					}
1087					if i != 0 && i != 1 {
1088						return nil, errors.New("GOT access must be from source operand")
1089					}
1090					targetReg = d.contents(argNodes[2])
1091
1092					otherSource := d.contents(argNodes[1])
1093					if i == 1 {
1094						otherSource = d.contents(argNodes[0])
1095					}
1096
1097					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1098					redzoneCleared = true
1099					wrappers = append(wrappers, saveRegWrapper)
1100
1101					if i == 0 {
1102						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1103					} else {
1104						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1105					}
1106					targetReg = tempReg
1107				default:
1108					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1109				}
1110
1111				if !isValidLEATarget(targetReg) {
1112					// Sometimes the compiler will load from the GOT to an
1113					// XMM register, which is not a valid target of an LEA
1114					// instruction.
1115					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1116					wrappers = append(wrappers, saveRegWrapper)
1117					isAVX := strings.HasPrefix(instructionName, "v")
1118					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1119					targetReg = tempReg
1120					if redzoneCleared {
1121						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1122					}
1123					redzoneCleared = true
1124				}
1125
1126				if symbol == "OPENSSL_ia32cap_P" {
1127					// Flag-altering instructions (i.e. addq) are going to be used so the
1128					// flags need to be preserved.
1129					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1130					wrappers = append(wrappers, func(k func()) {
1131						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1132						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1133					})
1134				} else if useGOT {
1135					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1136				} else {
1137					wrappers = append(wrappers, func(k func()) {
1138						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1139					})
1140				}
1141				changed = true
1142				break Args
1143
1144			default:
1145				return nil, fmt.Errorf("Unknown section type %q", section)
1146			}
1147
1148			if !changed && len(section) > 0 {
1149				panic("section was not handled")
1150			}
1151			section = ""
1152
1153			argStr := ""
1154			if isIndirect {
1155				argStr += "*"
1156			}
1157			argStr += symbol
1158			argStr += offset
1159
1160			for ; memRef != nil; memRef = memRef.next {
1161				argStr += d.contents(memRef)
1162			}
1163
1164			args = append(args, argStr)
1165
1166		case ruleGOTAddress:
1167			if instructionName != "leaq" {
1168				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea")
1169			}
1170			if i != 0 || len(argNodes) != 2 {
1171				return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form")
1172			}
1173			d.gotDeltaNeeded = true
1174			changed = true
1175			targetReg := d.contents(argNodes[1])
1176			args = append(args, ".Lboringssl_got_delta(%rip)")
1177			wrappers = append(wrappers, func(k func()) {
1178				k()
1179				d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg))
1180			})
1181
1182		case ruleGOTLocation:
1183			if instructionName != "movabsq" {
1184				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1185			}
1186			if i != 0 || len(argNodes) != 2 {
1187				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1188			}
1189
1190			d.gotDeltaNeeded = true
1191			changed = true
1192			instructionName = "movq"
1193			assertNodeType(arg.up, ruleLocalSymbol)
1194			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1195			targetReg := d.contents(argNodes[1])
1196			args = append(args, ".Lboringssl_got_delta(%rip)")
1197			wrappers = append(wrappers, func(k func()) {
1198				k()
1199				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1200			})
1201
1202		case ruleGOTSymbolOffset:
1203			if instructionName != "movabsq" {
1204				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1205			}
1206			if i != 0 || len(argNodes) != 2 {
1207				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1208			}
1209
1210			assertNodeType(arg.up, ruleSymbolName)
1211			symbol := d.contents(arg.up)
1212			if strings.HasPrefix(symbol, ".L") {
1213				symbol = d.mapLocalSymbol(symbol)
1214			}
1215			targetReg := d.contents(argNodes[1])
1216
1217			var prefix string
1218			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1219			if isGOTOFF {
1220				prefix = "gotoff"
1221				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1222			} else {
1223				prefix = "got"
1224				d.gotOffsetsNeeded[symbol] = struct{}{}
1225			}
1226			changed = true
1227
1228			wrappers = append(wrappers, func(k func()) {
1229				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1230				// of writing) emits 64-bit relocations anyway, so the following four bytes
1231				// get stomped. Thus we use 64-bit offsets.
1232				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1233			})
1234
1235		default:
1236			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1237		}
1238	}
1239
1240	if changed {
1241		d.writeCommentedNode(statement)
1242		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1243		wrappers.do(func() {
1244			d.output.WriteString(replacement)
1245		})
1246	} else {
1247		d.writeNode(statement)
1248	}
1249
1250	return statement, nil
1251}
1252
1253func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1254	lastStatement := statement
1255	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1256		node := skipWS(statement.up)
1257		if node == nil {
1258			d.writeNode(statement)
1259			continue
1260		}
1261
1262		switch node.pegRule {
1263		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1264			d.writeNode(statement)
1265
1266		case ruleDirective:
1267			directive := node.up
1268			assertNodeType(directive, ruleDirectiveName)
1269			directiveName := d.contents(directive)
1270			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1271				return lastStatement, nil
1272			}
1273			d.writeNode(statement)
1274
1275		case ruleLabel:
1276			label := node.up
1277			d.writeNode(statement)
1278
1279			if label.pegRule != ruleLocalSymbol {
1280				symbol := d.contents(label)
1281				localSymbol := localTargetName(symbol)
1282				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1283
1284				d.bssAccessorsNeeded[symbol] = localSymbol
1285			}
1286
1287		case ruleLabelContainingDirective:
1288			var err error
1289			statement, err = d.processLabelContainingDirective(statement, node.up)
1290			if err != nil {
1291				return nil, err
1292			}
1293
1294		default:
1295			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1296		}
1297	}
1298
1299	return lastStatement, nil
1300}
1301
1302func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1303	w.WriteString(".p2align 2\n")
1304	w.WriteString(".hidden " + funcName + "\n")
1305	w.WriteString(".type " + funcName + ", @function\n")
1306	w.WriteString(funcName + ":\n")
1307	w.WriteString(".cfi_startproc\n")
1308	// We insert a landing pad (`bti c` instruction) unconditionally at the beginning of
1309	// every generated function so that they can be called indirectly (with `blr` or
1310	// `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is
1311	// a no-op on machines or program states not supporting BTI (Branch Target Identification).
1312	// None of the generated function bodies call other functions (with bl or blr), so we only
1313	// insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`.
1314	// Normally we would also generate a .note.gnu.property section to annotate the assembly
1315	// file as BTI-compatible, but if the input assembly files are BTI-compatible, they should
1316	// already have those sections so there is no need to add an extra one ourselves.
1317	w.WriteString("\thint #34 // bti c\n")
1318	writeContents(w)
1319	w.WriteString(".cfi_endproc\n")
1320	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1321}
1322
1323func transform(w stringWriter, inputs []inputFile) error {
1324	// symbols contains all defined symbols.
1325	symbols := make(map[string]struct{})
1326	// fileNumbers is the set of IDs seen in .file directives.
1327	fileNumbers := make(map[int]struct{})
1328	// maxObservedFileNumber contains the largest seen file number in a
1329	// .file directive. Zero is not a valid number.
1330	maxObservedFileNumber := 0
1331	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
1332	// checksums in .file directives. If it does so, then this script needs
1333	// to match that behaviour otherwise warnings result.
1334	fileDirectivesContainMD5 := false
1335
1336	// OPENSSL_ia32cap_get will be synthesized by this script.
1337	symbols["OPENSSL_ia32cap_get"] = struct{}{}
1338
1339	for _, input := range inputs {
1340		forEachPath(input.ast.up, func(node *node32) {
1341			symbol := input.contents[node.begin:node.end]
1342			if _, ok := symbols[symbol]; ok {
1343				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
1344			}
1345			symbols[symbol] = struct{}{}
1346		}, ruleStatement, ruleLabel, ruleSymbolName)
1347
1348		forEachPath(input.ast.up, func(node *node32) {
1349			assertNodeType(node, ruleLocationDirective)
1350			directive := input.contents[node.begin:node.end]
1351			if !strings.HasPrefix(directive, ".file") {
1352				return
1353			}
1354			parts := strings.Fields(directive)
1355			if len(parts) == 2 {
1356				// This is a .file directive with just a
1357				// filename. Clang appears to generate just one
1358				// of these at the beginning of the output for
1359				// the compilation unit. Ignore it.
1360				return
1361			}
1362			fileNo, err := strconv.Atoi(parts[1])
1363			if err != nil {
1364				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
1365			}
1366
1367			if _, ok := fileNumbers[fileNo]; ok {
1368				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
1369			}
1370			fileNumbers[fileNo] = struct{}{}
1371
1372			if fileNo > maxObservedFileNumber {
1373				maxObservedFileNumber = fileNo
1374			}
1375
1376			for _, token := range parts[2:] {
1377				if token == "md5" {
1378					fileDirectivesContainMD5 = true
1379				}
1380			}
1381		}, ruleStatement, ruleLocationDirective)
1382	}
1383
1384	processor := x86_64
1385	if len(inputs) > 0 {
1386		processor = detectProcessor(inputs[0])
1387	}
1388
1389	commentIndicator := "#"
1390	if processor == aarch64 {
1391		commentIndicator = "//"
1392	}
1393
1394	d := &delocation{
1395		symbols:             symbols,
1396		processor:           processor,
1397		commentIndicator:    commentIndicator,
1398		output:              w,
1399		redirectors:         make(map[string]string),
1400		bssAccessorsNeeded:  make(map[string]string),
1401		gotExternalsNeeded:  make(map[string]struct{}),
1402		gotOffsetsNeeded:    make(map[string]struct{}),
1403		gotOffOffsetsNeeded: make(map[string]struct{}),
1404	}
1405
1406	w.WriteString(".text\n")
1407	var fileTrailing string
1408	if fileDirectivesContainMD5 {
1409		fileTrailing = " md5 0x00000000000000000000000000000000"
1410	}
1411	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
1412	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
1413	w.WriteString("BORINGSSL_bcm_text_start:\n")
1414
1415	for _, input := range inputs {
1416		if err := d.processInput(input); err != nil {
1417			return err
1418		}
1419	}
1420
1421	w.WriteString(".text\n")
1422	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
1423	w.WriteString("BORINGSSL_bcm_text_end:\n")
1424
1425	// Emit redirector functions. Each is a single jump instruction.
1426	var redirectorNames []string
1427	for name := range d.redirectors {
1428		redirectorNames = append(redirectorNames, name)
1429	}
1430	sort.Strings(redirectorNames)
1431
1432	for _, name := range redirectorNames {
1433		redirector := d.redirectors[name]
1434		switch d.processor {
1435		case aarch64:
1436			writeAarch64Function(w, redirector, func(w stringWriter) {
1437				w.WriteString("\tb " + name + "\n")
1438			})
1439
1440		case x86_64:
1441			w.WriteString(".type " + redirector + ", @function\n")
1442			w.WriteString(redirector + ":\n")
1443			w.WriteString("\tjmp\t" + name + "\n")
1444		}
1445	}
1446
1447	var accessorNames []string
1448	for accessor := range d.bssAccessorsNeeded {
1449		accessorNames = append(accessorNames, accessor)
1450	}
1451	sort.Strings(accessorNames)
1452
1453	// Emit BSS accessor functions. Each is a single LEA followed by RET.
1454	for _, name := range accessorNames {
1455		funcName := accessorName(name)
1456		target := d.bssAccessorsNeeded[name]
1457
1458		switch d.processor {
1459		case x86_64:
1460			w.WriteString(".type " + funcName + ", @function\n")
1461			w.WriteString(funcName + ":\n")
1462			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
1463
1464		case aarch64:
1465			writeAarch64Function(w, funcName, func(w stringWriter) {
1466				w.WriteString("\tadrp x0, " + target + "\n")
1467				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
1468				w.WriteString("\tret\n")
1469			})
1470		}
1471	}
1472
1473	switch d.processor {
1474	case aarch64:
1475		externalNames := sortedSet(d.gotExternalsNeeded)
1476		for _, symbol := range externalNames {
1477			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
1478				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
1479				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
1480				w.WriteString("\tret\n")
1481			})
1482		}
1483
1484		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
1485			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
1486			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
1487			w.WriteString("\tret\n")
1488		})
1489
1490	case x86_64:
1491		externalNames := sortedSet(d.gotExternalsNeeded)
1492		for _, name := range externalNames {
1493			parts := strings.SplitN(name, "@", 2)
1494			symbol, section := parts[0], parts[1]
1495			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
1496			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
1497			w.WriteString(symbol + "_" + section + "_external:\n")
1498			// Ideally this would be .quad foo@GOTPCREL, but clang's
1499			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
1500			// we manually sign-extend the value, knowing that the GOT is
1501			// always at the end, thus foo@GOTPCREL has a positive value.
1502			w.WriteString("\t.long " + symbol + "@" + section + "\n")
1503			w.WriteString("\t.long 0\n")
1504		}
1505
1506		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
1507		w.WriteString(".globl OPENSSL_ia32cap_get\n")
1508		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
1509		w.WriteString("OPENSSL_ia32cap_get:\n")
1510		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
1511		w.WriteString("\tret\n")
1512
1513		w.WriteString(".extern OPENSSL_ia32cap_P\n")
1514		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
1515		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
1516		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
1517		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
1518
1519		if d.gotDeltaNeeded {
1520			w.WriteString(".Lboringssl_got_delta:\n")
1521			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
1522		}
1523
1524		for _, name := range sortedSet(d.gotOffsetsNeeded) {
1525			w.WriteString(".Lboringssl_got_" + name + ":\n")
1526			w.WriteString("\t.quad " + name + "@GOT\n")
1527		}
1528		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
1529			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
1530			w.WriteString("\t.quad " + name + "@GOTOFF\n")
1531		}
1532	}
1533
1534	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
1535	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
1536	w.WriteString("BORINGSSL_bcm_text_hash:\n")
1537	for _, b := range fipscommon.UninitHashValue {
1538		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
1539	}
1540
1541	return nil
1542}
1543
// preprocess runs the file at path through the C preprocessor. cppCommand
// holds the program to execute followed by its leading arguments; path is
// appended as the final argument. The process's standard error is forwarded
// to this process's standard error and its standard output is returned.
func preprocess(cppCommand []string, path string) ([]byte, error) {
	argv := make([]string, 0, len(cppCommand)+1)
	argv = append(argv, cppCommand...)
	argv = append(argv, path)

	var stdout bytes.Buffer
	cmd := exec.Command(argv[0], argv[1:]...)
	cmd.Stdout = &stdout
	cmd.Stderr = os.Stderr

	err := cmd.Run()
	if err != nil {
		return nil, err
	}
	return stdout.Bytes(), nil
}
1561
1562func parseInputs(inputs []inputFile, cppCommand []string) error {
1563	for i, input := range inputs {
1564		var contents string
1565
1566		if input.isArchive {
1567			arFile, err := os.Open(input.path)
1568			if err != nil {
1569				return err
1570			}
1571			defer arFile.Close()
1572
1573			ar, err := ar.ParseAR(arFile)
1574			if err != nil {
1575				return err
1576			}
1577
1578			if len(ar) != 1 {
1579				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1580			}
1581
1582			for _, c := range ar {
1583				contents = string(c)
1584			}
1585		} else {
1586			var inBytes []byte
1587			var err error
1588
1589			if len(cppCommand) > 0 {
1590				inBytes, err = preprocess(cppCommand, input.path)
1591			} else {
1592				inBytes, err = os.ReadFile(input.path)
1593			}
1594			if err != nil {
1595				return err
1596			}
1597
1598			contents = string(inBytes)
1599		}
1600
1601		asm := Asm{Buffer: contents, Pretty: true}
1602		asm.Init()
1603		if err := asm.Parse(); err != nil {
1604			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1605		}
1606		ast := asm.AST()
1607
1608		inputs[i].contents = contents
1609		inputs[i].ast = ast
1610	}
1611
1612	return nil
1613}
1614
// includePathFromHeaderFilePath returns an include directory path based on the
// path of a specific header file. It walks up the path and assumes that the
// include files are rooted in a directory called "openssl".
//
// The result is the portion of path that precedes the innermost "openssl"
// element, including its trailing separator. When "openssl" is the first
// element of a relative path there is no such portion and "." is returned, so
// that the result is always usable as the argument of a -I flag. An error is
// returned if path has no "openssl" element.
func includePathFromHeaderFilePath(path string) (string, error) {
	dir := path
	for {
		var file string
		dir, file = filepath.Split(dir)

		if file == "openssl" {
			if len(dir) == 0 {
				// An empty string would yield a bare "-I", which
				// makes the compiler treat the following argument
				// as the include directory.
				return ".", nil
			}
			return dir, nil
		}

		if len(dir) == 0 {
			break
		}
		// Drop the trailing separator so that the next Split yields
		// the parent element.
		dir = dir[:len(dir)-1]
	}

	return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
}
1636
1637func main() {
1638	// The .a file, if given, is expected to be an archive of textual
1639	// assembly sources. That's odd, but CMake really wants to create
1640	// archive files so it's the only way that we can make it work.
1641	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1642	outFile := flag.String("o", "", "Path to output assembly")
1643	ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs")
1644	ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing")
1645
1646	flag.Parse()
1647
1648	if len(*outFile) == 0 {
1649		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1650		os.Exit(1)
1651	}
1652
1653	var inputs []inputFile
1654	if len(*arInput) > 0 {
1655		inputs = append(inputs, inputFile{
1656			path:      *arInput,
1657			index:     0,
1658			isArchive: true,
1659		})
1660	}
1661
1662	includePaths := make(map[string]struct{})
1663
1664	for i, path := range flag.Args() {
1665		if len(path) == 0 {
1666			continue
1667		}
1668
1669		// Header files are not processed but their path is remembered
1670		// and passed as -I arguments when invoking the preprocessor.
1671		if strings.HasSuffix(path, ".h") {
1672			dir, err := includePathFromHeaderFilePath(path)
1673			if err != nil {
1674				fmt.Fprintf(os.Stderr, "%s\n", err)
1675				os.Exit(1)
1676			}
1677			includePaths[dir] = struct{}{}
1678			continue
1679		}
1680
1681		inputs = append(inputs, inputFile{
1682			path:  path,
1683			index: i + 1,
1684		})
1685	}
1686
1687	var cppCommand []string
1688	if len(*ccPath) > 0 {
1689		cppCommand = append(cppCommand, *ccPath)
1690		cppCommand = append(cppCommand, strings.Fields(*ccFlags)...)
1691		// Some of ccFlags might be superfluous when running the
1692		// preprocessor, but we don't want the compiler complaining that
1693		// "argument unused during compilation".
1694		cppCommand = append(cppCommand, "-Wno-unused-command-line-argument")
1695
1696		for includePath := range includePaths {
1697			cppCommand = append(cppCommand, "-I"+includePath)
1698		}
1699
1700		// -E requests only preprocessing.
1701		cppCommand = append(cppCommand, "-E")
1702	}
1703
1704	if err := parseInputs(inputs, cppCommand); err != nil {
1705		fmt.Fprintf(os.Stderr, "%s\n", err)
1706		os.Exit(1)
1707	}
1708
1709	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1710	if err != nil {
1711		panic(err)
1712	}
1713	defer out.Close()
1714
1715	if err := transform(out, inputs); err != nil {
1716		fmt.Fprintf(os.Stderr, "%s\n", err)
1717		os.Exit(1)
1718	}
1719}
1720
1721func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1722	if node == nil {
1723		return
1724	}
1725
1726	if len(rules) == 0 {
1727		cb(node)
1728		return
1729	}
1730
1731	rule := rules[0]
1732	childRules := rules[1:]
1733
1734	for ; node != nil; node = node.next {
1735		if node.pegRule != rule {
1736			continue
1737		}
1738
1739		if len(childRules) == 0 {
1740			cb(node)
1741		} else {
1742			forEachPath(node.up, cb, childRules...)
1743		}
1744	}
1745}
1746
1747func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1748	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1749	}
1750	return node
1751}
1752
1753func skipWS(node *node32) *node32 {
1754	return skipNodes(node, ruleWS)
1755}
1756
1757func assertNodeType(node *node32, expected pegRule) {
1758	if rule := node.pegRule; rule != expected {
1759		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1760	}
1761}
1762
// wrapperFunc surrounds a continuation with additional behaviour. It is
// passed the continuation and decides whether, and when, to call it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrappers. The first element is the
// outermost wrapper.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside every wrapper on the stack, outermost first.
// Wrappers are removed from the stack as they are invoked, so a wrapper that
// never calls its continuation leaves the wrappers after it on the stack.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	// Pop before invoking so that the continuation sees only the
	// wrappers that remain.
	*w = pending[1:]
	outermost := pending[0]
	outermost(func() {
		w.do(baseCase)
	})
}
1777
// localTargetName returns the local target label that corresponds to the
// global symbol called name: the name wrapped in a ".L" prefix and a
// "_local_target" suffix.
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
1783
// isSynthesized reports whether symbol matches one of three name patterns:
// exactly "OPENSSL_ia32cap_get", ending in "_bss_get", or starting with
// "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
1789
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1793
// sectionType returns the type of a section, i.e. the section name up to, but
// not including, its second '.'. For example, a section called ".text.foo" is
// a ".text" section. Any type beginning with ".debug_" is reported as
// ".debug". The boolean result is false if section is empty or does not begin
// with a '.'.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	// Look for a second dot, skipping the leading one.
	kind := section
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		kind = section[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		return ".debug", true
	}
	return kind, true
}
1812
// accessorName returns the name of the accessor function for the BSS symbol
// called name, which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1818
1819func (d *delocation) mapLocalSymbol(symbol string) string {
1820	if d.currentInput.index == 0 {
1821		return symbol
1822	}
1823	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1824}
1825
1826func detectProcessor(input inputFile) processorType {
1827	for statement := input.ast.up; statement != nil; statement = statement.next {
1828		node := skipNodes(statement.up, ruleWS)
1829		if node == nil || node.pegRule != ruleInstruction {
1830			continue
1831		}
1832
1833		instruction := node.up
1834		instructionName := input.contents[instruction.begin:instruction.end]
1835
1836		switch instructionName {
1837		case "movq", "call", "leaq":
1838			return x86_64
1839		case "str", "bl", "ldr", "st1":
1840			return aarch64
1841		}
1842	}
1843
1844	panic("processed entire input and didn't recognise any instructions.")
1845}
1846
// sortedSet returns the keys of m in ascending order. The result is never
// nil, even when m is empty.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1855