• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"bytes"
21	"errors"
22	"flag"
23	"fmt"
24	"os"
25	"os/exec"
26	"path/filepath"
27	"sort"
28	"strconv"
29	"strings"
30
31	"boringssl.googlesource.com/boringssl/util/ar"
32	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
33)
34
// inputFile represents a textual assembly file.
type inputFile struct {
	// path names the input. NOTE(review): presumably the file's location
	// on disk; the code that populates it is outside this view.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
49
// stringWriter is the minimal output sink used when emitting assembly:
// anything that accepts a string through WriteString.
type stringWriter interface {
	WriteString(string) (int, error)
}
53
// processorType identifies the instruction set of the assembly being
// processed.
type processorType int

const (
	// Values start at one, so the zero value of processorType does not
	// name any known processor.
	x86_64 processorType = iota + 1
	aarch64
)
60
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which architecture-specific instruction handler
	// is used, and output is where the transformed assembly is written.
	processor processorType
	output    stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. It is set by processInput
	// and read by contents to recover the text behind an AST node.
	currentInput inputFile
}
92
93func (d *delocation) contents(node *node32) string {
94	return d.currentInput.contents[node.begin:node.end]
95}
96
97// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) {
99	if _, err := d.output.WriteString(d.contents(node)); err != nil {
100		panic(err)
101	}
102}
103
104func (d *delocation) writeCommentedNode(node *node32) {
105	line := d.contents(node)
106	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
107		panic(err)
108	}
109}
110
111func locateError(err error, with *node32, in inputFile) error {
112	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
113	var line int
114	for _, pos := range posMap {
115		line = pos.line
116	}
117
118	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
119}
120
121func (d *delocation) processInput(input inputFile) (err error) {
122	d.currentInput = input
123
124	var origStatement *node32
125	defer func() {
126		if err := recover(); err != nil {
127			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
128		}
129	}()
130
131	for statement := input.ast.up; statement != nil; statement = statement.next {
132		assertNodeType(statement, ruleStatement)
133		origStatement = statement
134
135		node := skipWS(statement.up)
136		if node == nil {
137			d.writeNode(statement)
138			continue
139		}
140
141		switch node.pegRule {
142		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
143			d.writeNode(statement)
144		case ruleDirective:
145			statement, err = d.processDirective(statement, node.up)
146		case ruleLabelContainingDirective:
147			statement, err = d.processLabelContainingDirective(statement, node.up)
148		case ruleLabel:
149			statement, err = d.processLabel(statement, node.up)
150		case ruleInstruction:
151			switch d.processor {
152			case x86_64:
153				statement, err = d.processIntelInstruction(statement, node.up)
154			case aarch64:
155				statement, err = d.processAarch64Instruction(statement, node.up)
156			default:
157				panic("unknown processor")
158			}
159		default:
160			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
161		}
162
163		if err != nil {
164			return locateError(err, origStatement, input)
165		}
166	}
167
168	return nil
169}
170
171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
172	assertNodeType(directive, ruleDirectiveName)
173	directiveName := d.contents(directive)
174
175	var args []string
176	forEachPath(directive, func(arg *node32) {
177		// If the argument is a quoted string, use the raw contents.
178		// (Note that this doesn't unescape the string, but that's not
179		// needed so far.
180		if arg.up != nil {
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedArg)
183			if arg.up == nil {
184				args = append(args, "")
185				return
186			}
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedText)
189		}
190		args = append(args, d.contents(arg))
191	}, ruleArgs, ruleArg)
192
193	switch directiveName {
194	case "comm", "lcomm":
195		if len(args) < 1 {
196			return nil, errors.New("comm directive has no arguments")
197		}
198		d.bssAccessorsNeeded[args[0]] = args[0]
199		d.writeNode(statement)
200
201	case "data":
202		// ASAN and some versions of MSAN are adding a .data section,
203		// and adding references to symbols within it to the code. We
204		// will have to work around this in the future.
205		return nil, errors.New(".data section found in module")
206
207	case "section":
208		section := args[0]
209
210		if section == ".data.rel.ro" {
211			// In a normal build, this is an indication of a
212			// problem but any references from the module to this
213			// section will result in a relocation and thus will
214			// break the integrity check. ASAN can generate these
215			// sections and so we will likely have to work around
216			// that in the future.
217			return nil, errors.New(".data.rel.ro section found in module")
218		}
219
220		sectionType, ok := sectionType(section)
221		if !ok {
222			// Unknown sections are permitted in order to be robust
223			// to different compiler modes.
224			d.writeNode(statement)
225			break
226		}
227
228		switch sectionType {
229		case ".rodata", ".text":
230			// Move .rodata to .text so it may be accessed without
231			// a relocation. GCC with -fmerge-constants will place
232			// strings into separate sections, so we move all
233			// sections named like .rodata. Also move .text.startup
234			// so the self-test function is also in the module.
235			d.writeCommentedNode(statement)
236			d.output.WriteString(".text\n")
237
238		case ".data":
239			// See above about .data
240			return nil, errors.New(".data section found in module")
241
242		case ".init_array", ".fini_array", ".ctors", ".dtors":
243			// init_array/ctors/dtors contains function
244			// pointers to constructor/destructor
245			// functions. These contain relocations, but
246			// they're in a different section anyway.
247			d.writeNode(statement)
248			break
249
250		case ".debug", ".note":
251			d.writeNode(statement)
252			break
253
254		case ".bss":
255			d.writeNode(statement)
256			return d.handleBSS(statement)
257		}
258
259	default:
260		d.writeNode(statement)
261	}
262
263	return statement, nil
264}
265
266func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
267	// The symbols within directives need to be mapped so that local
268	// symbols in two different .s inputs don't collide.
269	changed := false
270	assertNodeType(directive, ruleLabelContainingDirectiveName)
271	name := d.contents(directive)
272
273	node := directive.next
274	assertNodeType(node, ruleWS)
275
276	node = node.next
277	assertNodeType(node, ruleSymbolArgs)
278
279	var args []string
280	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
281		assertNodeType(node, ruleSymbolArg)
282		arg := node.up
283		var mapped string
284
285		for term := arg; term != nil; term = term.next {
286			if term.pegRule != ruleLocalSymbol {
287				mapped += d.contents(term)
288				continue
289			}
290
291			oldSymbol := d.contents(term)
292			newSymbol := d.mapLocalSymbol(oldSymbol)
293			if newSymbol != oldSymbol {
294				changed = true
295			}
296
297			mapped += newSymbol
298		}
299
300		args = append(args, mapped)
301	}
302
303	if !changed {
304		d.writeNode(statement)
305	} else {
306		d.writeCommentedNode(statement)
307		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
308	}
309
310	return statement, nil
311}
312
313func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
314	symbol := d.contents(label)
315
316	switch label.pegRule {
317	case ruleLocalLabel:
318		d.output.WriteString(symbol + ":\n")
319	case ruleLocalSymbol:
320		// symbols need to be mapped so that local symbols from two
321		// different .s inputs don't collide.
322		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
323	case ruleSymbolName:
324		d.output.WriteString(localTargetName(symbol) + ":\n")
325		d.writeNode(statement)
326	default:
327		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
328	}
329
330	return statement, nil
331}
332
333// instructionArgs collects all the arguments to an instruction.
334func instructionArgs(node *node32) (argNodes []*node32) {
335	for node = skipWS(node); node != nil; node = skipWS(node.next) {
336		assertNodeType(node, ruleInstructionArg)
337		argNodes = append(argNodes, node.up)
338	}
339
340	return argNodes
341}
342
343// Aarch64 support
344
// gotHelperName returns the name of the synthesised function that returns the
// address of |symbol| from the GOT. The name is |symbol| with a fixed
// local-symbol prefix.
func gotHelperName(symbol string) string {
	const helperPrefix = ".Lboringssl_loadgot_"
	return helperPrefix + symbol
}
350
351// loadAarch64Address emits instructions to put the address of |symbol|
352// (optionally adjusted by |offsetStr|) into |targetReg|.
353func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
354	// There are two paths here: either the symbol is known to be local in which
355	// case adr is used to get the address (within 1MiB), or a GOT reference is
356	// really needed in which case the code needs to jump to a helper function.
357	//
358	// A helper function is needed because using code appears to be the only way
359	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
360	// the module, but on Aarch64 that results in a "COPY" relocation and linker
361	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
362	// a function outside of the module that returns the address from the GOT in
363	// x0.
364
365	d.writeCommentedNode(statement)
366
367	_, isKnown := d.symbols[symbol]
368	isLocal := strings.HasPrefix(symbol, ".L")
369	if isKnown || isLocal || isSynthesized(symbol) {
370		if isLocal {
371			symbol = d.mapLocalSymbol(symbol)
372		} else if isKnown {
373			symbol = localTargetName(symbol)
374		}
375
376		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
377
378		return statement, nil
379	}
380
381	if len(offsetStr) != 0 {
382		panic("non-zero offset for helper-based reference")
383	}
384
385	var helperFunc string
386	if symbol == "OPENSSL_armcap_P" {
387		helperFunc = ".LOPENSSL_armcap_P_addr"
388	} else {
389		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
390		// instruction, which would normally do the dereferencing, needs to be
391		// dropped. GOT helpers have to include the dereference because the
392		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
393		// instruction.
394		d.gotExternalsNeeded[symbol] = struct{}{}
395		helperFunc = gotHelperName(symbol)
396	}
397
398	// Clear the red-zone. I can't find a definitive answer about whether Linux
399	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
400	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
401	d.output.WriteString("\tsub sp, sp, 128\n")
402
403	// Save x0 (which will be stomped by the return value) and the link register
404	// to the stack. Then save the program counter into the link register and
405	// jump to the helper function.
406	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
407	d.output.WriteString("\tbl " + helperFunc + "\n")
408
409	if targetReg == "x0" {
410		// If the target happens to be x0 then restore the link register from the
411		// stack and send the saved value of x0 to the zero register.
412		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
413	} else {
414		// Otherwise move the result into place and restore registers.
415		d.output.WriteString("\tmov " + targetReg + ", x0\n")
416		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
417	}
418
419	// Revert the red-zone adjustment.
420	d.output.WriteString("\tadd sp, sp, 128\n")
421
422	return statement, nil
423}
424
425func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
426	assertNodeType(instruction, ruleInstructionName)
427	instructionName := d.contents(instruction)
428
429	argNodes := instructionArgs(instruction.next)
430
431	switch instructionName {
432	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
433		// These functions are special because they take a condition-code name as
434		// an argument and that looks like a symbol reference.
435		d.writeNode(statement)
436		return statement, nil
437
438	case "mrs":
439		// Functions that take special register names also look like a symbol
440		// reference to the parser.
441		d.writeNode(statement)
442		return statement, nil
443
444	case "adrp":
445		// adrp always generates a relocation, even when the target symbol is in the
446		// same segment, because the page-offset of the code isn't known until link
447		// time. Thus adrp instructions are turned into either adr instructions
448		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
449		// which load the full address. Later instructions, which add the low 12 bits
450		// of offset, are tweaked to remove the offset since it's already included.
451		// Loads of GOT symbols are slightly more complex because it's not possible to
452		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
453		// instruction, which would normally do the dereferencing, is dropped
454		// completely. (Or turned into a mov if it targets a different register.)
455		assertNodeType(argNodes[0], ruleRegisterOrConstant)
456		targetReg := d.contents(argNodes[0])
457		if !strings.HasPrefix(targetReg, "x") {
458			panic("adrp targetting register " + targetReg + ", which has the wrong size")
459		}
460
461		var symbol, offset string
462		switch argNodes[1].pegRule {
463		case ruleGOTSymbolOffset:
464			symbol = d.contents(argNodes[1].up)
465		case ruleMemoryRef:
466			assertNodeType(argNodes[1].up, ruleSymbolRef)
467			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
468			if len(empty) != 0 {
469				panic("prefix offsets found for adrp")
470			}
471			symbol = d.contents(node)
472			_, offset = d.gatherOffsets(node.next, "")
473		default:
474			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
475		}
476
477		return d.loadAarch64Address(statement, targetReg, symbol, offset)
478	}
479
480	var args []string
481	changed := false
482
483	for _, arg := range argNodes {
484		fullArg := arg
485
486		switch arg.pegRule {
487		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
488			args = append(args, d.contents(fullArg))
489
490		case ruleGOTSymbolOffset:
491			// These should only be arguments to adrp and thus unreachable.
492			panic("unreachable")
493
494		case ruleMemoryRef:
495			ref := arg.up
496
497			switch ref.pegRule {
498			case ruleSymbolRef:
499				// This is a branch. Either the target needs to be written to a local
500				// version of the symbol to ensure that no relocations are emitted, or
501				// it needs to jump to a redirector function.
502				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
503				changed = didChange
504
505				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
506					symbol = localTargetName(symbol)
507					changed = true
508				} else if !symbolIsLocal && !isSynthesized(symbol) {
509					redirector := redirectorName(symbol)
510					d.redirectors[symbol] = redirector
511					symbol = redirector
512					changed = true
513				} else if didChange && symbolIsLocal && len(offset) > 0 {
514					// didChange is set when the inputFile index is not 0; which is the index of the
515					// first file copied to the output, which is the generated assembly of bcm.c.
516					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
517					// in order to ensure they don't collide. `index` gets incremented per file.
518					// If there is offset after the symbol, append the `offset`.
519					symbol = symbol + offset
520				}
521
522				args = append(args, symbol)
523
524			case ruleARMBaseIndexScale:
525				parts := ref.up
526				assertNodeType(parts, ruleARMRegister)
527				baseAddrReg := d.contents(parts)
528				parts = skipWS(parts.next)
529
530				// Only two forms need special handling. First there's memory references
531				// like "[x*, :got_lo12:foo]". The base register here will have been the
532				// target of an adrp instruction to load the page address, but the adrp
533				// will have turned into loading the full address *and dereferencing it*,
534				// above. Thus this instruction needs to be dropped otherwise we'll be
535				// dereferencing twice.
536				//
537				// Second there are forms like "[x*, :lo12:foo]" where the code has used
538				// adrp to load the page address into x*. That adrp will have been turned
539				// into loading the full address so just the offset needs to be dropped.
540
541				if parts != nil {
542					if parts.pegRule == ruleARMGOTLow12 {
543						if instructionName != "ldr" {
544							panic("Symbol reference outside of ldr instruction")
545						}
546
547						if skipWS(parts.next) != nil || parts.up.next != nil {
548							panic("can't handle tweak or post-increment with symbol references")
549						}
550
551						// The GOT helper already dereferenced the entry so, at most, just a mov
552						// is needed to put things in the right register.
553						d.writeCommentedNode(statement)
554						if baseAddrReg != args[0] {
555							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
556						}
557						return statement, nil
558					} else if parts.pegRule == ruleLow12BitsSymbolRef {
559						if instructionName != "ldr" {
560							panic("Symbol reference outside of ldr instruction")
561						}
562
563						if skipWS(parts.next) != nil || parts.up.next != nil {
564							panic("can't handle tweak or post-increment with symbol references")
565						}
566
567						// Suppress the offset; adrp loaded the full address.
568						args = append(args, "["+baseAddrReg+"]")
569						changed = true
570						continue
571					}
572				}
573
574				args = append(args, d.contents(fullArg))
575
576			case ruleLow12BitsSymbolRef:
577				// These are the second instruction in a pair:
578				//   adrp x0, symbol           // Load the page address into x0
579				//   add x1, x0, :lo12:symbol  // Adds the page offset.
580				//
581				// The adrp instruction will have been turned into a sequence that loads
582				// the full address, above, thus the offset is turned into zero. If that
583				// results in the instruction being a nop, then it is deleted.
584				if instructionName != "add" {
585					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
586				}
587
588				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
589					panic("address arithmetic with incorrectly sized register")
590				}
591
592				if args[0] == args[1] {
593					d.writeCommentedNode(statement)
594					return statement, nil
595				}
596
597				args = append(args, "#0")
598				changed = true
599
600			default:
601				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
602			}
603
604		default:
605			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
606		}
607	}
608
609	if changed {
610		d.writeCommentedNode(statement)
611		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
612		d.output.WriteString(replacement)
613	} else {
614		d.writeNode(statement)
615	}
616
617	return statement, nil
618}
619
620func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
621	for symRef != nil && symRef.pegRule == ruleOffset {
622		offset := d.contents(symRef)
623		if offset[0] != '+' && offset[0] != '-' {
624			offset = "+" + offset
625		}
626		offsets = offsets + offset
627		symRef = symRef.next
628	}
629	return symRef, offsets
630}
631
632func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
633	if memRef.pegRule != ruleSymbolRef {
634		return "", "", "", false, false, memRef
635	}
636
637	symRef := memRef.up
638	nextRef = memRef.next
639
640	// (Offset* '+')?
641	symRef, offset = d.gatherOffsets(symRef, offset)
642
643	// (LocalSymbol / SymbolName)
644	symbol = d.contents(symRef)
645	if symRef.pegRule == ruleLocalSymbol {
646		symbolIsLocal = true
647		mapped := d.mapLocalSymbol(symbol)
648		if mapped != symbol {
649			symbol = mapped
650			didChange = true
651		}
652	}
653	symRef = symRef.next
654
655	// Offset*
656	symRef, offset = d.gatherOffsets(symRef, offset)
657
658	// ('@' Section / Offset*)?
659	if symRef != nil {
660		assertNodeType(symRef, ruleSection)
661		section = d.contents(symRef)
662		symRef = symRef.next
663
664		symRef, offset = d.gatherOffsets(symRef, offset)
665	}
666
667	if symRef != nil {
668		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
669	}
670
671	return
672}
673
674/* Intel */
675
// instructionType is the coarse category that classifyInstruction assigns to
// an instruction based on its name and argument count.
type instructionType int

const (
	// instrPush is a single-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a single-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is everything that classifyInstruction does not recognise.
	instrOther
)
699
700func classifyInstruction(instr string, args []*node32) instructionType {
701	switch instr {
702	case "push", "pushq":
703		if len(args) == 1 {
704			return instrPush
705		}
706
707	case "mov", "movq", "vmovq", "movsd", "vmovsd":
708		if len(args) == 2 {
709			return instrMove
710		}
711
712	case "cmovneq", "cmoveq":
713		if len(args) == 2 {
714			return instrConditionalMove
715		}
716
717	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
718		if len(args) == 1 {
719			return instrJump
720		}
721
722	case "orq", "andq", "xorq":
723		if len(args) == 2 {
724			return instrCombine
725		}
726
727	case "cmpq":
728		if len(args) == 2 {
729			return instrCompare
730		}
731
732	case "sarxq", "shlxq", "shrxq":
733		if len(args) == 3 {
734			return instrThreeArg
735		}
736
737	case "vpbroadcastq":
738		if len(args) == 2 {
739			return instrTransformingMove
740		}
741
742	case "movlps", "movhps":
743		if len(args) == 2 {
744			return instrMemoryVectorCombine
745		}
746	}
747
748	return instrOther
749}
750
751func push(w stringWriter) wrapperFunc {
752	return func(k func()) {
753		w.WriteString("\tpushq %rax\n")
754		k()
755		w.WriteString("\txchg %rax, (%rsp)\n")
756	}
757}
758
759func compare(w stringWriter, instr, a, b string) wrapperFunc {
760	return func(k func()) {
761		k()
762		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
763	}
764}
765
766func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
767	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
768
769	return func(k func()) {
770		if !redzoneCleared {
771			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
772		}
773		w.WriteString("\tpushf\n")
774		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
775		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
776		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
777		w.WriteString("\tpopf\n")
778		if !redzoneCleared {
779			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
780		}
781	}
782}
783
784func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
785	return func(k func()) {
786		if !redzoneCleared {
787			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
788			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
789		}
790		w.WriteString("\tpushfq\n")
791		k()
792		w.WriteString("\tpopfq\n")
793	}
794}
795
796func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
797	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
798
799	var reg string
800NextCandidate:
801	for _, candidate := range candidates {
802		for _, avoid := range avoidRegs {
803			if candidate == avoid {
804				continue NextCandidate
805			}
806		}
807
808		reg = candidate
809		break
810	}
811
812	if len(reg) == 0 {
813		panic("too many excluded registers")
814	}
815
816	return func(k func()) {
817		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
818		w.WriteString("\tpushq " + reg + "\n")
819		k()
820		w.WriteString("\tpopq " + reg + "\n")
821		w.WriteString("\tleaq 128(%rsp), %rsp\n")
822	}, reg
823}
824
825func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
826	return func(k func()) {
827		k()
828		prefix := ""
829		if isAVX {
830			prefix = "v"
831		}
832		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
833	}
834}
835
836func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
837	return func(k func()) {
838		k()
839		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
840	}
841}
842
843func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
844	return func(k func()) {
845		k()
846		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
847	}
848}
849
850func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
851	return func(k func()) {
852		k()
853		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
854	}
855}
856
857func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
858	return func(k func()) {
859		k()
860		// These instructions can only read from memory, so push
861		// tempReg and read from the stack. Note we assume the red zone
862		// was previously cleared by saveRegister().
863		w.WriteString("\tpushq " + source + "\n")
864		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
865		w.WriteString("\tleaq 8(%rsp), %rsp\n")
866	}
867}
868
// isValidLEATarget reports whether |reg| can be the destination of an lea,
// which is taken to be any register whose name does not begin with %xmm, %ymm
// or %zmm.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
872
873func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
874	var invertedCondition string
875
876	switch instr {
877	case "cmoveq":
878		invertedCondition = "ne"
879	case "cmovneq":
880		invertedCondition = "e"
881	default:
882		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
883	}
884
885	return func(k func()) {
886		w.WriteString("\tj" + invertedCondition + " 999f\n")
887		k()
888		w.WriteString("999:\n")
889	}
890}
891
892func (d *delocation) isRIPRelative(node *node32) bool {
893	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
894}
895
896func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
897	assertNodeType(instruction, ruleInstructionName)
898	instructionName := d.contents(instruction)
899
900	argNodes := instructionArgs(instruction.next)
901
902	var wrappers wrapperStack
903	var args []string
904	changed := false
905
906Args:
907	for i, arg := range argNodes {
908		fullArg := arg
909		isIndirect := false
910
911		if arg.pegRule == ruleIndirectionIndicator {
912			arg = arg.next
913			isIndirect = true
914		}
915
916		switch arg.pegRule {
917		case ruleRegisterOrConstant, ruleLocalLabelRef:
918			args = append(args, d.contents(fullArg))
919
920		case ruleMemoryRef:
921			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
922			changed = didChange
923
924			if symbol == "OPENSSL_ia32cap_P" && section == "" {
925				if instructionName != "leaq" {
926					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
927				}
928
929				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
930					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
931				}
932
933				target := argNodes[1]
934				assertNodeType(target, ruleRegisterOrConstant)
935				reg := d.contents(target)
936
937				if !strings.HasPrefix(reg, "%r") {
938					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
939				}
940
941				changed = true
942
943				// Flag-altering instructions (i.e. addq) are going to be used so the
944				// flags need to be preserved.
945				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))
946
947				wrappers = append(wrappers, func(k func()) {
948					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
949					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
950				})
951
952				break Args
953			}
954
955			switch section {
956			case "":
957				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
958					symbol = localTargetName(symbol)
959					changed = true
960				}
961
962			case "PLT":
963				if classifyInstruction(instructionName, argNodes) != instrJump {
964					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
965				}
966
967				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
968					symbol = localTargetName(symbol)
969					changed = true
970				} else if !symbolIsLocal && !isSynthesized(symbol) {
971					// Unknown symbol via PLT is an
972					// out-call from the module, e.g.
973					// memcpy.
974					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
975					symbol = redirectorName(symbol)
976				}
977
978				changed = true
979
980			case "GOTPCREL":
981				if len(offset) > 0 {
982					return nil, errors.New("loading from GOT with offset is unsupported")
983				}
984				if !d.isRIPRelative(memRef) {
985					return nil, errors.New("GOT access must be IP-relative")
986				}
987
988				useGOT := false
989				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
990					symbol = localTargetName(symbol)
991					changed = true
992				} else if !isSynthesized(symbol) {
993					useGOT = true
994				}
995
996				classification := classifyInstruction(instructionName, argNodes)
997				if classification != instrThreeArg && classification != instrCompare && i != 0 {
998					return nil, errors.New("GOT access must be source operand")
999				}
1000
1001				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
1002				var targetReg string
1003				var redzoneCleared bool
1004				switch classification {
1005				case instrPush:
1006					wrappers = append(wrappers, push(d.output))
1007					targetReg = "%rax"
1008				case instrConditionalMove:
1009					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
1010					fallthrough
1011				case instrMove:
1012					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1013					targetReg = d.contents(argNodes[1])
1014				case instrCompare:
1015					otherSource := d.contents(argNodes[i^1])
1016					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
1017					redzoneCleared = true
1018					wrappers = append(wrappers, saveRegWrapper)
1019					if i == 0 {
1020						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
1021					} else {
1022						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
1023					}
1024					targetReg = tempReg
1025				case instrTransformingMove:
1026					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1027					targetReg = d.contents(argNodes[1])
1028					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
1029					if isValidLEATarget(targetReg) {
1030						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
1031					}
1032				case instrCombine:
1033					targetReg = d.contents(argNodes[1])
1034					if !isValidLEATarget(targetReg) {
1035						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
1036					}
1037					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
1038					redzoneCleared = true
1039					wrappers = append(wrappers, saveRegWrapper)
1040
1041					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
1042					targetReg = tempReg
1043				case instrMemoryVectorCombine:
1044					assertNodeType(argNodes[1], ruleRegisterOrConstant)
1045					targetReg = d.contents(argNodes[1])
1046					if isValidLEATarget(targetReg) {
1047						return nil, errors.New("target register must be an XMM register")
1048					}
1049					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1050					wrappers = append(wrappers, saveRegWrapper)
1051					redzoneCleared = true
1052					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
1053					targetReg = tempReg
1054				case instrThreeArg:
1055					if n := len(argNodes); n != 3 {
1056						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
1057					}
1058					if i != 0 && i != 1 {
1059						return nil, errors.New("GOT access must be from source operand")
1060					}
1061					targetReg = d.contents(argNodes[2])
1062
1063					otherSource := d.contents(argNodes[1])
1064					if i == 1 {
1065						otherSource = d.contents(argNodes[0])
1066					}
1067
1068					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
1069					redzoneCleared = true
1070					wrappers = append(wrappers, saveRegWrapper)
1071
1072					if i == 0 {
1073						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
1074					} else {
1075						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
1076					}
1077					targetReg = tempReg
1078				default:
1079					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
1080				}
1081
1082				if !isValidLEATarget(targetReg) {
1083					// Sometimes the compiler will load from the GOT to an
1084					// XMM register, which is not a valid target of an LEA
1085					// instruction.
1086					saveRegWrapper, tempReg := saveRegister(d.output, nil)
1087					wrappers = append(wrappers, saveRegWrapper)
1088					isAVX := strings.HasPrefix(instructionName, "v")
1089					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
1090					targetReg = tempReg
1091					if redzoneCleared {
1092						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
1093					}
1094					redzoneCleared = true
1095				}
1096
1097				if symbol == "OPENSSL_ia32cap_P" {
1098					// Flag-altering instructions (i.e. addq) are going to be used so the
1099					// flags need to be preserved.
1100					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
1101					wrappers = append(wrappers, func(k func()) {
1102						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
1103						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
1104					})
1105				} else if useGOT {
1106					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
1107				} else {
1108					wrappers = append(wrappers, func(k func()) {
1109						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
1110					})
1111				}
1112				changed = true
1113				break Args
1114
1115			default:
1116				return nil, fmt.Errorf("Unknown section type %q", section)
1117			}
1118
1119			if !changed && len(section) > 0 {
1120				panic("section was not handled")
1121			}
1122			section = ""
1123
1124			argStr := ""
1125			if isIndirect {
1126				argStr += "*"
1127			}
1128			argStr += symbol
1129			argStr += offset
1130
1131			for ; memRef != nil; memRef = memRef.next {
1132				argStr += d.contents(memRef)
1133			}
1134
1135			args = append(args, argStr)
1136
1137		case ruleGOTLocation:
1138			if instructionName != "movabsq" {
1139				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
1140			}
1141			if i != 0 || len(argNodes) != 2 {
1142				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
1143			}
1144
1145			d.gotDeltaNeeded = true
1146			changed = true
1147			instructionName = "movq"
1148			assertNodeType(arg.up, ruleLocalSymbol)
1149			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
1150			targetReg := d.contents(argNodes[1])
1151			args = append(args, ".Lboringssl_got_delta(%rip)")
1152			wrappers = append(wrappers, func(k func()) {
1153				k()
1154				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
1155			})
1156
1157		case ruleGOTSymbolOffset:
1158			if instructionName != "movabsq" {
1159				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
1160			}
1161			if i != 0 || len(argNodes) != 2 {
1162				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
1163			}
1164
1165			assertNodeType(arg.up, ruleSymbolName)
1166			symbol := d.contents(arg.up)
1167			if strings.HasPrefix(symbol, ".L") {
1168				symbol = d.mapLocalSymbol(symbol)
1169			}
1170			targetReg := d.contents(argNodes[1])
1171
1172			var prefix string
1173			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
1174			if isGOTOFF {
1175				prefix = "gotoff"
1176				d.gotOffOffsetsNeeded[symbol] = struct{}{}
1177			} else {
1178				prefix = "got"
1179				d.gotOffsetsNeeded[symbol] = struct{}{}
1180			}
1181			changed = true
1182
1183			wrappers = append(wrappers, func(k func()) {
1184				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
1185				// of writing) emits 64-bit relocations anyway, so the following four bytes
1186				// get stomped. Thus we use 64-bit offsets.
1187				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
1188			})
1189
1190		default:
1191			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
1192		}
1193	}
1194
1195	if changed {
1196		d.writeCommentedNode(statement)
1197		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
1198		wrappers.do(func() {
1199			d.output.WriteString(replacement)
1200		})
1201	} else {
1202		d.writeNode(statement)
1203	}
1204
1205	return statement, nil
1206}
1207
1208func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1209	lastStatement := statement
1210	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1211		node := skipWS(statement.up)
1212		if node == nil {
1213			d.writeNode(statement)
1214			continue
1215		}
1216
1217		switch node.pegRule {
1218		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1219			d.writeNode(statement)
1220
1221		case ruleDirective:
1222			directive := node.up
1223			assertNodeType(directive, ruleDirectiveName)
1224			directiveName := d.contents(directive)
1225			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1226				return lastStatement, nil
1227			}
1228			d.writeNode(statement)
1229
1230		case ruleLabel:
1231			label := node.up
1232			d.writeNode(statement)
1233
1234			if label.pegRule != ruleLocalSymbol {
1235				symbol := d.contents(label)
1236				localSymbol := localTargetName(symbol)
1237				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1238
1239				d.bssAccessorsNeeded[symbol] = localSymbol
1240			}
1241
1242		case ruleLabelContainingDirective:
1243			var err error
1244			statement, err = d.processLabelContainingDirective(statement, node.up)
1245			if err != nil {
1246				return nil, err
1247			}
1248
1249		default:
1250			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1251		}
1252	}
1253
1254	return lastStatement, nil
1255}
1256
1257func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1258	w.WriteString(".p2align 2\n")
1259	w.WriteString(".hidden " + funcName + "\n")
1260	w.WriteString(".type " + funcName + ", @function\n")
1261	w.WriteString(funcName + ":\n")
1262	w.WriteString(".cfi_startproc\n")
1263	writeContents(w)
1264	w.WriteString(".cfi_endproc\n")
1265	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1266}
1267
// transform processes inputs and writes the resulting assembly to w.
//
// It first scans every input to collect the set of label symbols and the
// numbers used by .file directives. It then writes, in order: a .text header
// with an inserted .file/.loc and the BORINGSSL_bcm_text_start label, the
// output of processInput for each input, the BORINGSSL_bcm_text_end label,
// the redirector functions, the BSS accessor functions, processor-specific
// helpers and data, and finally the BORINGSSL_bcm_text_hash object filled
// with fipscommon.UninitHashValue.
//
// It returns the first error from processInput. It panics if a symbol or
// .file number is defined twice or if a .file number cannot be parsed.
func transform(w stringWriter, inputs []inputFile) error {
	// symbols contains all defined symbols.
	symbols := make(map[string]struct{})
	// fileNumbers is the set of IDs seen in .file directives.
	fileNumbers := make(map[int]struct{})
	// maxObservedFileNumber contains the largest seen file number in a
	// .file directive. Zero is not a valid number.
	maxObservedFileNumber := 0
	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
	// checksums in .file directives. If it does so, then this script needs
	// to match that behaviour otherwise warnings result.
	fileDirectivesContainMD5 := false

	// OPENSSL_ia32cap_get will be synthesized by this script.
	symbols["OPENSSL_ia32cap_get"] = struct{}{}

	for _, input := range inputs {
		// Record the symbol name of every label (Statement -> Label ->
		// SymbolName). A name seen twice, in this or any earlier input, is
		// fatal.
		forEachPath(input.ast.up, func(node *node32) {
			symbol := input.contents[node.begin:node.end]
			if _, ok := symbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
			}
			symbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabel, ruleSymbolName)

		// Record the file numbers used by numbered .file directives so
		// that the directive inserted below can use a number that is not
		// already taken.
		forEachPath(input.ast.up, func(node *node32) {
			assertNodeType(node, ruleLocationDirective)
			directive := input.contents[node.begin:node.end]
			if !strings.HasPrefix(directive, ".file") {
				return
			}
			parts := strings.Fields(directive)
			if len(parts) == 2 {
				// This is a .file directive with just a
				// filename. Clang appears to generate just one
				// of these at the beginning of the output for
				// the compilation unit. Ignore it.
				return
			}
			fileNo, err := strconv.Atoi(parts[1])
			if err != nil {
				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
			}

			if _, ok := fileNumbers[fileNo]; ok {
				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
			}
			fileNumbers[fileNo] = struct{}{}

			if fileNo > maxObservedFileNumber {
				maxObservedFileNumber = fileNo
			}

			// Any "md5" token after the file number marks the
			// directive as carrying a checksum.
			for _, token := range parts[2:] {
				if token == "md5" {
					fileDirectivesContainMD5 = true
				}
			}
		}, ruleStatement, ruleLocationDirective)
	}

	// The processor is detected from the first input only; with no inputs
	// it defaults to x86_64.
	processor := x86_64
	if len(inputs) > 0 {
		processor = detectProcessor(inputs[0])
	}

	commentIndicator := "#"
	if processor == aarch64 {
		commentIndicator = "//"
	}

	d := &delocation{
		symbols:             symbols,
		processor:           processor,
		commentIndicator:    commentIndicator,
		output:              w,
		redirectors:         make(map[string]string),
		bssAccessorsNeeded:  make(map[string]string),
		gotExternalsNeeded:  make(map[string]struct{}),
		gotOffsetsNeeded:    make(map[string]struct{}),
		gotOffOffsetsNeeded: make(map[string]struct{}),
	}

	// Insert a .file directive numbered one past the largest number seen
	// in the inputs, and attribute the start label to line 1 of it.
	w.WriteString(".text\n")
	var fileTrailing string
	if fileDirectivesContainMD5 {
		fileTrailing = " md5 0x00000000000000000000000000000000"
	}
	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_start:\n")

	for _, input := range inputs {
		if err := d.processInput(input); err != nil {
			return err
		}
	}

	// Switch back to .text and mark the end, attributed to line 2 of the
	// inserted file.
	w.WriteString(".text\n")
	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_end:\n")

	// Emit redirector functions. Each is a single jump instruction.
	// The keys are sorted so the emitted order does not depend on map
	// iteration order.
	var redirectorNames []string
	for name := range d.redirectors {
		redirectorNames = append(redirectorNames, name)
	}
	sort.Strings(redirectorNames)

	for _, name := range redirectorNames {
		// name is the jump target; redirector is the label being defined.
		redirector := d.redirectors[name]
		switch d.processor {
		case aarch64:
			writeAarch64Function(w, redirector, func(w stringWriter) {
				w.WriteString("\tb " + name + "\n")
			})

		case x86_64:
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			w.WriteString("\tjmp\t" + name + "\n")
		}
	}

	var accessorNames []string
	for accessor := range d.bssAccessorsNeeded {
		accessorNames = append(accessorNames, accessor)
	}
	sort.Strings(accessorNames)

	// Emit BSS accessor functions. Each is a single LEA followed by RET.
	for _, name := range accessorNames {
		funcName := accessorName(name)
		target := d.bssAccessorsNeeded[name]

		switch d.processor {
		case x86_64:
			w.WriteString(".type " + funcName + ", @function\n")
			w.WriteString(funcName + ":\n")
			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")

		case aarch64:
			writeAarch64Function(w, funcName, func(w stringWriter) {
				w.WriteString("\tadrp x0, " + target + "\n")
				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
				w.WriteString("\tret\n")
			})
		}
	}

	switch d.processor {
	case aarch64:
		// One helper per entry in gotExternalsNeeded, each referencing the
		// symbol through :got: / :got_lo12: relocations.
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, symbol := range externalNames {
			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
				w.WriteString("\tret\n")
			})
		}

		// Helper that forms the address of OPENSSL_armcap_P in x0.
		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
			w.WriteString("\tret\n")
		})

	case x86_64:
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, name := range externalNames {
			// Each key is split at the first "@" into symbol and
			// section; a key without "@" would make parts[1] panic.
			parts := strings.SplitN(name, "@", 2)
			symbol, section := parts[0], parts[1]
			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
			w.WriteString(symbol + "_" + section + "_external:\n")
			// Ideally this would be .quad foo@GOTPCREL, but clang's
			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
			// we manually sign-extend the value, knowing that the GOT is
			// always at the end, thus foo@GOTPCREL has a positive value.
			w.WriteString("\t.long " + symbol + "@" + section + "\n")
			w.WriteString("\t.long 0\n")
		}

		// The synthesized OPENSSL_ia32cap_get is defined under both its
		// global name and its local target name.
		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
		w.WriteString(".globl OPENSSL_ia32cap_get\n")
		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
		w.WriteString("OPENSSL_ia32cap_get:\n")
		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
		w.WriteString("\tret\n")

		// OPENSSL_ia32cap_addr_delta holds the distance from itself to
		// OPENSSL_ia32cap_P.
		w.WriteString(".extern OPENSSL_ia32cap_P\n")
		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")

		// The remaining data is only emitted when processing the inputs
		// requested it.
		if d.gotDeltaNeeded {
			w.WriteString(".Lboringssl_got_delta:\n")
			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
		}

		for _, name := range sortedSet(d.gotOffsetsNeeded) {
			w.WriteString(".Lboringssl_got_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOT\n")
		}
		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOTOFF\n")
		}
	}

	// Emit the hash object, one .byte line per byte of
	// fipscommon.UninitHashValue.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range fipscommon.UninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}
1488
// preprocess runs the file at path through the C preprocessor and returns
// what the preprocessor wrote to standard output.
//
// cppCommand is the command to run (program followed by its arguments); path
// is appended as the final argument. The child's standard error is forwarded
// to this process's standard error. If the command cannot be started or exits
// unsuccessfully, the returned error wraps the underlying error and names both
// the input and the program so that the failing file can be identified.
func preprocess(cppCommand []string, path string) ([]byte, error) {
	args := make([]string, 0, len(cppCommand)+1)
	args = append(args, cppCommand...)
	args = append(args, path)

	var result bytes.Buffer
	cpp := exec.Command(args[0], args[1:]...)
	cpp.Stderr = os.Stderr
	cpp.Stdout = &result

	if err := cpp.Run(); err != nil {
		// A bare "exit status 1" is useless when several inputs are
		// preprocessed; say which one failed. %w keeps the original error
		// reachable through errors.Is / errors.As.
		return nil, fmt.Errorf("preprocessing %q with %q: %w", path, args[0], err)
	}

	return result.Bytes(), nil
}
1506
1507func parseInputs(inputs []inputFile, cppCommand []string) error {
1508	for i, input := range inputs {
1509		var contents string
1510
1511		if input.isArchive {
1512			arFile, err := os.Open(input.path)
1513			if err != nil {
1514				return err
1515			}
1516			defer arFile.Close()
1517
1518			ar, err := ar.ParseAR(arFile)
1519			if err != nil {
1520				return err
1521			}
1522
1523			if len(ar) != 1 {
1524				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1525			}
1526
1527			for _, c := range ar {
1528				contents = string(c)
1529			}
1530		} else {
1531			var inBytes []byte
1532			var err error
1533
1534			if len(cppCommand) > 0 {
1535				inBytes, err = preprocess(cppCommand, input.path)
1536			} else {
1537				inBytes, err = os.ReadFile(input.path)
1538			}
1539			if err != nil {
1540				return err
1541			}
1542
1543			contents = string(inBytes)
1544		}
1545
1546		asm := Asm{Buffer: contents, Pretty: true}
1547		asm.Init()
1548		if err := asm.Parse(); err != nil {
1549			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1550		}
1551		ast := asm.AST()
1552
1553		inputs[i].contents = contents
1554		inputs[i].ast = ast
1555	}
1556
1557	return nil
1558}
1559
// includePathFromHeaderFilePath returns an include directory path based on the
// path of a specific header file. It walks up the path and assumes that the
// include files are rooted in a directory called "openssl".
//
// The result is the part of path that precedes the last "openssl" element,
// including its trailing separator (e.g. "include/" for
// "include/openssl/base.h"). If "openssl" is the first element of a relative
// path, the result is ".". An error is returned if path has no "openssl"
// element.
func includePathFromHeaderFilePath(path string) (string, error) {
	dir := path
	for {
		var file string
		dir, file = filepath.Split(dir)

		if file == "openssl" {
			if len(dir) == 0 {
				// Nothing precedes "openssl", so the include root is the
				// current directory. An empty string would be unusable
				// as the argument of a -I flag.
				return ".", nil
			}
			return dir, nil
		}

		if len(dir) == 0 {
			break
		}
		// Drop the trailing separator so the next Split yields the
		// parent element.
		dir = dir[:len(dir)-1]
	}

	return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
}
1581
1582func main() {
1583	// The .a file, if given, is expected to be an archive of textual
1584	// assembly sources. That's odd, but CMake really wants to create
1585	// archive files so it's the only way that we can make it work.
1586	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1587	outFile := flag.String("o", "", "Path to output assembly")
1588	ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs")
1589	ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing")
1590
1591	flag.Parse()
1592
1593	if len(*outFile) == 0 {
1594		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1595		os.Exit(1)
1596	}
1597
1598	var inputs []inputFile
1599	if len(*arInput) > 0 {
1600		inputs = append(inputs, inputFile{
1601			path:      *arInput,
1602			index:     0,
1603			isArchive: true,
1604		})
1605	}
1606
1607	includePaths := make(map[string]struct{})
1608
1609	for i, path := range flag.Args() {
1610		if len(path) == 0 {
1611			continue
1612		}
1613
1614		// Header files are not processed but their path is remembered
1615		// and passed as -I arguments when invoking the preprocessor.
1616		if strings.HasSuffix(path, ".h") {
1617			dir, err := includePathFromHeaderFilePath(path)
1618			if err != nil {
1619				fmt.Fprintf(os.Stderr, "%s\n", err)
1620				os.Exit(1)
1621			}
1622			includePaths[dir] = struct{}{}
1623			continue
1624		}
1625
1626		inputs = append(inputs, inputFile{
1627			path:  path,
1628			index: i + 1,
1629		})
1630	}
1631
1632	var cppCommand []string
1633	if len(*ccPath) > 0 {
1634		cppCommand = append(cppCommand, *ccPath)
1635		cppCommand = append(cppCommand, strings.Fields(*ccFlags)...)
1636		// Some of ccFlags might be superfluous when running the
1637		// preprocessor, but we don't want the compiler complaining that
1638		// "argument unused during compilation".
1639		cppCommand = append(cppCommand, "-Wno-unused-command-line-argument")
1640		// We are preprocessing for assembly output and need to simulate that
1641		// environment for arm_arch.h.
1642		cppCommand = append(cppCommand, "-D__ASSEMBLER__=1")
1643
1644		for includePath := range includePaths {
1645			cppCommand = append(cppCommand, "-I"+includePath)
1646		}
1647
1648		// -E requests only preprocessing.
1649		cppCommand = append(cppCommand, "-E")
1650	}
1651
1652	if err := parseInputs(inputs, cppCommand); err != nil {
1653		fmt.Fprintf(os.Stderr, "%s\n", err)
1654		os.Exit(1)
1655	}
1656
1657	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1658	if err != nil {
1659		panic(err)
1660	}
1661	defer out.Close()
1662
1663	if err := transform(out, inputs); err != nil {
1664		fmt.Fprintf(os.Stderr, "%s\n", err)
1665		os.Exit(1)
1666	}
1667}
1668
1669func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1670	if node == nil {
1671		return
1672	}
1673
1674	if len(rules) == 0 {
1675		cb(node)
1676		return
1677	}
1678
1679	rule := rules[0]
1680	childRules := rules[1:]
1681
1682	for ; node != nil; node = node.next {
1683		if node.pegRule != rule {
1684			continue
1685		}
1686
1687		if len(childRules) == 0 {
1688			cb(node)
1689		} else {
1690			forEachPath(node.up, cb, childRules...)
1691		}
1692	}
1693}
1694
1695func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1696	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1697	}
1698	return node
1699}
1700
// skipWS returns the first node in the sibling chain starting at node whose
// rule is not ruleWS, or nil if there is no such node.
func skipWS(node *node32) *node32 {
	return skipNodes(node, ruleWS)
}
1704
1705func assertNodeType(node *node32, expected pegRule) {
1706	if rule := node.pegRule; rule != expected {
1707		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1708	}
1709}
1710
// wrapperFunc surrounds a continuation with extra work. It is handed the
// continuation and decides whether, and at which point, to call it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrapperFuncs. The first element is the
// outermost wrapper.
type wrapperStack []wrapperFunc

// do runs baseCase nested inside the wrappers on the stack: the first wrapper
// is called with a continuation that runs the second, and so on, with
// baseCase as the innermost continuation. Wrappers are removed from the stack
// as they are invoked.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	// Pop the outermost wrapper before calling it so that the continuation
	// sees only the wrappers that remain.
	outermost, remaining := pending[0], pending[1:]
	*w = remaining
	outermost(func() {
		w.do(baseCase)
	})
}
1725
// localTargetName returns the name of the local label that stands in for the
// global symbol called name: name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
1731
// isSynthesized reports whether symbol is one of the names recognised as
// synthesized: "OPENSSL_ia32cap_get", any name ending in "_bss_get", or any
// name starting with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
1737
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1741
// sectionType returns the type of a section, i.e. its name truncated before
// the second dot: “.text.foo” is a “.text” section. Any section whose type
// begins with “.debug_” is reported as “.debug”. The boolean is false, and
// the string empty, when section does not start with a dot.
func sectionType(section string) (string, bool) {
	if !strings.HasPrefix(section, ".") {
		return "", false
	}

	kind := section
	// Look for a second dot after the leading one and cut there.
	if dot := strings.IndexByte(kind[1:], '.'); dot >= 0 {
		kind = kind[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		kind = ".debug"
	}
	return kind, true
}
1760
// accessorName returns the name of the accessor function for the BSS symbol
// called name, which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1766
1767func (d *delocation) mapLocalSymbol(symbol string) string {
1768	if d.currentInput.index == 0 {
1769		return symbol
1770	}
1771	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1772}
1773
1774func detectProcessor(input inputFile) processorType {
1775	for statement := input.ast.up; statement != nil; statement = statement.next {
1776		node := skipNodes(statement.up, ruleWS)
1777		if node == nil || node.pegRule != ruleInstruction {
1778			continue
1779		}
1780
1781		instruction := node.up
1782		instructionName := input.contents[instruction.begin:instruction.end]
1783
1784		switch instructionName {
1785		case "movq", "call", "leaq":
1786			return x86_64
1787		case "str", "bl", "ldr", "st1":
1788			return aarch64
1789		}
1790	}
1791
1792	panic("processed entire input and didn't recognise any instructions.")
1793}
1794
// sortedSet returns the keys of m as a new slice in ascending order. The
// result is non-nil even when m is empty.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for k := range m {
		keys[next] = k
		next++
	}
	sort.Strings(keys)
	return keys
}
1803