• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2017, Google Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// delocate performs several transformations of textual assembly code. See
16// crypto/fipsmodule/FIPS.md for an overview.
17package main
18
19import (
20	"bytes"
21	"errors"
22	"flag"
23	"fmt"
24	"os"
25	"os/exec"
26	"path/filepath"
27	"sort"
28	"strconv"
29	"strings"
30
31	"boringssl.googlesource.com/boringssl/util/ar"
32	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
33)
34
// inputFile represents a textual assembly file.
type inputFile struct {
	// path identifies the input. Presumably this is a filesystem path —
	// confirm against the code that populates it.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file. AST nodes are turned
	// back into text by slicing this string with their begin/end offsets.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
49
// stringWriter is the output sink for emitted assembly: anything that
// provides a WriteString method with this signature.
type stringWriter interface {
	WriteString(string) (int, error)
}
53
// processorType identifies which instruction set the assembly being
// processed is written for.
type processorType int

const (
	// x86_64 selects the Intel instruction handling. Numbering starts at
	// one, so the zero value of processorType matches neither constant.
	x86_64 processorType = iota + 1
	// aarch64 selects the Aarch64 instruction handling.
	aarch64
)
60
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which instruction handler is used for
	// instruction statements.
	processor processorType
	// output receives the transformed assembly.
	output stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the input most recently handed to processInput. The
	// contents method slices its text to recover the source of a node.
	currentInput inputFile
}
92
93func (d *delocation) contents(node *node32) string {
94	return d.currentInput.contents[node.begin:node.end]
95}
96
97// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) {
99	if _, err := d.output.WriteString(d.contents(node)); err != nil {
100		panic(err)
101	}
102}
103
104func (d *delocation) writeCommentedNode(node *node32) {
105	line := d.contents(node)
106	if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil {
107		panic(err)
108	}
109}
110
111func locateError(err error, with *node32, in inputFile) error {
112	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
113	var line int
114	for _, pos := range posMap {
115		line = pos.line
116	}
117
118	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
119}
120
121func (d *delocation) processInput(input inputFile) (err error) {
122	d.currentInput = input
123
124	var origStatement *node32
125	defer func() {
126		if err := recover(); err != nil {
127			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
128		}
129	}()
130
131	for statement := input.ast.up; statement != nil; statement = statement.next {
132		assertNodeType(statement, ruleStatement)
133		origStatement = statement
134
135		node := skipWS(statement.up)
136		if node == nil {
137			d.writeNode(statement)
138			continue
139		}
140
141		switch node.pegRule {
142		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
143			d.writeNode(statement)
144		case ruleDirective:
145			statement, err = d.processDirective(statement, node.up)
146		case ruleLabelContainingDirective:
147			statement, err = d.processLabelContainingDirective(statement, node.up)
148		case ruleLabel:
149			statement, err = d.processLabel(statement, node.up)
150		case ruleInstruction:
151			switch d.processor {
152			case x86_64:
153				statement, err = d.processIntelInstruction(statement, node.up)
154			case aarch64:
155				statement, err = d.processAarch64Instruction(statement, node.up)
156			default:
157				panic("unknown processor")
158			}
159		default:
160			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
161		}
162
163		if err != nil {
164			return locateError(err, origStatement, input)
165		}
166	}
167
168	return nil
169}
170
171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
172	assertNodeType(directive, ruleDirectiveName)
173	directiveName := d.contents(directive)
174
175	var args []string
176	forEachPath(directive, func(arg *node32) {
177		// If the argument is a quoted string, use the raw contents.
178		// (Note that this doesn't unescape the string, but that's not
179		// needed so far.
180		if arg.up != nil {
181			arg = arg.up
182			assertNodeType(arg, ruleQuotedArg)
183			if arg.up == nil {
184				args = append(args, "")
185				return
186			}
187			arg = arg.up
188			assertNodeType(arg, ruleQuotedText)
189		}
190		args = append(args, d.contents(arg))
191	}, ruleArgs, ruleArg)
192
193	switch directiveName {
194	case "comm", "lcomm":
195		if len(args) < 1 {
196			return nil, errors.New("comm directive has no arguments")
197		}
198		d.bssAccessorsNeeded[args[0]] = args[0]
199		d.writeNode(statement)
200
201	case "data":
202		// ASAN and some versions of MSAN are adding a .data section,
203		// and adding references to symbols within it to the code. We
204		// will have to work around this in the future.
205		return nil, errors.New(".data section found in module")
206
207	case "section":
208		section := args[0]
209
210		if section == ".data.rel.ro" {
211			// In a normal build, this is an indication of a
212			// problem but any references from the module to this
213			// section will result in a relocation and thus will
214			// break the integrity check. ASAN can generate these
215			// sections and so we will likely have to work around
216			// that in the future.
217			return nil, errors.New(".data.rel.ro section found in module")
218		}
219
220		sectionType, ok := sectionType(section)
221		if !ok {
222			// Unknown sections are permitted in order to be robust
223			// to different compiler modes.
224			d.writeNode(statement)
225			break
226		}
227
228		switch sectionType {
229		case ".rodata", ".text":
230			// Move .rodata to .text so it may be accessed without
231			// a relocation. GCC with -fmerge-constants will place
232			// strings into separate sections, so we move all
233			// sections named like .rodata. Also move .text.startup
234			// so the self-test function is also in the module.
235			d.writeCommentedNode(statement)
236			d.output.WriteString(".text\n")
237
238		case ".data":
239			// See above about .data
240			return nil, errors.New(".data section found in module")
241
242		case ".init_array", ".fini_array", ".ctors", ".dtors":
243			// init_array/ctors/dtors contains function
244			// pointers to constructor/destructor
245			// functions. These contain relocations, but
246			// they're in a different section anyway.
247			d.writeNode(statement)
248			break
249
250		case ".debug", ".note":
251			d.writeNode(statement)
252			break
253
254		case ".bss":
255			d.writeNode(statement)
256			return d.handleBSS(statement)
257		}
258
259	default:
260		d.writeNode(statement)
261	}
262
263	return statement, nil
264}
265
266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool {
267	changed := false
268	assertNodeType(expr, ruleSymbolExpr)
269
270	for expr != nil {
271		atom := expr.up
272		assertNodeType(atom, ruleSymbolAtom)
273
274		for term := atom.up; term != nil; term = skipWS(term.next) {
275			if term.pegRule == ruleSymbolExpr {
276				changed = d.processSymbolExpr(term, b) || changed
277				continue
278			}
279
280			if term.pegRule != ruleLocalSymbol {
281				b.WriteString(d.contents(term))
282				continue
283			}
284
285			oldSymbol := d.contents(term)
286			newSymbol := d.mapLocalSymbol(oldSymbol)
287			if newSymbol != oldSymbol {
288				changed = true
289			}
290
291			b.WriteString(newSymbol)
292		}
293
294		next := skipWS(atom.next)
295		if next == nil {
296			break
297		}
298		assertNodeType(next, ruleSymbolOperator)
299		b.WriteString(d.contents(next))
300		next = skipWS(next.next)
301		assertNodeType(next, ruleSymbolExpr)
302		expr = next
303	}
304	return changed
305}
306
307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
308	// The symbols within directives need to be mapped so that local
309	// symbols in two different .s inputs don't collide.
310	changed := false
311	assertNodeType(directive, ruleLabelContainingDirectiveName)
312	name := d.contents(directive)
313
314	node := directive.next
315	assertNodeType(node, ruleWS)
316
317	node = node.next
318	assertNodeType(node, ruleSymbolArgs)
319
320	var args []string
321	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
322		assertNodeType(node, ruleSymbolArg)
323		arg := node.up
324		assertNodeType(arg, ruleSymbolExpr)
325
326		var b strings.Builder
327		changed = d.processSymbolExpr(arg, &b) || changed
328
329		args = append(args, b.String())
330	}
331
332	if !changed {
333		d.writeNode(statement)
334	} else {
335		d.writeCommentedNode(statement)
336		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
337	}
338
339	return statement, nil
340}
341
342func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
343	symbol := d.contents(label)
344
345	switch label.pegRule {
346	case ruleLocalLabel:
347		d.output.WriteString(symbol + ":\n")
348	case ruleLocalSymbol:
349		// symbols need to be mapped so that local symbols from two
350		// different .s inputs don't collide.
351		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
352	case ruleSymbolName:
353		d.output.WriteString(localTargetName(symbol) + ":\n")
354		d.writeNode(statement)
355	default:
356		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
357	}
358
359	return statement, nil
360}
361
362// instructionArgs collects all the arguments to an instruction.
363func instructionArgs(node *node32) (argNodes []*node32) {
364	for node = skipWS(node); node != nil; node = skipWS(node.next) {
365		assertNodeType(node, ruleInstructionArg)
366		argNodes = append(argNodes, node.up)
367	}
368
369	return argNodes
370}
371
372// Aarch64 support
373
// gotHelperName gives the name of the synthesised function that returns the
// address of |symbol| from the GOT.
func gotHelperName(symbol string) string {
	const prefix = ".Lboringssl_loadgot_"
	return prefix + symbol
}
379
380// loadAarch64Address emits instructions to put the address of |symbol|
381// (optionally adjusted by |offsetStr|) into |targetReg|.
382func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
383	// There are two paths here: either the symbol is known to be local in which
384	// case adr is used to get the address (within 1MiB), or a GOT reference is
385	// really needed in which case the code needs to jump to a helper function.
386	//
387	// A helper function is needed because using code appears to be the only way
388	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
389	// the module, but on Aarch64 that results in a "COPY" relocation and linker
390	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
391	// a function outside of the module that returns the address from the GOT in
392	// x0.
393
394	d.writeCommentedNode(statement)
395
396	_, isKnown := d.symbols[symbol]
397	isLocal := strings.HasPrefix(symbol, ".L")
398	if isKnown || isLocal || isSynthesized(symbol) {
399		if isLocal {
400			symbol = d.mapLocalSymbol(symbol)
401		} else if isKnown {
402			symbol = localTargetName(symbol)
403		}
404
405		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")
406
407		return statement, nil
408	}
409
410	if len(offsetStr) != 0 {
411		panic("non-zero offset for helper-based reference")
412	}
413
414	var helperFunc string
415	if symbol == "OPENSSL_armcap_P" {
416		helperFunc = ".LOPENSSL_armcap_P_addr"
417	} else {
418		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
419		// instruction, which would normally do the dereferencing, needs to be
420		// dropped. GOT helpers have to include the dereference because the
421		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
422		// instruction.
423		d.gotExternalsNeeded[symbol] = struct{}{}
424		helperFunc = gotHelperName(symbol)
425	}
426
427	// Clear the red-zone. I can't find a definitive answer about whether Linux
428	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
429	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
430	d.output.WriteString("\tsub sp, sp, 128\n")
431
432	// Save x0 (which will be stomped by the return value) and the link register
433	// to the stack. Then save the program counter into the link register and
434	// jump to the helper function.
435	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
436	d.output.WriteString("\tbl " + helperFunc + "\n")
437
438	if targetReg == "x0" {
439		// If the target happens to be x0 then restore the link register from the
440		// stack and send the saved value of x0 to the zero register.
441		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
442	} else {
443		// Otherwise move the result into place and restore registers.
444		d.output.WriteString("\tmov " + targetReg + ", x0\n")
445		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
446	}
447
448	// Revert the red-zone adjustment.
449	d.output.WriteString("\tadd sp, sp, 128\n")
450
451	return statement, nil
452}
453
454func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
455	assertNodeType(instruction, ruleInstructionName)
456	instructionName := d.contents(instruction)
457
458	argNodes := instructionArgs(instruction.next)
459
460	switch instructionName {
461	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
462		// These functions are special because they take a condition-code name as
463		// an argument and that looks like a symbol reference.
464		d.writeNode(statement)
465		return statement, nil
466
467	case "mrs":
468		// Functions that take special register names also look like a symbol
469		// reference to the parser.
470		d.writeNode(statement)
471		return statement, nil
472
473	case "adrp":
474		// adrp always generates a relocation, even when the target symbol is in the
475		// same segment, because the page-offset of the code isn't known until link
476		// time. Thus adrp instructions are turned into either adr instructions
477		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
478		// which load the full address. Later instructions, which add the low 12 bits
479		// of offset, are tweaked to remove the offset since it's already included.
480		// Loads of GOT symbols are slightly more complex because it's not possible to
481		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
482		// instruction, which would normally do the dereferencing, is dropped
483		// completely. (Or turned into a mov if it targets a different register.)
484		assertNodeType(argNodes[0], ruleRegisterOrConstant)
485		targetReg := d.contents(argNodes[0])
486		if !strings.HasPrefix(targetReg, "x") {
487			panic("adrp targetting register " + targetReg + ", which has the wrong size")
488		}
489
490		var symbol, offset string
491		switch argNodes[1].pegRule {
492		case ruleGOTSymbolOffset:
493			symbol = d.contents(argNodes[1].up)
494		case ruleMemoryRef:
495			assertNodeType(argNodes[1].up, ruleSymbolRef)
496			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
497			if len(empty) != 0 {
498				panic("prefix offsets found for adrp")
499			}
500			symbol = d.contents(node)
501			_, offset = d.gatherOffsets(node.next, "")
502		default:
503			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
504		}
505
506		return d.loadAarch64Address(statement, targetReg, symbol, offset)
507	}
508
509	var args []string
510	changed := false
511
512	for _, arg := range argNodes {
513		fullArg := arg
514
515		switch arg.pegRule {
516		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
517			args = append(args, d.contents(fullArg))
518
519		case ruleGOTSymbolOffset:
520			// These should only be arguments to adrp and thus unreachable.
521			panic("unreachable")
522
523		case ruleMemoryRef:
524			ref := arg.up
525
526			switch ref.pegRule {
527			case ruleSymbolRef:
528				// This is a branch. Either the target needs to be written to a local
529				// version of the symbol to ensure that no relocations are emitted, or
530				// it needs to jump to a redirector function.
531				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
532				changed = didChange
533
534				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
535					symbol = localTargetName(symbol)
536					changed = true
537				} else if !symbolIsLocal && !isSynthesized(symbol) {
538					redirector := redirectorName(symbol)
539					d.redirectors[symbol] = redirector
540					symbol = redirector
541					changed = true
542				} else if didChange && symbolIsLocal && len(offset) > 0 {
543					// didChange is set when the inputFile index is not 0; which is the index of the
544					// first file copied to the output, which is the generated assembly of bcm.c.
545					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
546					// in order to ensure they don't collide. `index` gets incremented per file.
547					// If there is offset after the symbol, append the `offset`.
548					symbol = symbol + offset
549				}
550
551				args = append(args, symbol)
552
553			case ruleARMBaseIndexScale:
554				parts := ref.up
555				assertNodeType(parts, ruleARMRegister)
556				baseAddrReg := d.contents(parts)
557				parts = skipWS(parts.next)
558
559				// Only two forms need special handling. First there's memory references
560				// like "[x*, :got_lo12:foo]". The base register here will have been the
561				// target of an adrp instruction to load the page address, but the adrp
562				// will have turned into loading the full address *and dereferencing it*,
563				// above. Thus this instruction needs to be dropped otherwise we'll be
564				// dereferencing twice.
565				//
566				// Second there are forms like "[x*, :lo12:foo]" where the code has used
567				// adrp to load the page address into x*. That adrp will have been turned
568				// into loading the full address so just the offset needs to be dropped.
569
570				if parts != nil {
571					if parts.pegRule == ruleARMGOTLow12 {
572						if instructionName != "ldr" {
573							panic("Symbol reference outside of ldr instruction")
574						}
575
576						if skipWS(parts.next) != nil || parts.up.next != nil {
577							panic("can't handle tweak or post-increment with symbol references")
578						}
579
580						// The GOT helper already dereferenced the entry so, at most, just a mov
581						// is needed to put things in the right register.
582						d.writeCommentedNode(statement)
583						if baseAddrReg != args[0] {
584							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
585						}
586						return statement, nil
587					} else if parts.pegRule == ruleLow12BitsSymbolRef {
588						if instructionName != "ldr" {
589							panic("Symbol reference outside of ldr instruction")
590						}
591
592						if skipWS(parts.next) != nil || parts.up.next != nil {
593							panic("can't handle tweak or post-increment with symbol references")
594						}
595
596						// Suppress the offset; adrp loaded the full address.
597						args = append(args, "["+baseAddrReg+"]")
598						changed = true
599						continue
600					}
601				}
602
603				args = append(args, d.contents(fullArg))
604
605			case ruleLow12BitsSymbolRef:
606				// These are the second instruction in a pair:
607				//   adrp x0, symbol           // Load the page address into x0
608				//   add x1, x0, :lo12:symbol  // Adds the page offset.
609				//
610				// The adrp instruction will have been turned into a sequence that loads
611				// the full address, above, thus the offset is turned into zero. If that
612				// results in the instruction being a nop, then it is deleted.
613				if instructionName != "add" {
614					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
615				}
616
617				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
618					panic("address arithmetic with incorrectly sized register")
619				}
620
621				if args[0] == args[1] {
622					d.writeCommentedNode(statement)
623					return statement, nil
624				}
625
626				args = append(args, "#0")
627				changed = true
628
629			default:
630				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
631			}
632
633		default:
634			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
635		}
636	}
637
638	if changed {
639		d.writeCommentedNode(statement)
640		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
641		d.output.WriteString(replacement)
642	} else {
643		d.writeNode(statement)
644	}
645
646	return statement, nil
647}
648
649func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
650	for symRef != nil && symRef.pegRule == ruleOffset {
651		offset := d.contents(symRef)
652		if offset[0] != '+' && offset[0] != '-' {
653			offset = "+" + offset
654		}
655		offsets = offsets + offset
656		symRef = symRef.next
657	}
658	return symRef, offsets
659}
660
661func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
662	if memRef.pegRule != ruleSymbolRef {
663		return "", "", "", false, false, memRef
664	}
665
666	symRef := memRef.up
667	nextRef = memRef.next
668
669	// (Offset* '+')?
670	symRef, offset = d.gatherOffsets(symRef, offset)
671
672	// (LocalSymbol / SymbolName)
673	symbol = d.contents(symRef)
674	if symRef.pegRule == ruleLocalSymbol {
675		symbolIsLocal = true
676		mapped := d.mapLocalSymbol(symbol)
677		if mapped != symbol {
678			symbol = mapped
679			didChange = true
680		}
681	}
682	symRef = symRef.next
683
684	// Offset*
685	symRef, offset = d.gatherOffsets(symRef, offset)
686
687	// ('@' Section / Offset*)?
688	if symRef != nil {
689		assertNodeType(symRef, ruleSection)
690		section = d.contents(symRef)
691		symRef = symRef.next
692
693		symRef, offset = d.gatherOffsets(symRef, offset)
694	}
695
696	if symRef != nil {
697		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
698	}
699
700	return
701}
702
703/* Intel */
704
// instructionType is the coarse classification of an instruction, as returned
// by classifyInstruction.
type instructionType int

const (
	// instrPush is a one-argument push.
	instrPush instructionType = iota
	// instrMove is a two-argument move.
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	// instrJump is a one-argument call or (conditional) jump.
	instrJump
	// instrConditionalMove is a two-argument conditional move.
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	// instrOther is every instruction that matches none of the above.
	instrOther
)
728
729func classifyInstruction(instr string, args []*node32) instructionType {
730	switch instr {
731	case "push", "pushq":
732		if len(args) == 1 {
733			return instrPush
734		}
735
736	case "mov", "movq", "vmovq", "movsd", "vmovsd":
737		if len(args) == 2 {
738			return instrMove
739		}
740
741	case "cmovneq", "cmoveq":
742		if len(args) == 2 {
743			return instrConditionalMove
744		}
745
746	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
747		if len(args) == 1 {
748			return instrJump
749		}
750
751	case "orq", "andq", "xorq":
752		if len(args) == 2 {
753			return instrCombine
754		}
755
756	case "cmpq":
757		if len(args) == 2 {
758			return instrCompare
759		}
760
761	case "sarxq", "shlxq", "shrxq":
762		if len(args) == 3 {
763			return instrThreeArg
764		}
765
766	case "vpbroadcastq":
767		if len(args) == 2 {
768			return instrTransformingMove
769		}
770
771	case "movlps", "movhps":
772		if len(args) == 2 {
773			return instrMemoryVectorCombine
774		}
775	}
776
777	return instrOther
778}
779
780func push(w stringWriter) wrapperFunc {
781	return func(k func()) {
782		w.WriteString("\tpushq %rax\n")
783		k()
784		w.WriteString("\txchg %rax, (%rsp)\n")
785	}
786}
787
788func compare(w stringWriter, instr, a, b string) wrapperFunc {
789	return func(k func()) {
790		k()
791		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
792	}
793}
794
795func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
796	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
797
798	return func(k func()) {
799		if !redzoneCleared {
800			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
801		}
802		w.WriteString("\tpushf\n")
803		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
804		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
805		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
806		w.WriteString("\tpopf\n")
807		if !redzoneCleared {
808			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
809		}
810	}
811}
812
813func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
814	return func(k func()) {
815		if !redzoneCleared {
816			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
817			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
818		}
819		w.WriteString("\tpushfq\n")
820		k()
821		w.WriteString("\tpopfq\n")
822	}
823}
824
825func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
826	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}
827
828	var reg string
829NextCandidate:
830	for _, candidate := range candidates {
831		for _, avoid := range avoidRegs {
832			if candidate == avoid {
833				continue NextCandidate
834			}
835		}
836
837		reg = candidate
838		break
839	}
840
841	if len(reg) == 0 {
842		panic("too many excluded registers")
843	}
844
845	return func(k func()) {
846		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
847		w.WriteString("\tpushq " + reg + "\n")
848		k()
849		w.WriteString("\tpopq " + reg + "\n")
850		w.WriteString("\tleaq 128(%rsp), %rsp\n")
851	}, reg
852}
853
854func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
855	return func(k func()) {
856		k()
857		prefix := ""
858		if isAVX {
859			prefix = "v"
860		}
861		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
862	}
863}
864
865func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
866	return func(k func()) {
867		k()
868		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
869	}
870}
871
872func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
873	return func(k func()) {
874		k()
875		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
876	}
877}
878
879func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
880	return func(k func()) {
881		k()
882		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
883	}
884}
885
886func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
887	return func(k func()) {
888		k()
889		// These instructions can only read from memory, so push
890		// tempReg and read from the stack. Note we assume the red zone
891		// was previously cleared by saveRegister().
892		w.WriteString("\tpushq " + source + "\n")
893		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
894		w.WriteString("\tleaq 8(%rsp), %rsp\n")
895	}
896}
897
// isValidLEATarget reports whether |reg| is something other than a vector
// register, i.e. its name does not start with %xmm, %ymm or %zmm.
func isValidLEATarget(reg string) bool {
	for _, vectorPrefix := range []string{"%xmm", "%ymm", "%zmm"} {
		if strings.HasPrefix(reg, vectorPrefix) {
			return false
		}
	}
	return true
}
901
902func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
903	var invertedCondition string
904
905	switch instr {
906	case "cmoveq":
907		invertedCondition = "ne"
908	case "cmovneq":
909		invertedCondition = "e"
910	default:
911		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
912	}
913
914	return func(k func()) {
915		w.WriteString("\tj" + invertedCondition + " 999f\n")
916		k()
917		w.WriteString("999:\n")
918	}
919}
920
921func (d *delocation) isRIPRelative(node *node32) bool {
922	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
923}
924
// processIntelInstruction examines the operands of a single instruction and,
// where an operand needs it, emits a rewritten form of the instruction.
//
// statement is the enclosing statement node and instruction is its
// instruction-name node; the operands are taken from instruction.next. Each
// operand is handled according to its grammar rule:
//
//   - registers, constants and local label references are copied verbatim;
//   - memory references are parsed with parseMemRef and rewritten depending on
//     their section suffix ("", "PLT" or "GOTPCREL"), with a special case for
//     direct references to OPENSSL_ia32cap_P;
//   - _GLOBAL_OFFSET_TABLE_ locations and GOT symbol offsets (movabsq only) are
//     replaced by loads from labels in the .Lboringssl_ namespace.
//
// Rewrites are expressed as a stack of wrappers around the emission of the
// replacement instruction. If nothing changed, the statement is written out
// untouched; otherwise the original is written as a comment followed by the
// wrapped replacement. On success the returned node is statement itself.
func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	// wrappers accumulates the code emitted around the replacement
	// instruction, args the textual operands of that replacement, and
	// changed whether a replacement is needed at all.
	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		// An indirection indicator precedes the real operand; step over it
		// but remember it so that the "*" can be restored below.
		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			// NOTE(review): this assignment overwrites, rather than
			// accumulates, any change recorded for an earlier operand.
			// Presumably harmless because at most one memory operand is
			// expected per instruction — confirm.
			changed = didChange

			if symbol == "OPENSSL_ia32cap_P" && section == "" {
				if instructionName != "leaq" {
					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
				}

				// Only "leaq OPENSSL_ia32cap_P(%rip), <reg>" with no offset
				// is accepted.
				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
				}

				target := argNodes[1]
				assertNodeType(target, ruleRegisterOrConstant)
				reg := d.contents(target)

				if !strings.HasPrefix(reg, "%r") {
					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
				}

				changed = true

				// Flag-altering instructions (i.e. addq) are going to be used so the
				// flags need to be preserved.
				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))

				// This wrapper never calls k, so the generic replacement
				// built after the loop is not emitted; the two instructions
				// below stand in for the original leaq.
				wrappers = append(wrappers, func(k func()) {
					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
				})

				break Args
			}

			switch section {
			case "":
				// A plain reference to a symbol defined in the inputs is
				// redirected to its local target label.
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				}

			case "PLT":
				if classifyInstruction(instructionName, argNodes) != instrJump {
					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
				}

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					// Unknown symbol via PLT is an
					// out-call from the module, e.g.
					// memcpy.
					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
					symbol = redirectorName(symbol)
				}

				// The "@PLT" suffix is always dropped (section is cleared
				// below), so the instruction text changes even when the
				// symbol itself was left alone.
				changed = true

			case "GOTPCREL":
				if len(offset) > 0 {
					return nil, errors.New("loading from GOT with offset is unsupported")
				}
				if !d.isRIPRelative(memRef) {
					return nil, errors.New("GOT access must be IP-relative")
				}

				// useGOT is set when the symbol is neither defined in the
				// inputs nor synthesized, in which case its address is
				// obtained via loadFromGOT below.
				useGOT := false
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !isSynthesized(symbol) {
					useGOT = true
				}

				classification := classifyInstruction(instructionName, argNodes)
				if classification != instrThreeArg && classification != instrCompare && i != 0 {
					return nil, errors.New("GOT access must be source operand")
				}

				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
				// targetReg is the register that ends up holding the
				// symbol's address; redzoneCleared records whether a wrapper
				// added so far came from saveRegister, and is passed on to
				// saveFlags / loadFromGOT.
				var targetReg string
				var redzoneCleared bool
				switch classification {
				case instrPush:
					wrappers = append(wrappers, push(d.output))
					targetReg = "%rax"
				case instrConditionalMove:
					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
					fallthrough
				case instrMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
				case instrCompare:
					// argNodes[i^1] is the other operand of the
					// two-operand compare.
					otherSource := d.contents(argNodes[i^1])
					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)
					// Keep the operands in their original order, with the
					// temporary register in place of the GOT reference.
					if i == 0 {
						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
					} else {
						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
					}
					targetReg = tempReg
				case instrTransformingMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
					if isValidLEATarget(targetReg) {
						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
					}
				case instrCombine:
					targetReg = d.contents(argNodes[1])
					if !isValidLEATarget(targetReg) {
						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
					}
					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
					targetReg = tempReg
				case instrMemoryVectorCombine:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					if isValidLEATarget(targetReg) {
						return nil, errors.New("target register must be an XMM register")
					}
					saveRegWrapper, tempReg := saveRegister(d.output, nil)
					wrappers = append(wrappers, saveRegWrapper)
					redzoneCleared = true
					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
					targetReg = tempReg
				case instrThreeArg:
					if n := len(argNodes); n != 3 {
						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
					}
					if i != 0 && i != 1 {
						return nil, errors.New("GOT access must be from source operand")
					}
					targetReg = d.contents(argNodes[2])

					// otherSource is whichever of the first two operands
					// is not the GOT reference.
					otherSource := d.contents(argNodes[1])
					if i == 1 {
						otherSource = d.contents(argNodes[0])
					}

					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					if i == 0 {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
					} else {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, otherSource, tempReg, targetReg))
					}
					targetReg = tempReg
				default:
					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
				}

				if !isValidLEATarget(targetReg) {
					// Sometimes the compiler will load from the GOT to an
					// XMM register, which is not a valid target of an LEA
					// instruction.
					saveRegWrapper, tempReg := saveRegister(d.output, nil)
					wrappers = append(wrappers, saveRegWrapper)
					isAVX := strings.HasPrefix(instructionName, "v")
					wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg))
					targetReg = tempReg
					if redzoneCleared {
						return nil, fmt.Errorf("internal error: Red Zone was already cleared")
					}
					redzoneCleared = true
				}

				// The innermost wrapper materialises the address in
				// targetReg. None of the three variants below calls k, so
				// the generic replacement built after the loop is never
				// written for GOTPCREL references.
				if symbol == "OPENSSL_ia32cap_P" {
					// Flag-altering instructions (i.e. addq) are going to be used so the
					// flags need to be preserved.
					wrappers = append(wrappers, saveFlags(d.output, redzoneCleared))
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n")
						d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n")
					})
				} else if useGOT {
					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
				} else {
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
					})
				}
				changed = true
				break Args

			default:
				return nil, fmt.Errorf("Unknown section type %q", section)
			}

			if !changed && len(section) > 0 {
				panic("section was not handled")
			}
			section = ""

			// Reassemble the operand text: optional "*", the (possibly
			// renamed) symbol, its offset, and the remaining pieces of the
			// memory reference. The section suffix is not re-emitted.
			argStr := ""
			if isIndirect {
				argStr += "*"
			}
			argStr += symbol
			argStr += offset

			for ; memRef != nil; memRef = memRef.next {
				argStr += d.contents(memRef)
			}

			args = append(args, argStr)

		case ruleGOTLocation:
			if instructionName != "movabsq" {
				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq")
			}
			if i != 0 || len(argNodes) != 2 {
				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form")
			}

			// The movabsq becomes a movq from .Lboringssl_got_delta,
			// followed by an addq that adjusts the loaded value by the
			// distance between that label and the base symbol.
			d.gotDeltaNeeded = true
			changed = true
			instructionName = "movq"
			assertNodeType(arg.up, ruleLocalSymbol)
			baseSymbol := d.mapLocalSymbol(d.contents(arg.up))
			targetReg := d.contents(argNodes[1])
			args = append(args, ".Lboringssl_got_delta(%rip)")
			wrappers = append(wrappers, func(k func()) {
				k()
				d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg))
			})

		case ruleGOTSymbolOffset:
			if instructionName != "movabsq" {
				return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq")
			}
			if i != 0 || len(argNodes) != 2 {
				return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form")
			}

			assertNodeType(arg.up, ruleSymbolName)
			symbol := d.contents(arg.up)
			if strings.HasPrefix(symbol, ".L") {
				symbol = d.mapLocalSymbol(symbol)
			}
			targetReg := d.contents(argNodes[1])

			// Record which kind of offset entry must be emitted for this
			// symbol; prefix selects the matching label namespace.
			var prefix string
			isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF")
			if isGOTOFF {
				prefix = "gotoff"
				d.gotOffOffsetsNeeded[symbol] = struct{}{}
			} else {
				prefix = "got"
				d.gotOffsetsNeeded[symbol] = struct{}{}
			}
			changed = true

			// This wrapper does not call k: the movq below replaces the
			// original movabsq outright.
			wrappers = append(wrappers, func(k func()) {
				// Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time
				// of writing) emits 64-bit relocations anyway, so the following four bytes
				// get stomped. Thus we use 64-bit offsets.
				d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg))
			})

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		// Keep the original statement as a comment, then emit the
		// replacement from inside the wrapper stack.
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		wrappers.do(func() {
			d.output.WriteString(replacement)
		})
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}
1236
1237func (d *delocation) handleBSS(statement *node32) (*node32, error) {
1238	lastStatement := statement
1239	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
1240		node := skipWS(statement.up)
1241		if node == nil {
1242			d.writeNode(statement)
1243			continue
1244		}
1245
1246		switch node.pegRule {
1247		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
1248			d.writeNode(statement)
1249
1250		case ruleDirective:
1251			directive := node.up
1252			assertNodeType(directive, ruleDirectiveName)
1253			directiveName := d.contents(directive)
1254			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
1255				return lastStatement, nil
1256			}
1257			d.writeNode(statement)
1258
1259		case ruleLabel:
1260			label := node.up
1261			d.writeNode(statement)
1262
1263			if label.pegRule != ruleLocalSymbol {
1264				symbol := d.contents(label)
1265				localSymbol := localTargetName(symbol)
1266				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
1267
1268				d.bssAccessorsNeeded[symbol] = localSymbol
1269			}
1270
1271		case ruleLabelContainingDirective:
1272			var err error
1273			statement, err = d.processLabelContainingDirective(statement, node.up)
1274			if err != nil {
1275				return nil, err
1276			}
1277
1278		default:
1279			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
1280		}
1281	}
1282
1283	return lastStatement, nil
1284}
1285
1286func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
1287	w.WriteString(".p2align 2\n")
1288	w.WriteString(".hidden " + funcName + "\n")
1289	w.WriteString(".type " + funcName + ", @function\n")
1290	w.WriteString(funcName + ":\n")
1291	w.WriteString(".cfi_startproc\n")
1292	// We insert a landing pad (`bti c` instruction) unconditionally at the beginning of
1293	// every generated function so that they can be called indirectly (with `blr` or
1294	// `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is
1295	// a no-op on machines or program states not supporting BTI (Branch Target Identification).
1296	// None of the generated function bodies call other functions (with bl or blr), so we only
1297	// insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`.
1298	// Normally we would also generate a .note.gnu.property section to annotate the assembly
1299	// file as BTI-compatible, but if the input assembly files are BTI-compatible, they should
1300	// already have those sections so there is no need to add an extra one ourselves.
1301	w.WriteString("\thint #34 // bti c\n")
1302	writeContents(w)
1303	w.WriteString(".cfi_endproc\n")
1304	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
1305}
1306
// transform writes the delocated form of inputs to w.
//
// It first scans every input to collect the set of defined symbols
// (panicking on a duplicate) and the numbers used by .file directives. The
// processor is then detected from the first input, each input is passed to
// processInput between the BORINGSSL_bcm_text_start and
// BORINGSSL_bcm_text_end labels, and finally the supporting definitions are
// appended: redirector functions, BSS accessor functions,
// processor-specific helpers and data, and the BORINGSSL_bcm_text_hash
// object. The first error returned by processInput is returned unchanged.
func transform(w stringWriter, inputs []inputFile) error {
	// symbols contains all defined symbols.
	symbols := make(map[string]struct{})
	// fileNumbers is the set of IDs seen in .file directives.
	fileNumbers := make(map[int]struct{})
	// maxObservedFileNumber contains the largest seen file number in a
	// .file directive. Zero is not a valid number.
	maxObservedFileNumber := 0
	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
	// checksums in .file directives. If it does so, then this script needs
	// to match that behaviour otherwise warnings result.
	fileDirectivesContainMD5 := false

	// OPENSSL_ia32cap_get will be synthesized by this script.
	symbols["OPENSSL_ia32cap_get"] = struct{}{}

	for _, input := range inputs {
		// Record the name of every label; a name seen twice (within or
		// across inputs) is fatal.
		forEachPath(input.ast.up, func(node *node32) {
			symbol := input.contents[node.begin:node.end]
			if _, ok := symbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
			}
			symbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabel, ruleSymbolName)

		// Gather the file numbers used by .file directives so that an
		// unused one can be picked for the code inserted below.
		forEachPath(input.ast.up, func(node *node32) {
			assertNodeType(node, ruleLocationDirective)
			directive := input.contents[node.begin:node.end]
			if !strings.HasPrefix(directive, ".file") {
				return
			}
			parts := strings.Fields(directive)
			if len(parts) == 2 {
				// This is a .file directive with just a
				// filename. Clang appears to generate just one
				// of these at the beginning of the output for
				// the compilation unit. Ignore it.
				return
			}
			// NOTE(review): a bare ".file" with no operands would make
			// parts[1] index out of range; presumably the grammar never
			// yields that — confirm.
			fileNo, err := strconv.Atoi(parts[1])
			if err != nil {
				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
			}

			if _, ok := fileNumbers[fileNo]; ok {
				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
			}
			fileNumbers[fileNo] = struct{}{}

			if fileNo > maxObservedFileNumber {
				maxObservedFileNumber = fileNo
			}

			for _, token := range parts[2:] {
				if token == "md5" {
					fileDirectivesContainMD5 = true
				}
			}
		}, ruleStatement, ruleLocationDirective)
	}

	// The processor is decided by the first input only; with no inputs it
	// stays at x86_64.
	processor := x86_64
	if len(inputs) > 0 {
		processor = detectProcessor(inputs[0])
	}

	commentIndicator := "#"
	if processor == aarch64 {
		commentIndicator = "//"
	}

	d := &delocation{
		symbols:             symbols,
		processor:           processor,
		commentIndicator:    commentIndicator,
		output:              w,
		redirectors:         make(map[string]string),
		bssAccessorsNeeded:  make(map[string]string),
		gotExternalsNeeded:  make(map[string]struct{}),
		gotOffsetsNeeded:    make(map[string]struct{}),
		gotOffOffsetsNeeded: make(map[string]struct{}),
	}

	// Open the module text with a .file/.loc pair for a synthetic source
	// file, numbered one past the largest file number seen in the inputs.
	w.WriteString(".text\n")
	var fileTrailing string
	if fileDirectivesContainMD5 {
		fileTrailing = " md5 0x00000000000000000000000000000000"
	}
	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_start:\n")

	for _, input := range inputs {
		if err := d.processInput(input); err != nil {
			return err
		}
	}

	w.WriteString(".text\n")
	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_end:\n")

	// Emit redirector functions. Each is a single jump instruction.
	// Names are sorted so that the output does not depend on map order.
	var redirectorNames []string
	for name := range d.redirectors {
		redirectorNames = append(redirectorNames, name)
	}
	sort.Strings(redirectorNames)

	for _, name := range redirectorNames {
		redirector := d.redirectors[name]
		switch d.processor {
		case aarch64:
			writeAarch64Function(w, redirector, func(w stringWriter) {
				w.WriteString("\tb " + name + "\n")
			})

		case x86_64:
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			w.WriteString("\tjmp\t" + name + "\n")
		}
	}

	var accessorNames []string
	for accessor := range d.bssAccessorsNeeded {
		accessorNames = append(accessorNames, accessor)
	}
	sort.Strings(accessorNames)

	// Emit BSS accessor functions. Each is a single LEA followed by RET.
	for _, name := range accessorNames {
		funcName := accessorName(name)
		target := d.bssAccessorsNeeded[name]

		switch d.processor {
		case x86_64:
			w.WriteString(".type " + funcName + ", @function\n")
			w.WriteString(funcName + ":\n")
			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")

		case aarch64:
			writeAarch64Function(w, funcName, func(w stringWriter) {
				w.WriteString("\tadrp x0, " + target + "\n")
				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
				w.WriteString("\tret\n")
			})
		}
	}

	// Emit the processor-specific helpers and data.
	switch d.processor {
	case aarch64:
		// One helper per external symbol, each returning the symbol's
		// GOT-loaded address in x0.
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, symbol := range externalNames {
			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
				w.WriteString("\tret\n")
			})
		}

		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
			w.WriteString("\tret\n")
		})

	case x86_64:
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, name := range externalNames {
			// Each entry is split at its first "@" into symbol and
			// section.
			parts := strings.SplitN(name, "@", 2)
			symbol, section := parts[0], parts[1]
			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
			w.WriteString(symbol + "_" + section + "_external:\n")
			// Ideally this would be .quad foo@GOTPCREL, but clang's
			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
			// we manually sign-extend the value, knowing that the GOT is
			// always at the end, thus foo@GOTPCREL has a positive value.
			w.WriteString("\t.long " + symbol + "@" + section + "\n")
			w.WriteString("\t.long 0\n")
		}

		// OPENSSL_ia32cap_get is defined under both its global name and
		// its local target name.
		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
		w.WriteString(".globl OPENSSL_ia32cap_get\n")
		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
		w.WriteString("OPENSSL_ia32cap_get:\n")
		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
		w.WriteString("\tret\n")

		// OPENSSL_ia32cap_addr_delta holds the distance from itself to
		// OPENSSL_ia32cap_P.
		w.WriteString(".extern OPENSSL_ia32cap_P\n")
		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")

		if d.gotDeltaNeeded {
			w.WriteString(".Lboringssl_got_delta:\n")
			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
		}

		for _, name := range sortedSet(d.gotOffsetsNeeded) {
			w.WriteString(".Lboringssl_got_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOT\n")
		}
		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOTOFF\n")
		}
	}

	// Emit the hash object, one .byte directive per byte of
	// fipscommon.UninitHashValue.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range fipscommon.UninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}
1527
// preprocess runs the command given by cppCommand with path appended as its
// final argument and returns whatever the command wrote to standard output.
// The command's standard error is forwarded to this process's standard error.
// cppCommand must contain at least the program name.
func preprocess(cppCommand []string, path string) ([]byte, error) {
	argv := make([]string, 0, len(cppCommand)+1)
	argv = append(argv, cppCommand...)
	argv = append(argv, path)

	var stdout bytes.Buffer
	cmd := exec.Command(argv[0], argv[1:]...)
	cmd.Stdout = &stdout
	cmd.Stderr = os.Stderr

	if err := cmd.Run(); err != nil {
		return nil, err
	}
	return stdout.Bytes(), nil
}
1545
1546func parseInputs(inputs []inputFile, cppCommand []string) error {
1547	for i, input := range inputs {
1548		var contents string
1549
1550		if input.isArchive {
1551			arFile, err := os.Open(input.path)
1552			if err != nil {
1553				return err
1554			}
1555			defer arFile.Close()
1556
1557			ar, err := ar.ParseAR(arFile)
1558			if err != nil {
1559				return err
1560			}
1561
1562			if len(ar) != 1 {
1563				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
1564			}
1565
1566			for _, c := range ar {
1567				contents = string(c)
1568			}
1569		} else {
1570			var inBytes []byte
1571			var err error
1572
1573			if len(cppCommand) > 0 {
1574				inBytes, err = preprocess(cppCommand, input.path)
1575			} else {
1576				inBytes, err = os.ReadFile(input.path)
1577			}
1578			if err != nil {
1579				return err
1580			}
1581
1582			contents = string(inBytes)
1583		}
1584
1585		asm := Asm{Buffer: contents, Pretty: true}
1586		asm.Init()
1587		if err := asm.Parse(); err != nil {
1588			return fmt.Errorf("error while parsing %q: %s", input.path, err)
1589		}
1590		ast := asm.AST()
1591
1592		inputs[i].contents = contents
1593		inputs[i].ast = ast
1594	}
1595
1596	return nil
1597}
1598
// includePathFromHeaderFilePath derives an include directory from the path of
// a header file. It strips trailing path elements one at a time until it
// finds one named "openssl" and returns everything before that element
// (including the trailing separator). An error is returned if no element of
// path is called "openssl".
func includePathFromHeaderFilePath(path string) (string, error) {
	for remaining := path; ; {
		parent, last := filepath.Split(remaining)
		if last == "openssl" {
			return parent, nil
		}
		if parent == "" {
			// Nothing left to strip.
			return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
		}
		// Drop the trailing separator so that the next Split yields the
		// preceding element.
		remaining = parent[:len(parent)-1]
	}
}
1620
1621func main() {
1622	// The .a file, if given, is expected to be an archive of textual
1623	// assembly sources. That's odd, but CMake really wants to create
1624	// archive files so it's the only way that we can make it work.
1625	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
1626	outFile := flag.String("o", "", "Path to output assembly")
1627	ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs")
1628	ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing")
1629
1630	flag.Parse()
1631
1632	if len(*outFile) == 0 {
1633		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
1634		os.Exit(1)
1635	}
1636
1637	var inputs []inputFile
1638	if len(*arInput) > 0 {
1639		inputs = append(inputs, inputFile{
1640			path:      *arInput,
1641			index:     0,
1642			isArchive: true,
1643		})
1644	}
1645
1646	includePaths := make(map[string]struct{})
1647
1648	for i, path := range flag.Args() {
1649		if len(path) == 0 {
1650			continue
1651		}
1652
1653		// Header files are not processed but their path is remembered
1654		// and passed as -I arguments when invoking the preprocessor.
1655		if strings.HasSuffix(path, ".h") {
1656			dir, err := includePathFromHeaderFilePath(path)
1657			if err != nil {
1658				fmt.Fprintf(os.Stderr, "%s\n", err)
1659				os.Exit(1)
1660			}
1661			includePaths[dir] = struct{}{}
1662			continue
1663		}
1664
1665		inputs = append(inputs, inputFile{
1666			path:  path,
1667			index: i + 1,
1668		})
1669	}
1670
1671	var cppCommand []string
1672	if len(*ccPath) > 0 {
1673		cppCommand = append(cppCommand, *ccPath)
1674		cppCommand = append(cppCommand, strings.Fields(*ccFlags)...)
1675		// Some of ccFlags might be superfluous when running the
1676		// preprocessor, but we don't want the compiler complaining that
1677		// "argument unused during compilation".
1678		cppCommand = append(cppCommand, "-Wno-unused-command-line-argument")
1679
1680		for includePath := range includePaths {
1681			cppCommand = append(cppCommand, "-I"+includePath)
1682		}
1683
1684		// -E requests only preprocessing.
1685		cppCommand = append(cppCommand, "-E")
1686	}
1687
1688	if err := parseInputs(inputs, cppCommand); err != nil {
1689		fmt.Fprintf(os.Stderr, "%s\n", err)
1690		os.Exit(1)
1691	}
1692
1693	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
1694	if err != nil {
1695		panic(err)
1696	}
1697	defer out.Close()
1698
1699	if err := transform(out, inputs); err != nil {
1700		fmt.Fprintf(os.Stderr, "%s\n", err)
1701		os.Exit(1)
1702	}
1703}
1704
1705func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
1706	if node == nil {
1707		return
1708	}
1709
1710	if len(rules) == 0 {
1711		cb(node)
1712		return
1713	}
1714
1715	rule := rules[0]
1716	childRules := rules[1:]
1717
1718	for ; node != nil; node = node.next {
1719		if node.pegRule != rule {
1720			continue
1721		}
1722
1723		if len(childRules) == 0 {
1724			cb(node)
1725		} else {
1726			forEachPath(node.up, cb, childRules...)
1727		}
1728	}
1729}
1730
1731func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
1732	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
1733	}
1734	return node
1735}
1736
1737func skipWS(node *node32) *node32 {
1738	return skipNodes(node, ruleWS)
1739}
1740
1741func assertNodeType(node *node32, expected pegRule) {
1742	if rule := node.pegRule; rule != expected {
1743		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
1744	}
1745}
1746
// wrapperFunc emits code around a continuation. It is handed the continuation
// and decides if and when to invoke it.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrappers; the first element is the
// outermost.
type wrapperStack []wrapperFunc

// do runs the wrappers from outermost to innermost, each one receiving a
// continuation that runs the remainder of the stack, and calls baseCase once
// the stack is empty. Wrappers are removed from the stack as they are
// started, so a wrapper that never invokes its continuation leaves the
// wrappers after it in place and baseCase uncalled.
func (w *wrapperStack) do(baseCase func()) {
	pending := *w
	if len(pending) == 0 {
		baseCase()
		return
	}

	outermost := pending[0]
	*w = pending[1:]

	rest := func() { w.do(baseCase) }
	outermost(rest)
}
1761
// localTargetName returns the local label used as the target for the global
// symbol called name: the name wrapped in ".L" and "_local_target".
func localTargetName(name string) string {
	const (
		prefix = ".L"
		suffix = "_local_target"
	)
	return prefix + name + suffix
}
1767
// isSynthesized reports whether symbol matches one of three name patterns:
// it ends in "_bss_get", it is exactly "OPENSSL_ia32cap_get", or it starts
// with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	switch {
	case strings.HasSuffix(symbol, "_bss_get"):
		return true
	case symbol == "OPENSSL_ia32cap_get":
		return true
	case strings.HasPrefix(symbol, "BORINGSSL_bcm_text_"):
		return true
	}
	return false
}
1773
// redirectorName returns the name of the redirector function for symbol,
// which is symbol prefixed with "bcm_redirector_".
func redirectorName(symbol string) string {
	const prefix = "bcm_redirector_"
	return prefix + symbol
}
1777
// sectionType returns the type of a section, i.e. its name up to (but not
// including) the second '.'. For example, “.text.foo” is a “.text” section.
// All types beginning with “.debug_” are reported as “.debug”. The boolean
// result is false when section is empty or does not start with a '.'.
func sectionType(section string) (string, bool) {
	if section == "" || section[0] != '.' {
		return "", false
	}

	kind := section
	// Look for a second '.' after the leading one and cut there.
	if dot := strings.IndexByte(section[1:], '.'); dot >= 0 {
		kind = section[:dot+1]
	}

	if strings.HasPrefix(kind, ".debug_") {
		return ".debug", true
	}
	return kind, true
}
1796
// accessorName returns the name of the accessor function for the BSS symbol
// called name, which is name followed by "_bss_get".
func accessorName(name string) string {
	const suffix = "_bss_get"
	return name + suffix
}
1802
1803func (d *delocation) mapLocalSymbol(symbol string) string {
1804	if d.currentInput.index == 0 {
1805		return symbol
1806	}
1807	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
1808}
1809
1810func detectProcessor(input inputFile) processorType {
1811	for statement := input.ast.up; statement != nil; statement = statement.next {
1812		node := skipNodes(statement.up, ruleWS)
1813		if node == nil || node.pegRule != ruleInstruction {
1814			continue
1815		}
1816
1817		instruction := node.up
1818		instructionName := input.contents[instruction.begin:instruction.end]
1819
1820		switch instructionName {
1821		case "movq", "call", "leaq":
1822			return x86_64
1823		case "str", "bl", "ldr", "st1":
1824			return aarch64
1825		}
1826	}
1827
1828	panic("processed entire input and didn't recognise any instructions.")
1829}
1830
// sortedSet returns the keys of m as a newly allocated slice in ascending
// order.
func sortedSet(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	next := 0
	for key := range m {
		keys[next] = key
		next++
	}
	sort.Strings(keys)
	return keys
}
1839