// Copyright 2017 The BoringSSL Authors
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// delocate performs several transformations of textual assembly code. See
// crypto/fipsmodule/FIPS.md for an overview.
package main

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"boringssl.googlesource.com/boringssl/util/ar"
	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
)

// inputFile represents a textual assembly file.
type inputFile struct {
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}

// stringWriter is the sink that rewritten assembly is emitted to; it is the
// type of delocation.output.
type stringWriter interface {
	WriteString(string) (int, error)
}

// processorType selects which instruction rewriter processInput dispatches
// to.
type processorType int

// The processor constants start at one, so the zero value of processorType
// matches neither of them.
const (
	x86_64 processorType = iota + 1
	aarch64
)

// delocation holds the state needed during a delocation operation.
type delocation struct {
	processor processorType
	output    stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed; AST node offsets are
	// resolved against its contents.
	currentInput inputFile
}

// contents returns the text of the current input that |node| spans, i.e. the
// range [node.begin, node.end) of currentInput.contents.
func (d *delocation) contents(node *node32) string {
	return d.currentInput.contents[node.begin:node.end]
}

// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) { 99 if _, err := d.output.WriteString(d.contents(node)); err != nil { 100 panic(err) 101 } 102} 103 104func (d *delocation) writeCommentedNode(node *node32) { 105 line := d.contents(node) 106 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 107 panic(err) 108 } 109} 110 111func locateError(err error, with *node32, in inputFile) error { 112 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 113 var line int 114 for _, pos := range posMap { 115 line = pos.line 116 } 117 118 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 119} 120 121func (d *delocation) processInput(input inputFile) (err error) { 122 d.currentInput = input 123 124 var origStatement *node32 125 defer func() { 126 if err := recover(); err != nil { 127 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 128 } 129 }() 130 131 for statement := input.ast.up; statement != nil; statement = statement.next { 132 assertNodeType(statement, ruleStatement) 133 origStatement = statement 134 135 node := skipWS(statement.up) 136 if node == nil { 137 d.writeNode(statement) 138 continue 139 } 140 141 switch node.pegRule { 142 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 143 d.writeNode(statement) 144 case ruleDirective: 145 statement, err = d.processDirective(statement, node.up) 146 case ruleLabelContainingDirective: 147 statement, err = d.processLabelContainingDirective(statement, node.up) 148 case ruleLabel: 149 statement, err = d.processLabel(statement, node.up) 150 case ruleInstruction: 151 switch d.processor { 152 case x86_64: 153 statement, err = d.processIntelInstruction(statement, node.up) 154 case aarch64: 155 statement, err = d.processAarch64Instruction(statement, node.up) 156 default: 157 panic("unknown processor") 158 } 159 default: 160 panic(fmt.Sprintf("unknown top-level statement type 
%q", rul3s[node.pegRule])) 161 } 162 163 if err != nil { 164 return locateError(err, origStatement, input) 165 } 166 } 167 168 return nil 169} 170 171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 172 assertNodeType(directive, ruleDirectiveName) 173 directiveName := d.contents(directive) 174 175 var args []string 176 forEachPath(directive, func(arg *node32) { 177 // If the argument is a quoted string, use the raw contents. 178 // (Note that this doesn't unescape the string, but that's not 179 // needed so far. 180 if arg.up != nil { 181 arg = arg.up 182 assertNodeType(arg, ruleQuotedArg) 183 if arg.up == nil { 184 args = append(args, "") 185 return 186 } 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedText) 189 } 190 args = append(args, d.contents(arg)) 191 }, ruleArgs, ruleArg) 192 193 switch directiveName { 194 case "comm", "lcomm": 195 if len(args) < 1 { 196 return nil, errors.New("comm directive has no arguments") 197 } 198 d.bssAccessorsNeeded[args[0]] = args[0] 199 d.writeNode(statement) 200 201 case "data": 202 // ASAN and some versions of MSAN are adding a .data section, 203 // and adding references to symbols within it to the code. We 204 // will have to work around this in the future. 205 return nil, errors.New(".data section found in module") 206 207 case "bss": 208 d.writeNode(statement) 209 return d.handleBSS(statement) 210 211 case "section": 212 section := args[0] 213 214 if section == ".data.rel.ro" { 215 // In a normal build, this is an indication of a 216 // problem but any references from the module to this 217 // section will result in a relocation and thus will 218 // break the integrity check. ASAN can generate these 219 // sections and so we will likely have to work around 220 // that in the future. 
221 return nil, errors.New(".data.rel.ro section found in module") 222 } 223 224 sectionType, ok := sectionType(section) 225 if !ok { 226 // Unknown sections are permitted in order to be robust 227 // to different compiler modes. 228 d.writeNode(statement) 229 break 230 } 231 232 switch sectionType { 233 case ".rodata", ".text": 234 // Move .rodata to .text so it may be accessed without 235 // a relocation. GCC with -fmerge-constants will place 236 // strings into separate sections, so we move all 237 // sections named like .rodata. Also move .text.startup 238 // so the self-test function is also in the module. 239 d.writeCommentedNode(statement) 240 d.output.WriteString(".text\n") 241 242 case ".data": 243 // See above about .data 244 return nil, errors.New(".data section found in module") 245 246 case ".init_array", ".fini_array", ".ctors", ".dtors": 247 // init_array/ctors/dtors contains function 248 // pointers to constructor/destructor 249 // functions. These contain relocations, but 250 // they're in a different section anyway. 
251 d.writeNode(statement) 252 break 253 254 case ".debug", ".note": 255 d.writeNode(statement) 256 break 257 258 case ".bss": 259 d.writeNode(statement) 260 return d.handleBSS(statement) 261 } 262 263 default: 264 d.writeNode(statement) 265 } 266 267 return statement, nil 268} 269 270func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool { 271 changed := false 272 assertNodeType(expr, ruleSymbolExpr) 273 274 for expr != nil { 275 atom := expr.up 276 assertNodeType(atom, ruleSymbolAtom) 277 278 for term := atom.up; term != nil; term = skipWS(term.next) { 279 if term.pegRule == ruleSymbolExpr { 280 changed = d.processSymbolExpr(term, b) || changed 281 continue 282 } 283 284 if term.pegRule != ruleLocalSymbol { 285 b.WriteString(d.contents(term)) 286 continue 287 } 288 289 oldSymbol := d.contents(term) 290 newSymbol := d.mapLocalSymbol(oldSymbol) 291 if newSymbol != oldSymbol { 292 changed = true 293 } 294 295 b.WriteString(newSymbol) 296 } 297 298 next := skipWS(atom.next) 299 if next == nil { 300 break 301 } 302 assertNodeType(next, ruleSymbolOperator) 303 b.WriteString(d.contents(next)) 304 next = skipWS(next.next) 305 assertNodeType(next, ruleSymbolExpr) 306 expr = next 307 } 308 return changed 309} 310 311func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 312 // The symbols within directives need to be mapped so that local 313 // symbols in two different .s inputs don't collide. 
314 changed := false 315 assertNodeType(directive, ruleLabelContainingDirectiveName) 316 name := d.contents(directive) 317 318 node := directive.next 319 assertNodeType(node, ruleWS) 320 321 node = node.next 322 assertNodeType(node, ruleSymbolArgs) 323 324 var args []string 325 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 326 assertNodeType(node, ruleSymbolArg) 327 arg := node.up 328 assertNodeType(arg, ruleSymbolExpr) 329 330 var b strings.Builder 331 changed = d.processSymbolExpr(arg, &b) || changed 332 333 args = append(args, b.String()) 334 } 335 336 if !changed { 337 d.writeNode(statement) 338 } else { 339 d.writeCommentedNode(statement) 340 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 341 } 342 343 return statement, nil 344} 345 346func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 347 symbol := d.contents(label) 348 349 switch label.pegRule { 350 case ruleLocalLabel: 351 d.output.WriteString(symbol + ":\n") 352 case ruleLocalSymbol: 353 // symbols need to be mapped so that local symbols from two 354 // different .s inputs don't collide. 355 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 356 case ruleSymbolName: 357 d.output.WriteString(localTargetName(symbol) + ":\n") 358 d.writeNode(statement) 359 default: 360 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 361 } 362 363 return statement, nil 364} 365 366// instructionArgs collects all the arguments to an instruction. 367func instructionArgs(node *node32) (argNodes []*node32) { 368 for node = skipWS(node); node != nil; node = skipWS(node.next) { 369 assertNodeType(node, ruleInstructionArg) 370 argNodes = append(argNodes, node.up) 371 } 372 373 return argNodes 374} 375 376// Aarch64 support 377 378// gotHelperName returns the name of a synthesised function that returns an 379// address from the GOT. 
380func gotHelperName(symbol string) string { 381 return ".Lboringssl_loadgot_" + symbol 382} 383 384// loadAarch64Address emits instructions to put the address of |symbol| 385// (optionally adjusted by |offsetStr|) into |targetReg|. 386func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) { 387 // There are two paths here: either the symbol is known to be local in which 388 // case adr is used to get the address (within 1MiB), or a GOT reference is 389 // really needed in which case the code needs to jump to a helper function. 390 // 391 // A helper function is needed because using code appears to be the only way 392 // to load a GOT value. On other platforms we have ".quad foo@GOT" outside of 393 // the module, but on Aarch64 that results in a "COPY" relocation and linker 394 // comments suggest it's a weird hack. So, for each GOT symbol needed, we emit 395 // a function outside of the module that returns the address from the GOT in 396 // x0. 397 398 d.writeCommentedNode(statement) 399 400 _, isKnown := d.symbols[symbol] 401 isLocal := strings.HasPrefix(symbol, ".L") 402 if isKnown || isLocal || isSynthesized(symbol) { 403 if isLocal { 404 symbol = d.mapLocalSymbol(symbol) 405 } else if isKnown { 406 symbol = localTargetName(symbol) 407 } 408 409 d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n") 410 411 return statement, nil 412 } 413 414 if len(offsetStr) != 0 { 415 panic("non-zero offset for helper-based reference") 416 } 417 418 var helperFunc string 419 if symbol == "OPENSSL_armcap_P" { 420 helperFunc = ".LOPENSSL_armcap_P_addr" 421 } else { 422 // GOT helpers also dereference the GOT entry, thus the subsequent ldr 423 // instruction, which would normally do the dereferencing, needs to be 424 // dropped. GOT helpers have to include the dereference because the 425 // assembler doesn't support ":got_lo12:foo" offsets except in an ldr 426 // instruction. 
427 d.gotExternalsNeeded[symbol] = struct{}{} 428 helperFunc = gotHelperName(symbol) 429 } 430 431 // Clear the red-zone. I can't find a definitive answer about whether Linux 432 // Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a 433 // 128-byte one. Thus conservatively clear a 128-byte red-zone. 434 d.output.WriteString("\tsub sp, sp, 128\n") 435 436 // Save x0 (which will be stomped by the return value) and the link register 437 // to the stack. Then save the program counter into the link register and 438 // jump to the helper function. 439 d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n") 440 d.output.WriteString("\tbl " + helperFunc + "\n") 441 442 if targetReg == "x0" { 443 // If the target happens to be x0 then restore the link register from the 444 // stack and send the saved value of x0 to the zero register. 445 d.output.WriteString("\tldp xzr, lr, [sp], #16\n") 446 } else { 447 // Otherwise move the result into place and restore registers. 448 d.output.WriteString("\tmov " + targetReg + ", x0\n") 449 d.output.WriteString("\tldp x0, lr, [sp], #16\n") 450 } 451 452 // Revert the red-zone adjustment. 453 d.output.WriteString("\tadd sp, sp, 128\n") 454 455 return statement, nil 456} 457 458func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) { 459 assertNodeType(instruction, ruleInstructionName) 460 instructionName := d.contents(instruction) 461 462 argNodes := instructionArgs(instruction.next) 463 464 switch instructionName { 465 case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg": 466 // These functions are special because they take a condition-code name as 467 // an argument and that looks like a symbol reference. 468 d.writeNode(statement) 469 return statement, nil 470 471 case "mrs": 472 // Functions that take special register names also look like a symbol 473 // reference to the parser. 
474 d.writeNode(statement) 475 return statement, nil 476 477 case "adrp": 478 // adrp always generates a relocation, even when the target symbol is in the 479 // same segment, because the page-offset of the code isn't known until link 480 // time. Thus adrp instructions are turned into either adr instructions 481 // (limiting the module to 1MiB offsets) or calls to helper functions, both of 482 // which load the full address. Later instructions, which add the low 12 bits 483 // of offset, are tweaked to remove the offset since it's already included. 484 // Loads of GOT symbols are slightly more complex because it's not possible to 485 // avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr 486 // instruction, which would normally do the dereferencing, is dropped 487 // completely. (Or turned into a mov if it targets a different register.) 488 assertNodeType(argNodes[0], ruleRegisterOrConstant) 489 targetReg := d.contents(argNodes[0]) 490 if !strings.HasPrefix(targetReg, "x") { 491 panic("adrp targetting register " + targetReg + ", which has the wrong size") 492 } 493 494 var symbol, offset string 495 switch argNodes[1].pegRule { 496 case ruleGOTSymbolOffset: 497 symbol = d.contents(argNodes[1].up) 498 case ruleMemoryRef: 499 assertNodeType(argNodes[1].up, ruleSymbolRef) 500 node, empty := d.gatherOffsets(argNodes[1].up.up, "") 501 if len(empty) != 0 { 502 panic("prefix offsets found for adrp") 503 } 504 symbol = d.contents(node) 505 _, offset = d.gatherOffsets(node.next, "") 506 default: 507 panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule]) 508 } 509 510 return d.loadAarch64Address(statement, targetReg, symbol, offset) 511 } 512 513 var args []string 514 changed := false 515 516 for _, arg := range argNodes { 517 fullArg := arg 518 519 switch arg.pegRule { 520 case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak: 521 args = append(args, d.contents(fullArg)) 522 523 case ruleGOTSymbolOffset: 524 // These should 
only be arguments to adrp and thus unreachable. 525 panic("unreachable") 526 527 case ruleMemoryRef: 528 ref := arg.up 529 530 switch ref.pegRule { 531 case ruleSymbolRef: 532 // This is a branch. Either the target needs to be written to a local 533 // version of the symbol to ensure that no relocations are emitted, or 534 // it needs to jump to a redirector function. 535 symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up) 536 changed = didChange 537 538 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 539 symbol = localTargetName(symbol) 540 changed = true 541 } else if !symbolIsLocal && !isSynthesized(symbol) { 542 redirector := redirectorName(symbol) 543 d.redirectors[symbol] = redirector 544 symbol = redirector 545 changed = true 546 } else if didChange && symbolIsLocal && len(offset) > 0 { 547 // didChange is set when the inputFile index is not 0; which is the index of the 548 // first file copied to the output, which is the generated assembly of bcm.c. 549 // In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index) 550 // in order to ensure they don't collide. `index` gets incremented per file. 551 // If there is offset after the symbol, append the `offset`. 552 symbol = symbol + offset 553 } 554 555 args = append(args, symbol) 556 557 case ruleARMBaseIndexScale: 558 parts := ref.up 559 assertNodeType(parts, ruleARMRegister) 560 baseAddrReg := d.contents(parts) 561 parts = skipWS(parts.next) 562 563 // Only two forms need special handling. First there's memory references 564 // like "[x*, :got_lo12:foo]". The base register here will have been the 565 // target of an adrp instruction to load the page address, but the adrp 566 // will have turned into loading the full address *and dereferencing it*, 567 // above. Thus this instruction needs to be dropped otherwise we'll be 568 // dereferencing twice. 
569 // 570 // Second there are forms like "[x*, :lo12:foo]" where the code has used 571 // adrp to load the page address into x*. That adrp will have been turned 572 // into loading the full address so just the offset needs to be dropped. 573 574 if parts != nil { 575 if parts.pegRule == ruleARMGOTLow12 { 576 if instructionName != "ldr" { 577 panic("Symbol reference outside of ldr instruction") 578 } 579 580 if skipWS(parts.next) != nil || parts.up.next != nil { 581 panic("can't handle tweak or post-increment with symbol references") 582 } 583 584 // The GOT helper already dereferenced the entry so, at most, just a mov 585 // is needed to put things in the right register. 586 d.writeCommentedNode(statement) 587 if baseAddrReg != args[0] { 588 d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n") 589 } 590 return statement, nil 591 } else if parts.pegRule == ruleLow12BitsSymbolRef { 592 if instructionName != "ldr" { 593 panic("Symbol reference outside of ldr instruction") 594 } 595 596 // Suppress the offset; adrp loaded the full address. This assumes the 597 // the compiler does not emit code like the following: 598 // 599 // adrp x0, symbol 600 // ldr x1, [x0, :lo12:symbol] 601 // ldr x2, [x0, :lo12:symbol+4] 602 // 603 // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but 604 // this is true when symbol is sufficiently aligned. 605 args = append(args, "["+baseAddrReg+"]") 606 changed = true 607 continue 608 } 609 } 610 611 args = append(args, d.contents(fullArg)) 612 613 case ruleLow12BitsSymbolRef: 614 // These are the second instruction in a pair: 615 // adrp x0, symbol // Load the page address into x0 616 // add x1, x0, :lo12:symbol // Adds the page offset. 617 // 618 // The adrp instruction will have been turned into a sequence that loads 619 // the full address, above, thus the offset is turned into zero. If that 620 // results in the instruction being a nop, then it is deleted. 
621 // 622 // This assumes the compiler does not emit code like the following: 623 // 624 // adrp x0, symbol 625 // add x1, x0, :lo12:symbol 626 // add x2, x0, :lo12:symbol+4 627 // 628 // Such code would only work if lo12(symbol+4) = lo12(symbol) + 4, but 629 // this is true when symbol is sufficiently aligned. 630 if instructionName != "add" { 631 panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) 632 } 633 634 if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") { 635 panic("address arithmetic with incorrectly sized register") 636 } 637 638 if args[0] == args[1] { 639 d.writeCommentedNode(statement) 640 return statement, nil 641 } 642 643 args = append(args, "#0") 644 changed = true 645 646 default: 647 panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule])) 648 } 649 650 default: 651 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 652 } 653 } 654 655 if changed { 656 d.writeCommentedNode(statement) 657 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 658 d.output.WriteString(replacement) 659 } else { 660 d.writeNode(statement) 661 } 662 663 return statement, nil 664} 665 666func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) { 667 for symRef != nil && symRef.pegRule == ruleOffset { 668 offset := d.contents(symRef) 669 if offset[0] != '+' && offset[0] != '-' { 670 offset = "+" + offset 671 } 672 offsets = offsets + offset 673 symRef = symRef.next 674 } 675 return symRef, offsets 676} 677 678func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) { 679 if memRef.pegRule != ruleSymbolRef { 680 return "", "", "", false, false, memRef 681 } 682 683 symRef := memRef.up 684 nextRef = memRef.next 685 686 // (Offset* '+')? 
687 symRef, offset = d.gatherOffsets(symRef, offset) 688 689 // (LocalSymbol / SymbolName) 690 symbol = d.contents(symRef) 691 if symRef.pegRule == ruleLocalSymbol { 692 symbolIsLocal = true 693 mapped := d.mapLocalSymbol(symbol) 694 if mapped != symbol { 695 symbol = mapped 696 didChange = true 697 } 698 } 699 symRef = symRef.next 700 701 // Offset* 702 symRef, offset = d.gatherOffsets(symRef, offset) 703 704 // ('@' Section / Offset*)? 705 if symRef != nil { 706 assertNodeType(symRef, ruleSection) 707 section = d.contents(symRef) 708 symRef = symRef.next 709 710 symRef, offset = d.gatherOffsets(symRef, offset) 711 } 712 713 if symRef != nil { 714 panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule])) 715 } 716 717 return 718} 719 720/* Intel */ 721 722type instructionType int 723 724const ( 725 instrPush instructionType = iota 726 instrMove 727 // instrTransformingMove is essentially a move, but it performs some 728 // transformation of the data during the process. 729 instrTransformingMove 730 instrJump 731 instrConditionalMove 732 // instrCombine merges the source and destination in some fashion, for example 733 // a 2-operand bitwise operation. 734 instrCombine 735 // instrMemoryVectorCombine is similer to instrCombine, but the source 736 // register must be a memory reference and the destination register 737 // must be a vector register. 738 instrMemoryVectorCombine 739 // instrThreeArg merges two sources into a destination in some fashion. 740 instrThreeArg 741 // instrCompare takes two arguments and writes outputs to the flags register. 
742 instrCompare 743 instrOther 744) 745 746func classifyInstruction(instr string, args []*node32) instructionType { 747 switch instr { 748 case "push", "pushq": 749 if len(args) == 1 { 750 return instrPush 751 } 752 753 case "mov", "movq", "vmovq", "movsd", "vmovsd": 754 if len(args) == 2 { 755 return instrMove 756 } 757 758 case "cmovneq", "cmoveq": 759 if len(args) == 2 { 760 return instrConditionalMove 761 } 762 763 case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo": 764 if len(args) == 1 { 765 return instrJump 766 } 767 768 case "orq", "andq", "xorq": 769 if len(args) == 2 { 770 return instrCombine 771 } 772 773 case "cmpq": 774 if len(args) == 2 { 775 return instrCompare 776 } 777 778 case "sarxq", "shlxq", "shrxq": 779 if len(args) == 3 { 780 return instrThreeArg 781 } 782 783 case "vpbroadcastq": 784 if len(args) == 2 { 785 return instrTransformingMove 786 } 787 788 case "movlps", "movhps": 789 if len(args) == 2 { 790 return instrMemoryVectorCombine 791 } 792 } 793 794 return instrOther 795} 796 797func push(w stringWriter) wrapperFunc { 798 return func(k func()) { 799 w.WriteString("\tpushq %rax\n") 800 k() 801 w.WriteString("\txchg %rax, (%rsp)\n") 802 } 803} 804 805func compare(w stringWriter, instr, a, b string) wrapperFunc { 806 return func(k func()) { 807 k() 808 w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b)) 809 } 810} 811 812func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc { 813 d.gotExternalsNeeded[symbol+"@"+section] = struct{}{} 814 815 return func(k func()) { 816 if !redzoneCleared { 817 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
818 } 819 w.WriteString("\tpushf\n") 820 w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination)) 821 w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination)) 822 w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination)) 823 w.WriteString("\tpopf\n") 824 if !redzoneCleared { 825 w.WriteString("\tleaq\t128(%rsp), %rsp\n") 826 } 827 } 828} 829 830func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { 831 return func(k func()) { 832 if !redzoneCleared { 833 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 834 defer w.WriteString("\tleaq 128(%rsp), %rsp\n") 835 } 836 w.WriteString("\tpushfq\n") 837 k() 838 w.WriteString("\tpopfq\n") 839 } 840} 841 842func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) { 843 candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"} 844 845 var reg string 846NextCandidate: 847 for _, candidate := range candidates { 848 for _, avoid := range avoidRegs { 849 if candidate == avoid { 850 continue NextCandidate 851 } 852 } 853 854 reg = candidate 855 break 856 } 857 858 if len(reg) == 0 { 859 panic("too many excluded registers") 860 } 861 862 return func(k func()) { 863 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
864 w.WriteString("\tpushq " + reg + "\n") 865 k() 866 w.WriteString("\tpopq " + reg + "\n") 867 w.WriteString("\tleaq 128(%rsp), %rsp\n") 868 }, reg 869} 870 871func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { 872 return func(k func()) { 873 k() 874 prefix := "" 875 if isAVX { 876 prefix = "v" 877 } 878 w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") 879 } 880} 881 882func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc { 883 return func(k func()) { 884 k() 885 w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") 886 } 887} 888 889func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 890 return func(k func()) { 891 k() 892 w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") 893 } 894} 895 896func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc { 897 return func(k func()) { 898 k() 899 w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n") 900 } 901} 902 903func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 904 return func(k func()) { 905 k() 906 // These instructions can only read from memory, so push 907 // tempReg and read from the stack. Note we assume the red zone 908 // was previously cleared by saveRegister(). 
909 w.WriteString("\tpushq " + source + "\n") 910 w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n") 911 w.WriteString("\tleaq 8(%rsp), %rsp\n") 912 } 913} 914 915func isValidLEATarget(reg string) bool { 916 return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") 917} 918 919func undoConditionalMove(w stringWriter, instr string) wrapperFunc { 920 var invertedCondition string 921 922 switch instr { 923 case "cmoveq": 924 invertedCondition = "ne" 925 case "cmovneq": 926 invertedCondition = "e" 927 default: 928 panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr)) 929 } 930 931 return func(k func()) { 932 w.WriteString("\tj" + invertedCondition + " 999f\n") 933 k() 934 w.WriteString("999:\n") 935 } 936} 937 938func (d *delocation) isRIPRelative(node *node32) bool { 939 return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)" 940} 941 942func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) { 943 var prefix string 944 if instruction.pegRule == ruleInstructionPrefix { 945 prefix = d.contents(instruction) 946 instruction = skipWS(instruction.next) 947 } 948 949 assertNodeType(instruction, ruleInstructionName) 950 instructionName := d.contents(instruction) 951 952 argNodes := instructionArgs(instruction.next) 953 954 var wrappers wrapperStack 955 var args []string 956 changed := false 957 958Args: 959 for i, arg := range argNodes { 960 fullArg := arg 961 isIndirect := false 962 963 if arg.pegRule == ruleIndirectionIndicator { 964 arg = arg.next 965 isIndirect = true 966 } 967 968 switch arg.pegRule { 969 case ruleRegisterOrConstant, ruleLocalLabelRef: 970 args = append(args, d.contents(fullArg)) 971 972 case ruleMemoryRef: 973 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 974 changed = didChange 975 976 if symbol == "OPENSSL_ia32cap_P" && section == "" { 977 if 
instructionName != "leaq" { 978 return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName) 979 } 980 981 if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 982 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 983 } 984 985 target := argNodes[1] 986 assertNodeType(target, ruleRegisterOrConstant) 987 reg := d.contents(target) 988 989 if !strings.HasPrefix(reg, "%r") { 990 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 991 } 992 993 changed = true 994 995 // Flag-altering instructions (i.e. addq) are going to be used so the 996 // flags need to be preserved. 997 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 998 999 wrappers = append(wrappers, func(k func()) { 1000 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 1001 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 1002 }) 1003 1004 break Args 1005 } 1006 1007 switch section { 1008 case "": 1009 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1010 symbol = localTargetName(symbol) 1011 changed = true 1012 } 1013 1014 case "PLT": 1015 if classifyInstruction(instructionName, argNodes) != instrJump { 1016 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 1017 } 1018 1019 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1020 symbol = localTargetName(symbol) 1021 changed = true 1022 } else if !symbolIsLocal && !isSynthesized(symbol) { 1023 // Unknown symbol via PLT is an 1024 // out-call from the module, e.g. 1025 // memcpy. 
1026 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1027 symbol = redirectorName(symbol) 1028 } 1029 1030 changed = true 1031 1032 case "GOTPCREL": 1033 if len(offset) > 0 { 1034 return nil, errors.New("loading from GOT with offset is unsupported") 1035 } 1036 if !d.isRIPRelative(memRef) { 1037 return nil, errors.New("GOT access must be IP-relative") 1038 } 1039 1040 useGOT := false 1041 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1042 symbol = localTargetName(symbol) 1043 changed = true 1044 } else if !isSynthesized(symbol) { 1045 useGOT = true 1046 } 1047 1048 classification := classifyInstruction(instructionName, argNodes) 1049 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1050 return nil, errors.New("GOT access must be source operand") 1051 } 1052 1053 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 1054 var targetReg string 1055 var redzoneCleared bool 1056 switch classification { 1057 case instrPush: 1058 wrappers = append(wrappers, push(d.output)) 1059 targetReg = "%rax" 1060 case instrConditionalMove: 1061 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1062 fallthrough 1063 case instrMove: 1064 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1065 targetReg = d.contents(argNodes[1]) 1066 case instrCompare: 1067 otherSource := d.contents(argNodes[i^1]) 1068 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1069 redzoneCleared = true 1070 wrappers = append(wrappers, saveRegWrapper) 1071 if i == 0 { 1072 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1073 } else { 1074 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1075 } 1076 targetReg = tempReg 1077 case instrTransformingMove: 1078 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1079 targetReg = d.contents(argNodes[1]) 1080 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 
1081 if isValidLEATarget(targetReg) { 1082 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1083 } 1084 case instrCombine: 1085 targetReg = d.contents(argNodes[1]) 1086 if !isValidLEATarget(targetReg) { 1087 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1088 } 1089 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1090 redzoneCleared = true 1091 wrappers = append(wrappers, saveRegWrapper) 1092 1093 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1094 targetReg = tempReg 1095 case instrMemoryVectorCombine: 1096 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1097 targetReg = d.contents(argNodes[1]) 1098 if isValidLEATarget(targetReg) { 1099 return nil, errors.New("target register must be an XMM register") 1100 } 1101 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1102 wrappers = append(wrappers, saveRegWrapper) 1103 redzoneCleared = true 1104 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1105 targetReg = tempReg 1106 case instrThreeArg: 1107 if n := len(argNodes); n != 3 { 1108 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1109 } 1110 if i != 0 && i != 1 { 1111 return nil, errors.New("GOT access must be from source operand") 1112 } 1113 targetReg = d.contents(argNodes[2]) 1114 1115 otherSource := d.contents(argNodes[1]) 1116 if i == 1 { 1117 otherSource = d.contents(argNodes[0]) 1118 } 1119 1120 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1121 redzoneCleared = true 1122 wrappers = append(wrappers, saveRegWrapper) 1123 1124 if i == 0 { 1125 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1126 } else { 1127 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1128 } 1129 targetReg = tempReg 1130 default: 1131 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1132 } 1133 1134 if !isValidLEATarget(targetReg) { 1135 // Sometimes the compiler will load from the GOT to an 1136 // XMM register, which is not a valid target of an LEA 1137 // instruction. 1138 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1139 wrappers = append(wrappers, saveRegWrapper) 1140 isAVX := strings.HasPrefix(instructionName, "v") 1141 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1142 targetReg = tempReg 1143 if redzoneCleared { 1144 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1145 } 1146 redzoneCleared = true 1147 } 1148 1149 if symbol == "OPENSSL_ia32cap_P" { 1150 // Flag-altering instructions (i.e. addq) are going to be used so the 1151 // flags need to be preserved. 1152 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1153 wrappers = append(wrappers, func(k func()) { 1154 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1155 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1156 }) 1157 } else if useGOT { 1158 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1159 } else { 1160 wrappers = append(wrappers, func(k func()) { 1161 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1162 }) 1163 } 1164 changed = true 1165 break Args 1166 1167 default: 1168 return nil, fmt.Errorf("Unknown section type %q", section) 1169 } 1170 1171 if !changed && len(section) > 0 { 1172 panic("section was not handled") 1173 } 1174 section = "" 1175 1176 argStr := "" 1177 if isIndirect { 1178 argStr += "*" 1179 } 1180 argStr += symbol 1181 argStr += offset 1182 1183 for ; memRef != nil; memRef = memRef.next { 1184 argStr += d.contents(memRef) 1185 } 1186 1187 args = 
append(args, argStr) 1188 1189 case ruleGOTAddress: 1190 if instructionName != "leaq" { 1191 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea") 1192 } 1193 if i != 0 || len(argNodes) != 2 { 1194 return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form") 1195 } 1196 d.gotDeltaNeeded = true 1197 changed = true 1198 targetReg := d.contents(argNodes[1]) 1199 args = append(args, ".Lboringssl_got_delta(%rip)") 1200 wrappers = append(wrappers, func(k func()) { 1201 k() 1202 d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg)) 1203 }) 1204 1205 case ruleGOTLocation: 1206 if instructionName != "movabsq" { 1207 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1208 } 1209 if i != 0 || len(argNodes) != 2 { 1210 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1211 } 1212 1213 d.gotDeltaNeeded = true 1214 changed = true 1215 instructionName = "movq" 1216 assertNodeType(arg.up, ruleLocalSymbol) 1217 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1218 targetReg := d.contents(argNodes[1]) 1219 args = append(args, ".Lboringssl_got_delta(%rip)") 1220 wrappers = append(wrappers, func(k func()) { 1221 k() 1222 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1223 }) 1224 1225 case ruleGOTSymbolOffset: 1226 if instructionName != "movabsq" { 1227 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1228 } 1229 if i != 0 || len(argNodes) != 2 { 1230 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1231 } 1232 1233 assertNodeType(arg.up, ruleSymbolName) 1234 symbol := d.contents(arg.up) 1235 if strings.HasPrefix(symbol, ".L") { 1236 symbol = d.mapLocalSymbol(symbol) 1237 } 1238 targetReg := d.contents(argNodes[1]) 1239 1240 var prefix string 1241 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1242 if isGOTOFF { 1243 prefix = 
"gotoff" 1244 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1245 } else { 1246 prefix = "got" 1247 d.gotOffsetsNeeded[symbol] = struct{}{} 1248 } 1249 changed = true 1250 1251 wrappers = append(wrappers, func(k func()) { 1252 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1253 // of writing) emits 64-bit relocations anyway, so the following four bytes 1254 // get stomped. Thus we use 64-bit offsets. 1255 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1256 }) 1257 1258 default: 1259 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1260 } 1261 } 1262 1263 if changed { 1264 d.writeCommentedNode(statement) 1265 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1266 if len(prefix) != 0 { 1267 replacement = "\t" + prefix + replacement 1268 } 1269 wrappers.do(func() { 1270 d.output.WriteString(replacement) 1271 }) 1272 } else { 1273 d.writeNode(statement) 1274 } 1275 1276 return statement, nil 1277} 1278 1279func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1280 lastStatement := statement 1281 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1282 node := skipWS(statement.up) 1283 if node == nil { 1284 d.writeNode(statement) 1285 continue 1286 } 1287 1288 switch node.pegRule { 1289 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1290 d.writeNode(statement) 1291 1292 case ruleDirective: 1293 directive := node.up 1294 assertNodeType(directive, ruleDirectiveName) 1295 directiveName := d.contents(directive) 1296 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1297 return lastStatement, nil 1298 } 1299 d.writeNode(statement) 1300 1301 case ruleLabel: 1302 label := node.up 1303 d.writeNode(statement) 1304 1305 if label.pegRule != ruleLocalSymbol { 1306 symbol := d.contents(label) 1307 localSymbol := 
localTargetName(symbol) 1308 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1309 1310 d.bssAccessorsNeeded[symbol] = localSymbol 1311 } 1312 1313 case ruleLabelContainingDirective: 1314 var err error 1315 statement, err = d.processLabelContainingDirective(statement, node.up) 1316 if err != nil { 1317 return nil, err 1318 } 1319 1320 default: 1321 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1322 } 1323 } 1324 1325 return lastStatement, nil 1326} 1327 1328func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1329 w.WriteString(".p2align 2\n") 1330 w.WriteString(".hidden " + funcName + "\n") 1331 w.WriteString(".type " + funcName + ", @function\n") 1332 w.WriteString(funcName + ":\n") 1333 w.WriteString(".cfi_startproc\n") 1334 // We insert a landing pad (`bti c` instruction) unconditionally at the beginning of 1335 // every generated function so that they can be called indirectly (with `blr` or 1336 // `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is 1337 // a no-op on machines or program states not supporting BTI (Branch Target Identification). 1338 // None of the generated function bodies call other functions (with bl or blr), so we only 1339 // insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`. 1340 // Normally we would also generate a .note.gnu.property section to annotate the assembly 1341 // file as BTI-compatible, but if the input assembly files are BTI-compatible, they should 1342 // already have those sections so there is no need to add an extra one ourselves. 1343 w.WriteString("\thint #34 // bti c\n") 1344 writeContents(w) 1345 w.WriteString(".cfi_endproc\n") 1346 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1347} 1348 1349func transform(w stringWriter, inputs []inputFile) error { 1350 // symbols contains all defined symbols. 
1351 symbols := make(map[string]struct{}) 1352 // fileNumbers is the set of IDs seen in .file directives. 1353 fileNumbers := make(map[int]struct{}) 1354 // maxObservedFileNumber contains the largest seen file number in a 1355 // .file directive. Zero is not a valid number. 1356 maxObservedFileNumber := 0 1357 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1358 // checksums in .file directives. If it does so, then this script needs 1359 // to match that behaviour otherwise warnings result. 1360 fileDirectivesContainMD5 := false 1361 1362 // OPENSSL_ia32cap_get will be synthesized by this script. 1363 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1364 1365 for _, input := range inputs { 1366 forEachPath(input.ast.up, func(node *node32) { 1367 symbol := input.contents[node.begin:node.end] 1368 if _, ok := symbols[symbol]; ok { 1369 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1370 } 1371 symbols[symbol] = struct{}{} 1372 }, ruleStatement, ruleLabel, ruleSymbolName) 1373 1374 forEachPath(input.ast.up, func(node *node32) { 1375 assertNodeType(node, ruleLocationDirective) 1376 directive := input.contents[node.begin:node.end] 1377 if !strings.HasPrefix(directive, ".file") { 1378 return 1379 } 1380 parts := strings.Fields(directive) 1381 if len(parts) == 2 { 1382 // This is a .file directive with just a 1383 // filename. Clang appears to generate just one 1384 // of these at the beginning of the output for 1385 // the compilation unit. Ignore it. 
1386 return 1387 } 1388 fileNo, err := strconv.Atoi(parts[1]) 1389 if err != nil { 1390 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1391 } 1392 1393 if _, ok := fileNumbers[fileNo]; ok { 1394 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1395 } 1396 fileNumbers[fileNo] = struct{}{} 1397 1398 if fileNo > maxObservedFileNumber { 1399 maxObservedFileNumber = fileNo 1400 } 1401 1402 for _, token := range parts[2:] { 1403 if token == "md5" { 1404 fileDirectivesContainMD5 = true 1405 } 1406 } 1407 }, ruleStatement, ruleLocationDirective) 1408 } 1409 1410 processor := x86_64 1411 if len(inputs) > 0 { 1412 processor = detectProcessor(inputs[0]) 1413 } 1414 1415 commentIndicator := "#" 1416 if processor == aarch64 { 1417 commentIndicator = "//" 1418 } 1419 1420 d := &delocation{ 1421 symbols: symbols, 1422 processor: processor, 1423 commentIndicator: commentIndicator, 1424 output: w, 1425 redirectors: make(map[string]string), 1426 bssAccessorsNeeded: make(map[string]string), 1427 gotExternalsNeeded: make(map[string]struct{}), 1428 gotOffsetsNeeded: make(map[string]struct{}), 1429 gotOffOffsetsNeeded: make(map[string]struct{}), 1430 } 1431 1432 w.WriteString(".text\n") 1433 var fileTrailing string 1434 if fileDirectivesContainMD5 { 1435 fileTrailing = " md5 0x00000000000000000000000000000000" 1436 } 1437 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1438 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1439 w.WriteString("BORINGSSL_bcm_text_start:\n") 1440 1441 for _, input := range inputs { 1442 if err := d.processInput(input); err != nil { 1443 return err 1444 } 1445 } 1446 1447 w.WriteString(".text\n") 1448 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1449 w.WriteString("BORINGSSL_bcm_text_end:\n") 1450 1451 // Emit redirector functions. Each is a single jump instruction. 
1452 var redirectorNames []string 1453 for name := range d.redirectors { 1454 redirectorNames = append(redirectorNames, name) 1455 } 1456 sort.Strings(redirectorNames) 1457 1458 for _, name := range redirectorNames { 1459 redirector := d.redirectors[name] 1460 switch d.processor { 1461 case aarch64: 1462 writeAarch64Function(w, redirector, func(w stringWriter) { 1463 w.WriteString("\tb " + name + "\n") 1464 }) 1465 1466 case x86_64: 1467 w.WriteString(".type " + redirector + ", @function\n") 1468 w.WriteString(redirector + ":\n") 1469 w.WriteString("\tjmp\t" + name + "\n") 1470 } 1471 } 1472 1473 var accessorNames []string 1474 for accessor := range d.bssAccessorsNeeded { 1475 accessorNames = append(accessorNames, accessor) 1476 } 1477 sort.Strings(accessorNames) 1478 1479 // Emit BSS accessor functions. Each is a single LEA followed by RET. 1480 for _, name := range accessorNames { 1481 funcName := accessorName(name) 1482 target := d.bssAccessorsNeeded[name] 1483 1484 switch d.processor { 1485 case x86_64: 1486 w.WriteString(".type " + funcName + ", @function\n") 1487 w.WriteString(funcName + ":\n") 1488 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1489 1490 case aarch64: 1491 writeAarch64Function(w, funcName, func(w stringWriter) { 1492 w.WriteString("\tadrp x0, " + target + "\n") 1493 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1494 w.WriteString("\tret\n") 1495 }) 1496 } 1497 } 1498 1499 switch d.processor { 1500 case aarch64: 1501 externalNames := sortedSet(d.gotExternalsNeeded) 1502 for _, symbol := range externalNames { 1503 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1504 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1505 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1506 w.WriteString("\tret\n") 1507 }) 1508 } 1509 1510 writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1511 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1512 w.WriteString("\tadd x0, x0, 
:lo12:OPENSSL_armcap_P\n") 1513 w.WriteString("\tret\n") 1514 }) 1515 1516 case x86_64: 1517 externalNames := sortedSet(d.gotExternalsNeeded) 1518 for _, name := range externalNames { 1519 parts := strings.SplitN(name, "@", 2) 1520 symbol, section := parts[0], parts[1] 1521 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1522 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1523 w.WriteString(symbol + "_" + section + "_external:\n") 1524 // Ideally this would be .quad foo@GOTPCREL, but clang's 1525 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1526 // we manually sign-extend the value, knowing that the GOT is 1527 // always at the end, thus foo@GOTPCREL has a positive value. 1528 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1529 w.WriteString("\t.long 0\n") 1530 } 1531 1532 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1533 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1534 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1535 w.WriteString("OPENSSL_ia32cap_get:\n") 1536 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1537 w.WriteString("\tret\n") 1538 1539 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1540 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1541 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1542 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1543 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1544 1545 if d.gotDeltaNeeded { 1546 w.WriteString(".Lboringssl_got_delta:\n") 1547 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1548 } 1549 1550 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1551 w.WriteString(".Lboringssl_got_" + name + ":\n") 1552 w.WriteString("\t.quad " + name + "@GOT\n") 1553 } 1554 for _, name := range sortedSet(d.gotOffOffsetsNeeded) { 1555 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1556 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1557 } 1558 } 
1559 1560 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1561 w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n") 1562 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1563 for _, b := range fipscommon.UninitHashValue { 1564 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1565 } 1566 1567 return nil 1568} 1569 1570// preprocess runs source through the C preprocessor. 1571func preprocess(cppCommand []string, path string) ([]byte, error) { 1572 var args []string 1573 args = append(args, cppCommand...) 1574 args = append(args, path) 1575 1576 cpp := exec.Command(args[0], args[1:]...) 1577 cpp.Stderr = os.Stderr 1578 var result bytes.Buffer 1579 cpp.Stdout = &result 1580 1581 if err := cpp.Run(); err != nil { 1582 return nil, err 1583 } 1584 1585 return result.Bytes(), nil 1586} 1587 1588func parseInputs(inputs []inputFile, cppCommand []string) error { 1589 for i, input := range inputs { 1590 var contents string 1591 1592 if input.isArchive { 1593 arFile, err := os.Open(input.path) 1594 if err != nil { 1595 return err 1596 } 1597 defer arFile.Close() 1598 1599 ar, err := ar.ParseAR(arFile) 1600 if err != nil { 1601 return err 1602 } 1603 1604 if len(ar) != 1 { 1605 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1606 } 1607 1608 for _, c := range ar { 1609 contents = string(c) 1610 } 1611 } else { 1612 var inBytes []byte 1613 var err error 1614 1615 if len(cppCommand) > 0 { 1616 inBytes, err = preprocess(cppCommand, input.path) 1617 } else { 1618 inBytes, err = os.ReadFile(input.path) 1619 } 1620 if err != nil { 1621 return err 1622 } 1623 1624 contents = string(inBytes) 1625 } 1626 1627 asm := Asm{Buffer: contents, Pretty: true} 1628 asm.Init() 1629 if err := asm.Parse(); err != nil { 1630 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1631 } 1632 ast := asm.AST() 1633 1634 inputs[i].contents = contents 1635 inputs[i].ast = ast 1636 } 1637 1638 return nil 1639} 1640 1641// 
includePathFromHeaderFilePath returns an include directory path based on the 1642// path of a specific header file. It walks up the path and assumes that the 1643// include files are rooted in a directory called "openssl". 1644func includePathFromHeaderFilePath(path string) (string, error) { 1645 dir := path 1646 for { 1647 var file string 1648 dir, file = filepath.Split(dir) 1649 1650 if file == "openssl" { 1651 return dir, nil 1652 } 1653 1654 if len(dir) == 0 { 1655 break 1656 } 1657 dir = dir[:len(dir)-1] 1658 } 1659 1660 return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path) 1661} 1662 1663func main() { 1664 // The .a file, if given, is expected to be an archive of textual 1665 // assembly sources. That's odd, but CMake really wants to create 1666 // archive files so it's the only way that we can make it work. 1667 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 1668 outFile := flag.String("o", "", "Path to output assembly") 1669 ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs") 1670 ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing") 1671 1672 flag.Parse() 1673 1674 if len(*outFile) == 0 { 1675 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 1676 os.Exit(1) 1677 } 1678 1679 var inputs []inputFile 1680 if len(*arInput) > 0 { 1681 inputs = append(inputs, inputFile{ 1682 path: *arInput, 1683 index: 0, 1684 isArchive: true, 1685 }) 1686 } 1687 1688 includePaths := make(map[string]struct{}) 1689 1690 for i, path := range flag.Args() { 1691 if len(path) == 0 { 1692 continue 1693 } 1694 1695 // Header files are not processed but their path is remembered 1696 // and passed as -I arguments when invoking the preprocessor. 
1697 if strings.HasSuffix(path, ".h") { 1698 dir, err := includePathFromHeaderFilePath(path) 1699 if err != nil { 1700 fmt.Fprintf(os.Stderr, "%s\n", err) 1701 os.Exit(1) 1702 } 1703 includePaths[dir] = struct{}{} 1704 continue 1705 } 1706 1707 inputs = append(inputs, inputFile{ 1708 path: path, 1709 index: i + 1, 1710 }) 1711 } 1712 1713 var cppCommand []string 1714 if len(*ccPath) > 0 { 1715 cppCommand = append(cppCommand, *ccPath) 1716 cppCommand = append(cppCommand, strings.Fields(*ccFlags)...) 1717 // Some of ccFlags might be superfluous when running the 1718 // preprocessor, but we don't want the compiler complaining that 1719 // "argument unused during compilation". 1720 cppCommand = append(cppCommand, "-Wno-unused-command-line-argument") 1721 1722 for includePath := range includePaths { 1723 cppCommand = append(cppCommand, "-I"+includePath) 1724 } 1725 1726 // -E requests only preprocessing. 1727 cppCommand = append(cppCommand, "-E") 1728 } 1729 1730 if err := parseInputs(inputs, cppCommand); err != nil { 1731 fmt.Fprintf(os.Stderr, "%s\n", err) 1732 os.Exit(1) 1733 } 1734 1735 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1736 if err != nil { 1737 panic(err) 1738 } 1739 defer out.Close() 1740 1741 if err := transform(out, inputs); err != nil { 1742 fmt.Fprintf(os.Stderr, "%s\n", err) 1743 os.Exit(1) 1744 } 1745} 1746 1747func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1748 if node == nil { 1749 return 1750 } 1751 1752 if len(rules) == 0 { 1753 cb(node) 1754 return 1755 } 1756 1757 rule := rules[0] 1758 childRules := rules[1:] 1759 1760 for ; node != nil; node = node.next { 1761 if node.pegRule != rule { 1762 continue 1763 } 1764 1765 if len(childRules) == 0 { 1766 cb(node) 1767 } else { 1768 forEachPath(node.up, cb, childRules...) 
1769 } 1770 } 1771} 1772 1773func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1774 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1775 } 1776 return node 1777} 1778 1779func skipWS(node *node32) *node32 { 1780 return skipNodes(node, ruleWS) 1781} 1782 1783func assertNodeType(node *node32, expected pegRule) { 1784 if rule := node.pegRule; rule != expected { 1785 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1786 } 1787} 1788 1789type wrapperFunc func(func()) 1790 1791type wrapperStack []wrapperFunc 1792 1793func (w *wrapperStack) do(baseCase func()) { 1794 if len(*w) == 0 { 1795 baseCase() 1796 return 1797 } 1798 1799 wrapper := (*w)[0] 1800 *w = (*w)[1:] 1801 wrapper(func() { w.do(baseCase) }) 1802} 1803 1804// localTargetName returns the name of the local target label for a global 1805// symbol named name. 1806func localTargetName(name string) string { 1807 return ".L" + name + "_local_target" 1808} 1809 1810func isSynthesized(symbol string) bool { 1811 return strings.HasSuffix(symbol, "_bss_get") || 1812 symbol == "OPENSSL_ia32cap_get" || 1813 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1814} 1815 1816func redirectorName(symbol string) string { 1817 return "bcm_redirector_" + symbol 1818} 1819 1820// sectionType returns the type of a section. I.e. a section called “.text.foo” 1821// is a “.text” section. 1822func sectionType(section string) (string, bool) { 1823 if len(section) == 0 || section[0] != '.' { 1824 return "", false 1825 } 1826 1827 i := strings.Index(section[1:], ".") 1828 if i != -1 { 1829 section = section[:i+1] 1830 } 1831 1832 if strings.HasPrefix(section, ".debug_") { 1833 return ".debug", true 1834 } 1835 1836 return section, true 1837} 1838 1839// accessorName returns the name of the accessor function for a BSS symbol 1840// named name. 
1841func accessorName(name string) string { 1842 return name + "_bss_get" 1843} 1844 1845func (d *delocation) mapLocalSymbol(symbol string) string { 1846 if d.currentInput.index == 0 { 1847 return symbol 1848 } 1849 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1850} 1851 1852func detectProcessor(input inputFile) processorType { 1853 for statement := input.ast.up; statement != nil; statement = statement.next { 1854 node := skipNodes(statement.up, ruleWS) 1855 if node == nil || node.pegRule != ruleInstruction { 1856 continue 1857 } 1858 1859 instruction := node.up 1860 instructionName := input.contents[instruction.begin:instruction.end] 1861 1862 switch instructionName { 1863 case "movq", "call", "leaq": 1864 return x86_64 1865 case "str", "bl", "ldr", "st1": 1866 return aarch64 1867 } 1868 } 1869 1870 panic("processed entire input and didn't recognise any instructions.") 1871} 1872 1873func sortedSet(m map[string]struct{}) []string { 1874 ret := make([]string, 0, len(m)) 1875 for key := range m { 1876 ret = append(ret, key) 1877 } 1878 sort.Strings(ret) 1879 return ret 1880} 1881