// Copyright (c) 2017, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// delocate performs several transformations of textual assembly code. See
// crypto/fipsmodule/FIPS.md for an overview.
package main

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"boringssl.googlesource.com/boringssl/util/ar"
	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
)

// inputFile represents a textual assembly file.
type inputFile struct {
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}

// stringWriter is the output sink used for the rewritten assembly. Anything
// with a WriteString method satisfies it.
type stringWriter interface {
	WriteString(string) (int, error)
}

// processorType identifies the instruction set of the assembly being
// processed; it selects which per-instruction rewriter is used.
type processorType int

const (
	// x86_64 starts at one, so the zero value of processorType is not a
	// valid processor.
	x86_64 processorType = iota + 1
	aarch64
)

// delocation holds the state needed during a delocation operation.
type delocation struct {
	processor processorType
	output    stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”.
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed; node offsets are
	// interpreted relative to its contents.
	currentInput inputFile
}

// contents returns the text of the current input file that is spanned by
// node, i.e. currentInput.contents[node.begin:node.end].
func (d *delocation) contents(node *node32) string {
	return d.currentInput.contents[node.begin:node.end]
}

// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) { 99 if _, err := d.output.WriteString(d.contents(node)); err != nil { 100 panic(err) 101 } 102} 103 104func (d *delocation) writeCommentedNode(node *node32) { 105 line := d.contents(node) 106 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 107 panic(err) 108 } 109} 110 111func locateError(err error, with *node32, in inputFile) error { 112 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 113 var line int 114 for _, pos := range posMap { 115 line = pos.line 116 } 117 118 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 119} 120 121func (d *delocation) processInput(input inputFile) (err error) { 122 d.currentInput = input 123 124 var origStatement *node32 125 defer func() { 126 if err := recover(); err != nil { 127 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 128 } 129 }() 130 131 for statement := input.ast.up; statement != nil; statement = statement.next { 132 assertNodeType(statement, ruleStatement) 133 origStatement = statement 134 135 node := skipWS(statement.up) 136 if node == nil { 137 d.writeNode(statement) 138 continue 139 } 140 141 switch node.pegRule { 142 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 143 d.writeNode(statement) 144 case ruleDirective: 145 statement, err = d.processDirective(statement, node.up) 146 case ruleLabelContainingDirective: 147 statement, err = d.processLabelContainingDirective(statement, node.up) 148 case ruleLabel: 149 statement, err = d.processLabel(statement, node.up) 150 case ruleInstruction: 151 switch d.processor { 152 case x86_64: 153 statement, err = d.processIntelInstruction(statement, node.up) 154 case aarch64: 155 statement, err = d.processAarch64Instruction(statement, node.up) 156 default: 157 panic("unknown processor") 158 } 159 default: 160 panic(fmt.Sprintf("unknown top-level statement type 
%q", rul3s[node.pegRule])) 161 } 162 163 if err != nil { 164 return locateError(err, origStatement, input) 165 } 166 } 167 168 return nil 169} 170 171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 172 assertNodeType(directive, ruleDirectiveName) 173 directiveName := d.contents(directive) 174 175 var args []string 176 forEachPath(directive, func(arg *node32) { 177 // If the argument is a quoted string, use the raw contents. 178 // (Note that this doesn't unescape the string, but that's not 179 // needed so far. 180 if arg.up != nil { 181 arg = arg.up 182 assertNodeType(arg, ruleQuotedArg) 183 if arg.up == nil { 184 args = append(args, "") 185 return 186 } 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedText) 189 } 190 args = append(args, d.contents(arg)) 191 }, ruleArgs, ruleArg) 192 193 switch directiveName { 194 case "comm", "lcomm": 195 if len(args) < 1 { 196 return nil, errors.New("comm directive has no arguments") 197 } 198 d.bssAccessorsNeeded[args[0]] = args[0] 199 d.writeNode(statement) 200 201 case "data": 202 // ASAN and some versions of MSAN are adding a .data section, 203 // and adding references to symbols within it to the code. We 204 // will have to work around this in the future. 205 return nil, errors.New(".data section found in module") 206 207 case "section": 208 section := args[0] 209 210 if section == ".data.rel.ro" { 211 // In a normal build, this is an indication of a 212 // problem but any references from the module to this 213 // section will result in a relocation and thus will 214 // break the integrity check. ASAN can generate these 215 // sections and so we will likely have to work around 216 // that in the future. 217 return nil, errors.New(".data.rel.ro section found in module") 218 } 219 220 sectionType, ok := sectionType(section) 221 if !ok { 222 // Unknown sections are permitted in order to be robust 223 // to different compiler modes. 
224 d.writeNode(statement) 225 break 226 } 227 228 switch sectionType { 229 case ".rodata", ".text": 230 // Move .rodata to .text so it may be accessed without 231 // a relocation. GCC with -fmerge-constants will place 232 // strings into separate sections, so we move all 233 // sections named like .rodata. Also move .text.startup 234 // so the self-test function is also in the module. 235 d.writeCommentedNode(statement) 236 d.output.WriteString(".text\n") 237 238 case ".data": 239 // See above about .data 240 return nil, errors.New(".data section found in module") 241 242 case ".init_array", ".fini_array", ".ctors", ".dtors": 243 // init_array/ctors/dtors contains function 244 // pointers to constructor/destructor 245 // functions. These contain relocations, but 246 // they're in a different section anyway. 247 d.writeNode(statement) 248 break 249 250 case ".debug", ".note": 251 d.writeNode(statement) 252 break 253 254 case ".bss": 255 d.writeNode(statement) 256 return d.handleBSS(statement) 257 } 258 259 default: 260 d.writeNode(statement) 261 } 262 263 return statement, nil 264} 265 266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool { 267 changed := false 268 assertNodeType(expr, ruleSymbolExpr) 269 270 for expr != nil { 271 atom := expr.up 272 assertNodeType(atom, ruleSymbolAtom) 273 274 for term := atom.up; term != nil; term = skipWS(term.next) { 275 if term.pegRule == ruleSymbolExpr { 276 changed = d.processSymbolExpr(term, b) || changed 277 continue 278 } 279 280 if term.pegRule != ruleLocalSymbol { 281 b.WriteString(d.contents(term)) 282 continue 283 } 284 285 oldSymbol := d.contents(term) 286 newSymbol := d.mapLocalSymbol(oldSymbol) 287 if newSymbol != oldSymbol { 288 changed = true 289 } 290 291 b.WriteString(newSymbol) 292 } 293 294 next := skipWS(atom.next) 295 if next == nil { 296 break 297 } 298 assertNodeType(next, ruleSymbolOperator) 299 b.WriteString(d.contents(next)) 300 next = skipWS(next.next) 301 
assertNodeType(next, ruleSymbolExpr) 302 expr = next 303 } 304 return changed 305} 306 307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 308 // The symbols within directives need to be mapped so that local 309 // symbols in two different .s inputs don't collide. 310 changed := false 311 assertNodeType(directive, ruleLabelContainingDirectiveName) 312 name := d.contents(directive) 313 314 node := directive.next 315 assertNodeType(node, ruleWS) 316 317 node = node.next 318 assertNodeType(node, ruleSymbolArgs) 319 320 var args []string 321 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 322 assertNodeType(node, ruleSymbolArg) 323 arg := node.up 324 assertNodeType(arg, ruleSymbolExpr) 325 326 var b strings.Builder 327 changed = d.processSymbolExpr(arg, &b) || changed 328 329 args = append(args, b.String()) 330 } 331 332 if !changed { 333 d.writeNode(statement) 334 } else { 335 d.writeCommentedNode(statement) 336 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 337 } 338 339 return statement, nil 340} 341 342func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 343 symbol := d.contents(label) 344 345 switch label.pegRule { 346 case ruleLocalLabel: 347 d.output.WriteString(symbol + ":\n") 348 case ruleLocalSymbol: 349 // symbols need to be mapped so that local symbols from two 350 // different .s inputs don't collide. 351 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 352 case ruleSymbolName: 353 d.output.WriteString(localTargetName(symbol) + ":\n") 354 d.writeNode(statement) 355 default: 356 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 357 } 358 359 return statement, nil 360} 361 362// instructionArgs collects all the arguments to an instruction. 
363func instructionArgs(node *node32) (argNodes []*node32) { 364 for node = skipWS(node); node != nil; node = skipWS(node.next) { 365 assertNodeType(node, ruleInstructionArg) 366 argNodes = append(argNodes, node.up) 367 } 368 369 return argNodes 370} 371 372// Aarch64 support 373 374// gotHelperName returns the name of a synthesised function that returns an 375// address from the GOT. 376func gotHelperName(symbol string) string { 377 return ".Lboringssl_loadgot_" + symbol 378} 379 380// loadAarch64Address emits instructions to put the address of |symbol| 381// (optionally adjusted by |offsetStr|) into |targetReg|. 382func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) { 383 // There are two paths here: either the symbol is known to be local in which 384 // case adr is used to get the address (within 1MiB), or a GOT reference is 385 // really needed in which case the code needs to jump to a helper function. 386 // 387 // A helper function is needed because using code appears to be the only way 388 // to load a GOT value. On other platforms we have ".quad foo@GOT" outside of 389 // the module, but on Aarch64 that results in a "COPY" relocation and linker 390 // comments suggest it's a weird hack. So, for each GOT symbol needed, we emit 391 // a function outside of the module that returns the address from the GOT in 392 // x0. 
393 394 d.writeCommentedNode(statement) 395 396 _, isKnown := d.symbols[symbol] 397 isLocal := strings.HasPrefix(symbol, ".L") 398 if isKnown || isLocal || isSynthesized(symbol) { 399 if isLocal { 400 symbol = d.mapLocalSymbol(symbol) 401 } else if isKnown { 402 symbol = localTargetName(symbol) 403 } 404 405 d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n") 406 407 return statement, nil 408 } 409 410 if len(offsetStr) != 0 { 411 panic("non-zero offset for helper-based reference") 412 } 413 414 var helperFunc string 415 if symbol == "OPENSSL_armcap_P" { 416 helperFunc = ".LOPENSSL_armcap_P_addr" 417 } else { 418 // GOT helpers also dereference the GOT entry, thus the subsequent ldr 419 // instruction, which would normally do the dereferencing, needs to be 420 // dropped. GOT helpers have to include the dereference because the 421 // assembler doesn't support ":got_lo12:foo" offsets except in an ldr 422 // instruction. 423 d.gotExternalsNeeded[symbol] = struct{}{} 424 helperFunc = gotHelperName(symbol) 425 } 426 427 // Clear the red-zone. I can't find a definitive answer about whether Linux 428 // Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a 429 // 128-byte one. Thus conservatively clear a 128-byte red-zone. 430 d.output.WriteString("\tsub sp, sp, 128\n") 431 432 // Save x0 (which will be stomped by the return value) and the link register 433 // to the stack. Then save the program counter into the link register and 434 // jump to the helper function. 435 d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n") 436 d.output.WriteString("\tbl " + helperFunc + "\n") 437 438 if targetReg == "x0" { 439 // If the target happens to be x0 then restore the link register from the 440 // stack and send the saved value of x0 to the zero register. 441 d.output.WriteString("\tldp xzr, lr, [sp], #16\n") 442 } else { 443 // Otherwise move the result into place and restore registers. 
444 d.output.WriteString("\tmov " + targetReg + ", x0\n") 445 d.output.WriteString("\tldp x0, lr, [sp], #16\n") 446 } 447 448 // Revert the red-zone adjustment. 449 d.output.WriteString("\tadd sp, sp, 128\n") 450 451 return statement, nil 452} 453 454func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) { 455 assertNodeType(instruction, ruleInstructionName) 456 instructionName := d.contents(instruction) 457 458 argNodes := instructionArgs(instruction.next) 459 460 switch instructionName { 461 case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg": 462 // These functions are special because they take a condition-code name as 463 // an argument and that looks like a symbol reference. 464 d.writeNode(statement) 465 return statement, nil 466 467 case "mrs": 468 // Functions that take special register names also look like a symbol 469 // reference to the parser. 470 d.writeNode(statement) 471 return statement, nil 472 473 case "adrp": 474 // adrp always generates a relocation, even when the target symbol is in the 475 // same segment, because the page-offset of the code isn't known until link 476 // time. Thus adrp instructions are turned into either adr instructions 477 // (limiting the module to 1MiB offsets) or calls to helper functions, both of 478 // which load the full address. Later instructions, which add the low 12 bits 479 // of offset, are tweaked to remove the offset since it's already included. 480 // Loads of GOT symbols are slightly more complex because it's not possible to 481 // avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr 482 // instruction, which would normally do the dereferencing, is dropped 483 // completely. (Or turned into a mov if it targets a different register.) 
484 assertNodeType(argNodes[0], ruleRegisterOrConstant) 485 targetReg := d.contents(argNodes[0]) 486 if !strings.HasPrefix(targetReg, "x") { 487 panic("adrp targetting register " + targetReg + ", which has the wrong size") 488 } 489 490 var symbol, offset string 491 switch argNodes[1].pegRule { 492 case ruleGOTSymbolOffset: 493 symbol = d.contents(argNodes[1].up) 494 case ruleMemoryRef: 495 assertNodeType(argNodes[1].up, ruleSymbolRef) 496 node, empty := d.gatherOffsets(argNodes[1].up.up, "") 497 if len(empty) != 0 { 498 panic("prefix offsets found for adrp") 499 } 500 symbol = d.contents(node) 501 _, offset = d.gatherOffsets(node.next, "") 502 default: 503 panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule]) 504 } 505 506 return d.loadAarch64Address(statement, targetReg, symbol, offset) 507 } 508 509 var args []string 510 changed := false 511 512 for _, arg := range argNodes { 513 fullArg := arg 514 515 switch arg.pegRule { 516 case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak: 517 args = append(args, d.contents(fullArg)) 518 519 case ruleGOTSymbolOffset: 520 // These should only be arguments to adrp and thus unreachable. 521 panic("unreachable") 522 523 case ruleMemoryRef: 524 ref := arg.up 525 526 switch ref.pegRule { 527 case ruleSymbolRef: 528 // This is a branch. Either the target needs to be written to a local 529 // version of the symbol to ensure that no relocations are emitted, or 530 // it needs to jump to a redirector function. 
531 symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up) 532 changed = didChange 533 534 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 535 symbol = localTargetName(symbol) 536 changed = true 537 } else if !symbolIsLocal && !isSynthesized(symbol) { 538 redirector := redirectorName(symbol) 539 d.redirectors[symbol] = redirector 540 symbol = redirector 541 changed = true 542 } else if didChange && symbolIsLocal && len(offset) > 0 { 543 // didChange is set when the inputFile index is not 0; which is the index of the 544 // first file copied to the output, which is the generated assembly of bcm.c. 545 // In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index) 546 // in order to ensure they don't collide. `index` gets incremented per file. 547 // If there is offset after the symbol, append the `offset`. 548 symbol = symbol + offset 549 } 550 551 args = append(args, symbol) 552 553 case ruleARMBaseIndexScale: 554 parts := ref.up 555 assertNodeType(parts, ruleARMRegister) 556 baseAddrReg := d.contents(parts) 557 parts = skipWS(parts.next) 558 559 // Only two forms need special handling. First there's memory references 560 // like "[x*, :got_lo12:foo]". The base register here will have been the 561 // target of an adrp instruction to load the page address, but the adrp 562 // will have turned into loading the full address *and dereferencing it*, 563 // above. Thus this instruction needs to be dropped otherwise we'll be 564 // dereferencing twice. 565 // 566 // Second there are forms like "[x*, :lo12:foo]" where the code has used 567 // adrp to load the page address into x*. That adrp will have been turned 568 // into loading the full address so just the offset needs to be dropped. 
569 570 if parts != nil { 571 if parts.pegRule == ruleARMGOTLow12 { 572 if instructionName != "ldr" { 573 panic("Symbol reference outside of ldr instruction") 574 } 575 576 if skipWS(parts.next) != nil || parts.up.next != nil { 577 panic("can't handle tweak or post-increment with symbol references") 578 } 579 580 // The GOT helper already dereferenced the entry so, at most, just a mov 581 // is needed to put things in the right register. 582 d.writeCommentedNode(statement) 583 if baseAddrReg != args[0] { 584 d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n") 585 } 586 return statement, nil 587 } else if parts.pegRule == ruleLow12BitsSymbolRef { 588 if instructionName != "ldr" { 589 panic("Symbol reference outside of ldr instruction") 590 } 591 592 if skipWS(parts.next) != nil || parts.up.next != nil { 593 panic("can't handle tweak or post-increment with symbol references") 594 } 595 596 // Suppress the offset; adrp loaded the full address. 597 args = append(args, "["+baseAddrReg+"]") 598 changed = true 599 continue 600 } 601 } 602 603 args = append(args, d.contents(fullArg)) 604 605 case ruleLow12BitsSymbolRef: 606 // These are the second instruction in a pair: 607 // adrp x0, symbol // Load the page address into x0 608 // add x1, x0, :lo12:symbol // Adds the page offset. 609 // 610 // The adrp instruction will have been turned into a sequence that loads 611 // the full address, above, thus the offset is turned into zero. If that 612 // results in the instruction being a nop, then it is deleted. 
613 if instructionName != "add" { 614 panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) 615 } 616 617 if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") { 618 panic("address arithmetic with incorrectly sized register") 619 } 620 621 if args[0] == args[1] { 622 d.writeCommentedNode(statement) 623 return statement, nil 624 } 625 626 args = append(args, "#0") 627 changed = true 628 629 default: 630 panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule])) 631 } 632 633 default: 634 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 635 } 636 } 637 638 if changed { 639 d.writeCommentedNode(statement) 640 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 641 d.output.WriteString(replacement) 642 } else { 643 d.writeNode(statement) 644 } 645 646 return statement, nil 647} 648 649func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) { 650 for symRef != nil && symRef.pegRule == ruleOffset { 651 offset := d.contents(symRef) 652 if offset[0] != '+' && offset[0] != '-' { 653 offset = "+" + offset 654 } 655 offsets = offsets + offset 656 symRef = symRef.next 657 } 658 return symRef, offsets 659} 660 661func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) { 662 if memRef.pegRule != ruleSymbolRef { 663 return "", "", "", false, false, memRef 664 } 665 666 symRef := memRef.up 667 nextRef = memRef.next 668 669 // (Offset* '+')? 
670 symRef, offset = d.gatherOffsets(symRef, offset) 671 672 // (LocalSymbol / SymbolName) 673 symbol = d.contents(symRef) 674 if symRef.pegRule == ruleLocalSymbol { 675 symbolIsLocal = true 676 mapped := d.mapLocalSymbol(symbol) 677 if mapped != symbol { 678 symbol = mapped 679 didChange = true 680 } 681 } 682 symRef = symRef.next 683 684 // Offset* 685 symRef, offset = d.gatherOffsets(symRef, offset) 686 687 // ('@' Section / Offset*)? 688 if symRef != nil { 689 assertNodeType(symRef, ruleSection) 690 section = d.contents(symRef) 691 symRef = symRef.next 692 693 symRef, offset = d.gatherOffsets(symRef, offset) 694 } 695 696 if symRef != nil { 697 panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule])) 698 } 699 700 return 701} 702 703/* Intel */ 704 705type instructionType int 706 707const ( 708 instrPush instructionType = iota 709 instrMove 710 // instrTransformingMove is essentially a move, but it performs some 711 // transformation of the data during the process. 712 instrTransformingMove 713 instrJump 714 instrConditionalMove 715 // instrCombine merges the source and destination in some fashion, for example 716 // a 2-operand bitwise operation. 717 instrCombine 718 // instrMemoryVectorCombine is similer to instrCombine, but the source 719 // register must be a memory reference and the destination register 720 // must be a vector register. 721 instrMemoryVectorCombine 722 // instrThreeArg merges two sources into a destination in some fashion. 723 instrThreeArg 724 // instrCompare takes two arguments and writes outputs to the flags register. 
725 instrCompare 726 instrOther 727) 728 729func classifyInstruction(instr string, args []*node32) instructionType { 730 switch instr { 731 case "push", "pushq": 732 if len(args) == 1 { 733 return instrPush 734 } 735 736 case "mov", "movq", "vmovq", "movsd", "vmovsd": 737 if len(args) == 2 { 738 return instrMove 739 } 740 741 case "cmovneq", "cmoveq": 742 if len(args) == 2 { 743 return instrConditionalMove 744 } 745 746 case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo": 747 if len(args) == 1 { 748 return instrJump 749 } 750 751 case "orq", "andq", "xorq": 752 if len(args) == 2 { 753 return instrCombine 754 } 755 756 case "cmpq": 757 if len(args) == 2 { 758 return instrCompare 759 } 760 761 case "sarxq", "shlxq", "shrxq": 762 if len(args) == 3 { 763 return instrThreeArg 764 } 765 766 case "vpbroadcastq": 767 if len(args) == 2 { 768 return instrTransformingMove 769 } 770 771 case "movlps", "movhps": 772 if len(args) == 2 { 773 return instrMemoryVectorCombine 774 } 775 } 776 777 return instrOther 778} 779 780func push(w stringWriter) wrapperFunc { 781 return func(k func()) { 782 w.WriteString("\tpushq %rax\n") 783 k() 784 w.WriteString("\txchg %rax, (%rsp)\n") 785 } 786} 787 788func compare(w stringWriter, instr, a, b string) wrapperFunc { 789 return func(k func()) { 790 k() 791 w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b)) 792 } 793} 794 795func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc { 796 d.gotExternalsNeeded[symbol+"@"+section] = struct{}{} 797 798 return func(k func()) { 799 if !redzoneCleared { 800 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
801 } 802 w.WriteString("\tpushf\n") 803 w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination)) 804 w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination)) 805 w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination)) 806 w.WriteString("\tpopf\n") 807 if !redzoneCleared { 808 w.WriteString("\tleaq\t128(%rsp), %rsp\n") 809 } 810 } 811} 812 813func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { 814 return func(k func()) { 815 if !redzoneCleared { 816 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 817 defer w.WriteString("\tleaq 128(%rsp), %rsp\n") 818 } 819 w.WriteString("\tpushfq\n") 820 k() 821 w.WriteString("\tpopfq\n") 822 } 823} 824 825func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) { 826 candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"} 827 828 var reg string 829NextCandidate: 830 for _, candidate := range candidates { 831 for _, avoid := range avoidRegs { 832 if candidate == avoid { 833 continue NextCandidate 834 } 835 } 836 837 reg = candidate 838 break 839 } 840 841 if len(reg) == 0 { 842 panic("too many excluded registers") 843 } 844 845 return func(k func()) { 846 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
847 w.WriteString("\tpushq " + reg + "\n") 848 k() 849 w.WriteString("\tpopq " + reg + "\n") 850 w.WriteString("\tleaq 128(%rsp), %rsp\n") 851 }, reg 852} 853 854func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { 855 return func(k func()) { 856 k() 857 prefix := "" 858 if isAVX { 859 prefix = "v" 860 } 861 w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") 862 } 863} 864 865func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc { 866 return func(k func()) { 867 k() 868 w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") 869 } 870} 871 872func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 873 return func(k func()) { 874 k() 875 w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") 876 } 877} 878 879func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc { 880 return func(k func()) { 881 k() 882 w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n") 883 } 884} 885 886func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 887 return func(k func()) { 888 k() 889 // These instructions can only read from memory, so push 890 // tempReg and read from the stack. Note we assume the red zone 891 // was previously cleared by saveRegister(). 
892 w.WriteString("\tpushq " + source + "\n") 893 w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n") 894 w.WriteString("\tleaq 8(%rsp), %rsp\n") 895 } 896} 897 898func isValidLEATarget(reg string) bool { 899 return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") 900} 901 902func undoConditionalMove(w stringWriter, instr string) wrapperFunc { 903 var invertedCondition string 904 905 switch instr { 906 case "cmoveq": 907 invertedCondition = "ne" 908 case "cmovneq": 909 invertedCondition = "e" 910 default: 911 panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr)) 912 } 913 914 return func(k func()) { 915 w.WriteString("\tj" + invertedCondition + " 999f\n") 916 k() 917 w.WriteString("999:\n") 918 } 919} 920 921func (d *delocation) isRIPRelative(node *node32) bool { 922 return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)" 923} 924 925func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) { 926 assertNodeType(instruction, ruleInstructionName) 927 instructionName := d.contents(instruction) 928 929 argNodes := instructionArgs(instruction.next) 930 931 var wrappers wrapperStack 932 var args []string 933 changed := false 934 935Args: 936 for i, arg := range argNodes { 937 fullArg := arg 938 isIndirect := false 939 940 if arg.pegRule == ruleIndirectionIndicator { 941 arg = arg.next 942 isIndirect = true 943 } 944 945 switch arg.pegRule { 946 case ruleRegisterOrConstant, ruleLocalLabelRef: 947 args = append(args, d.contents(fullArg)) 948 949 case ruleMemoryRef: 950 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 951 changed = didChange 952 953 if symbol == "OPENSSL_ia32cap_P" && section == "" { 954 if instructionName != "leaq" { 955 return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName) 956 } 957 958 if i != 0 || 
len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 959 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 960 } 961 962 target := argNodes[1] 963 assertNodeType(target, ruleRegisterOrConstant) 964 reg := d.contents(target) 965 966 if !strings.HasPrefix(reg, "%r") { 967 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 968 } 969 970 changed = true 971 972 // Flag-altering instructions (i.e. addq) are going to be used so the 973 // flags need to be preserved. 974 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 975 976 wrappers = append(wrappers, func(k func()) { 977 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 978 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 979 }) 980 981 break Args 982 } 983 984 switch section { 985 case "": 986 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 987 symbol = localTargetName(symbol) 988 changed = true 989 } 990 991 case "PLT": 992 if classifyInstruction(instructionName, argNodes) != instrJump { 993 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 994 } 995 996 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 997 symbol = localTargetName(symbol) 998 changed = true 999 } else if !symbolIsLocal && !isSynthesized(symbol) { 1000 // Unknown symbol via PLT is an 1001 // out-call from the module, e.g. 1002 // memcpy. 
1003 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1004 symbol = redirectorName(symbol) 1005 } 1006 1007 changed = true 1008 1009 case "GOTPCREL": 1010 if len(offset) > 0 { 1011 return nil, errors.New("loading from GOT with offset is unsupported") 1012 } 1013 if !d.isRIPRelative(memRef) { 1014 return nil, errors.New("GOT access must be IP-relative") 1015 } 1016 1017 useGOT := false 1018 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1019 symbol = localTargetName(symbol) 1020 changed = true 1021 } else if !isSynthesized(symbol) { 1022 useGOT = true 1023 } 1024 1025 classification := classifyInstruction(instructionName, argNodes) 1026 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1027 return nil, errors.New("GOT access must be source operand") 1028 } 1029 1030 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 1031 var targetReg string 1032 var redzoneCleared bool 1033 switch classification { 1034 case instrPush: 1035 wrappers = append(wrappers, push(d.output)) 1036 targetReg = "%rax" 1037 case instrConditionalMove: 1038 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1039 fallthrough 1040 case instrMove: 1041 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1042 targetReg = d.contents(argNodes[1]) 1043 case instrCompare: 1044 otherSource := d.contents(argNodes[i^1]) 1045 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1046 redzoneCleared = true 1047 wrappers = append(wrappers, saveRegWrapper) 1048 if i == 0 { 1049 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1050 } else { 1051 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1052 } 1053 targetReg = tempReg 1054 case instrTransformingMove: 1055 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1056 targetReg = d.contents(argNodes[1]) 1057 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 
1058 if isValidLEATarget(targetReg) { 1059 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1060 } 1061 case instrCombine: 1062 targetReg = d.contents(argNodes[1]) 1063 if !isValidLEATarget(targetReg) { 1064 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1065 } 1066 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1067 redzoneCleared = true 1068 wrappers = append(wrappers, saveRegWrapper) 1069 1070 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1071 targetReg = tempReg 1072 case instrMemoryVectorCombine: 1073 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1074 targetReg = d.contents(argNodes[1]) 1075 if isValidLEATarget(targetReg) { 1076 return nil, errors.New("target register must be an XMM register") 1077 } 1078 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1079 wrappers = append(wrappers, saveRegWrapper) 1080 redzoneCleared = true 1081 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1082 targetReg = tempReg 1083 case instrThreeArg: 1084 if n := len(argNodes); n != 3 { 1085 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1086 } 1087 if i != 0 && i != 1 { 1088 return nil, errors.New("GOT access must be from source operand") 1089 } 1090 targetReg = d.contents(argNodes[2]) 1091 1092 otherSource := d.contents(argNodes[1]) 1093 if i == 1 { 1094 otherSource = d.contents(argNodes[0]) 1095 } 1096 1097 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1098 redzoneCleared = true 1099 wrappers = append(wrappers, saveRegWrapper) 1100 1101 if i == 0 { 1102 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1103 } else { 1104 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1105 } 1106 targetReg = tempReg 1107 default: 1108 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1109 } 1110 1111 if !isValidLEATarget(targetReg) { 1112 // Sometimes the compiler will load from the GOT to an 1113 // XMM register, which is not a valid target of an LEA 1114 // instruction. 1115 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1116 wrappers = append(wrappers, saveRegWrapper) 1117 isAVX := strings.HasPrefix(instructionName, "v") 1118 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1119 targetReg = tempReg 1120 if redzoneCleared { 1121 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1122 } 1123 redzoneCleared = true 1124 } 1125 1126 if symbol == "OPENSSL_ia32cap_P" { 1127 // Flag-altering instructions (i.e. addq) are going to be used so the 1128 // flags need to be preserved. 1129 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1130 wrappers = append(wrappers, func(k func()) { 1131 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1132 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1133 }) 1134 } else if useGOT { 1135 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1136 } else { 1137 wrappers = append(wrappers, func(k func()) { 1138 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1139 }) 1140 } 1141 changed = true 1142 break Args 1143 1144 default: 1145 return nil, fmt.Errorf("Unknown section type %q", section) 1146 } 1147 1148 if !changed && len(section) > 0 { 1149 panic("section was not handled") 1150 } 1151 section = "" 1152 1153 argStr := "" 1154 if isIndirect { 1155 argStr += "*" 1156 } 1157 argStr += symbol 1158 argStr += offset 1159 1160 for ; memRef != nil; memRef = memRef.next { 1161 argStr += d.contents(memRef) 1162 } 1163 1164 args = 
append(args, argStr) 1165 1166 case ruleGOTAddress: 1167 if instructionName != "leaq" { 1168 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ used outside of lea") 1169 } 1170 if i != 0 || len(argNodes) != 2 { 1171 return nil, fmt.Errorf("Load of _GLOBAL_OFFSET_TABLE_ address didn't have expected form") 1172 } 1173 d.gotDeltaNeeded = true 1174 changed = true 1175 targetReg := d.contents(argNodes[1]) 1176 args = append(args, ".Lboringssl_got_delta(%rip)") 1177 wrappers = append(wrappers, func(k func()) { 1178 k() 1179 d.output.WriteString(fmt.Sprintf("\taddq .Lboringssl_got_delta(%%rip), %s\n", targetReg)) 1180 }) 1181 1182 case ruleGOTLocation: 1183 if instructionName != "movabsq" { 1184 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1185 } 1186 if i != 0 || len(argNodes) != 2 { 1187 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1188 } 1189 1190 d.gotDeltaNeeded = true 1191 changed = true 1192 instructionName = "movq" 1193 assertNodeType(arg.up, ruleLocalSymbol) 1194 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1195 targetReg := d.contents(argNodes[1]) 1196 args = append(args, ".Lboringssl_got_delta(%rip)") 1197 wrappers = append(wrappers, func(k func()) { 1198 k() 1199 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1200 }) 1201 1202 case ruleGOTSymbolOffset: 1203 if instructionName != "movabsq" { 1204 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1205 } 1206 if i != 0 || len(argNodes) != 2 { 1207 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1208 } 1209 1210 assertNodeType(arg.up, ruleSymbolName) 1211 symbol := d.contents(arg.up) 1212 if strings.HasPrefix(symbol, ".L") { 1213 symbol = d.mapLocalSymbol(symbol) 1214 } 1215 targetReg := d.contents(argNodes[1]) 1216 1217 var prefix string 1218 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1219 if isGOTOFF { 1220 prefix = 
"gotoff" 1221 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1222 } else { 1223 prefix = "got" 1224 d.gotOffsetsNeeded[symbol] = struct{}{} 1225 } 1226 changed = true 1227 1228 wrappers = append(wrappers, func(k func()) { 1229 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1230 // of writing) emits 64-bit relocations anyway, so the following four bytes 1231 // get stomped. Thus we use 64-bit offsets. 1232 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1233 }) 1234 1235 default: 1236 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1237 } 1238 } 1239 1240 if changed { 1241 d.writeCommentedNode(statement) 1242 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1243 wrappers.do(func() { 1244 d.output.WriteString(replacement) 1245 }) 1246 } else { 1247 d.writeNode(statement) 1248 } 1249 1250 return statement, nil 1251} 1252 1253func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1254 lastStatement := statement 1255 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1256 node := skipWS(statement.up) 1257 if node == nil { 1258 d.writeNode(statement) 1259 continue 1260 } 1261 1262 switch node.pegRule { 1263 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1264 d.writeNode(statement) 1265 1266 case ruleDirective: 1267 directive := node.up 1268 assertNodeType(directive, ruleDirectiveName) 1269 directiveName := d.contents(directive) 1270 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1271 return lastStatement, nil 1272 } 1273 d.writeNode(statement) 1274 1275 case ruleLabel: 1276 label := node.up 1277 d.writeNode(statement) 1278 1279 if label.pegRule != ruleLocalSymbol { 1280 symbol := d.contents(label) 1281 localSymbol := localTargetName(symbol) 1282 d.output.WriteString(fmt.Sprintf("\n%s:\n", 
localSymbol)) 1283 1284 d.bssAccessorsNeeded[symbol] = localSymbol 1285 } 1286 1287 case ruleLabelContainingDirective: 1288 var err error 1289 statement, err = d.processLabelContainingDirective(statement, node.up) 1290 if err != nil { 1291 return nil, err 1292 } 1293 1294 default: 1295 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1296 } 1297 } 1298 1299 return lastStatement, nil 1300} 1301 1302func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1303 w.WriteString(".p2align 2\n") 1304 w.WriteString(".hidden " + funcName + "\n") 1305 w.WriteString(".type " + funcName + ", @function\n") 1306 w.WriteString(funcName + ":\n") 1307 w.WriteString(".cfi_startproc\n") 1308 // We insert a landing pad (`bti c` instruction) unconditionally at the beginning of 1309 // every generated function so that they can be called indirectly (with `blr` or 1310 // `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is 1311 // a no-op on machines or program states not supporting BTI (Branch Target Identification). 1312 // None of the generated function bodies call other functions (with bl or blr), so we only 1313 // insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`. 1314 // Normally we would also generate a .note.gnu.property section to annotate the assembly 1315 // file as BTI-compatible, but if the input assembly files are BTI-compatible, they should 1316 // already have those sections so there is no need to add an extra one ourselves. 1317 w.WriteString("\thint #34 // bti c\n") 1318 writeContents(w) 1319 w.WriteString(".cfi_endproc\n") 1320 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1321} 1322 1323func transform(w stringWriter, inputs []inputFile) error { 1324 // symbols contains all defined symbols. 1325 symbols := make(map[string]struct{}) 1326 // fileNumbers is the set of IDs seen in .file directives. 
1327 fileNumbers := make(map[int]struct{}) 1328 // maxObservedFileNumber contains the largest seen file number in a 1329 // .file directive. Zero is not a valid number. 1330 maxObservedFileNumber := 0 1331 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1332 // checksums in .file directives. If it does so, then this script needs 1333 // to match that behaviour otherwise warnings result. 1334 fileDirectivesContainMD5 := false 1335 1336 // OPENSSL_ia32cap_get will be synthesized by this script. 1337 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1338 1339 for _, input := range inputs { 1340 forEachPath(input.ast.up, func(node *node32) { 1341 symbol := input.contents[node.begin:node.end] 1342 if _, ok := symbols[symbol]; ok { 1343 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1344 } 1345 symbols[symbol] = struct{}{} 1346 }, ruleStatement, ruleLabel, ruleSymbolName) 1347 1348 forEachPath(input.ast.up, func(node *node32) { 1349 assertNodeType(node, ruleLocationDirective) 1350 directive := input.contents[node.begin:node.end] 1351 if !strings.HasPrefix(directive, ".file") { 1352 return 1353 } 1354 parts := strings.Fields(directive) 1355 if len(parts) == 2 { 1356 // This is a .file directive with just a 1357 // filename. Clang appears to generate just one 1358 // of these at the beginning of the output for 1359 // the compilation unit. Ignore it. 
1360 return 1361 } 1362 fileNo, err := strconv.Atoi(parts[1]) 1363 if err != nil { 1364 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1365 } 1366 1367 if _, ok := fileNumbers[fileNo]; ok { 1368 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1369 } 1370 fileNumbers[fileNo] = struct{}{} 1371 1372 if fileNo > maxObservedFileNumber { 1373 maxObservedFileNumber = fileNo 1374 } 1375 1376 for _, token := range parts[2:] { 1377 if token == "md5" { 1378 fileDirectivesContainMD5 = true 1379 } 1380 } 1381 }, ruleStatement, ruleLocationDirective) 1382 } 1383 1384 processor := x86_64 1385 if len(inputs) > 0 { 1386 processor = detectProcessor(inputs[0]) 1387 } 1388 1389 commentIndicator := "#" 1390 if processor == aarch64 { 1391 commentIndicator = "//" 1392 } 1393 1394 d := &delocation{ 1395 symbols: symbols, 1396 processor: processor, 1397 commentIndicator: commentIndicator, 1398 output: w, 1399 redirectors: make(map[string]string), 1400 bssAccessorsNeeded: make(map[string]string), 1401 gotExternalsNeeded: make(map[string]struct{}), 1402 gotOffsetsNeeded: make(map[string]struct{}), 1403 gotOffOffsetsNeeded: make(map[string]struct{}), 1404 } 1405 1406 w.WriteString(".text\n") 1407 var fileTrailing string 1408 if fileDirectivesContainMD5 { 1409 fileTrailing = " md5 0x00000000000000000000000000000000" 1410 } 1411 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1412 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1413 w.WriteString("BORINGSSL_bcm_text_start:\n") 1414 1415 for _, input := range inputs { 1416 if err := d.processInput(input); err != nil { 1417 return err 1418 } 1419 } 1420 1421 w.WriteString(".text\n") 1422 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1423 w.WriteString("BORINGSSL_bcm_text_end:\n") 1424 1425 // Emit redirector functions. Each is a single jump instruction. 
1426 var redirectorNames []string 1427 for name := range d.redirectors { 1428 redirectorNames = append(redirectorNames, name) 1429 } 1430 sort.Strings(redirectorNames) 1431 1432 for _, name := range redirectorNames { 1433 redirector := d.redirectors[name] 1434 switch d.processor { 1435 case aarch64: 1436 writeAarch64Function(w, redirector, func(w stringWriter) { 1437 w.WriteString("\tb " + name + "\n") 1438 }) 1439 1440 case x86_64: 1441 w.WriteString(".type " + redirector + ", @function\n") 1442 w.WriteString(redirector + ":\n") 1443 w.WriteString("\tjmp\t" + name + "\n") 1444 } 1445 } 1446 1447 var accessorNames []string 1448 for accessor := range d.bssAccessorsNeeded { 1449 accessorNames = append(accessorNames, accessor) 1450 } 1451 sort.Strings(accessorNames) 1452 1453 // Emit BSS accessor functions. Each is a single LEA followed by RET. 1454 for _, name := range accessorNames { 1455 funcName := accessorName(name) 1456 target := d.bssAccessorsNeeded[name] 1457 1458 switch d.processor { 1459 case x86_64: 1460 w.WriteString(".type " + funcName + ", @function\n") 1461 w.WriteString(funcName + ":\n") 1462 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1463 1464 case aarch64: 1465 writeAarch64Function(w, funcName, func(w stringWriter) { 1466 w.WriteString("\tadrp x0, " + target + "\n") 1467 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1468 w.WriteString("\tret\n") 1469 }) 1470 } 1471 } 1472 1473 switch d.processor { 1474 case aarch64: 1475 externalNames := sortedSet(d.gotExternalsNeeded) 1476 for _, symbol := range externalNames { 1477 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1478 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1479 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1480 w.WriteString("\tret\n") 1481 }) 1482 } 1483 1484 writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1485 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1486 w.WriteString("\tadd x0, x0, 
:lo12:OPENSSL_armcap_P\n") 1487 w.WriteString("\tret\n") 1488 }) 1489 1490 case x86_64: 1491 externalNames := sortedSet(d.gotExternalsNeeded) 1492 for _, name := range externalNames { 1493 parts := strings.SplitN(name, "@", 2) 1494 symbol, section := parts[0], parts[1] 1495 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1496 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1497 w.WriteString(symbol + "_" + section + "_external:\n") 1498 // Ideally this would be .quad foo@GOTPCREL, but clang's 1499 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1500 // we manually sign-extend the value, knowing that the GOT is 1501 // always at the end, thus foo@GOTPCREL has a positive value. 1502 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1503 w.WriteString("\t.long 0\n") 1504 } 1505 1506 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1507 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1508 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1509 w.WriteString("OPENSSL_ia32cap_get:\n") 1510 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1511 w.WriteString("\tret\n") 1512 1513 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1514 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1515 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1516 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1517 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1518 1519 if d.gotDeltaNeeded { 1520 w.WriteString(".Lboringssl_got_delta:\n") 1521 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1522 } 1523 1524 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1525 w.WriteString(".Lboringssl_got_" + name + ":\n") 1526 w.WriteString("\t.quad " + name + "@GOT\n") 1527 } 1528 for _, name := range sortedSet(d.gotOffOffsetsNeeded) { 1529 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1530 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1531 } 1532 } 
1533 1534 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1535 w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n") 1536 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1537 for _, b := range fipscommon.UninitHashValue { 1538 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1539 } 1540 1541 return nil 1542} 1543 1544// preprocess runs source through the C preprocessor. 1545func preprocess(cppCommand []string, path string) ([]byte, error) { 1546 var args []string 1547 args = append(args, cppCommand...) 1548 args = append(args, path) 1549 1550 cpp := exec.Command(args[0], args[1:]...) 1551 cpp.Stderr = os.Stderr 1552 var result bytes.Buffer 1553 cpp.Stdout = &result 1554 1555 if err := cpp.Run(); err != nil { 1556 return nil, err 1557 } 1558 1559 return result.Bytes(), nil 1560} 1561 1562func parseInputs(inputs []inputFile, cppCommand []string) error { 1563 for i, input := range inputs { 1564 var contents string 1565 1566 if input.isArchive { 1567 arFile, err := os.Open(input.path) 1568 if err != nil { 1569 return err 1570 } 1571 defer arFile.Close() 1572 1573 ar, err := ar.ParseAR(arFile) 1574 if err != nil { 1575 return err 1576 } 1577 1578 if len(ar) != 1 { 1579 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1580 } 1581 1582 for _, c := range ar { 1583 contents = string(c) 1584 } 1585 } else { 1586 var inBytes []byte 1587 var err error 1588 1589 if len(cppCommand) > 0 { 1590 inBytes, err = preprocess(cppCommand, input.path) 1591 } else { 1592 inBytes, err = os.ReadFile(input.path) 1593 } 1594 if err != nil { 1595 return err 1596 } 1597 1598 contents = string(inBytes) 1599 } 1600 1601 asm := Asm{Buffer: contents, Pretty: true} 1602 asm.Init() 1603 if err := asm.Parse(); err != nil { 1604 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1605 } 1606 ast := asm.AST() 1607 1608 inputs[i].contents = contents 1609 inputs[i].ast = ast 1610 } 1611 1612 return nil 1613} 1614 1615// 
includePathFromHeaderFilePath returns an include directory path based on the 1616// path of a specific header file. It walks up the path and assumes that the 1617// include files are rooted in a directory called "openssl". 1618func includePathFromHeaderFilePath(path string) (string, error) { 1619 dir := path 1620 for { 1621 var file string 1622 dir, file = filepath.Split(dir) 1623 1624 if file == "openssl" { 1625 return dir, nil 1626 } 1627 1628 if len(dir) == 0 { 1629 break 1630 } 1631 dir = dir[:len(dir)-1] 1632 } 1633 1634 return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path) 1635} 1636 1637func main() { 1638 // The .a file, if given, is expected to be an archive of textual 1639 // assembly sources. That's odd, but CMake really wants to create 1640 // archive files so it's the only way that we can make it work. 1641 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 1642 outFile := flag.String("o", "", "Path to output assembly") 1643 ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs") 1644 ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing") 1645 1646 flag.Parse() 1647 1648 if len(*outFile) == 0 { 1649 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 1650 os.Exit(1) 1651 } 1652 1653 var inputs []inputFile 1654 if len(*arInput) > 0 { 1655 inputs = append(inputs, inputFile{ 1656 path: *arInput, 1657 index: 0, 1658 isArchive: true, 1659 }) 1660 } 1661 1662 includePaths := make(map[string]struct{}) 1663 1664 for i, path := range flag.Args() { 1665 if len(path) == 0 { 1666 continue 1667 } 1668 1669 // Header files are not processed but their path is remembered 1670 // and passed as -I arguments when invoking the preprocessor. 
1671 if strings.HasSuffix(path, ".h") { 1672 dir, err := includePathFromHeaderFilePath(path) 1673 if err != nil { 1674 fmt.Fprintf(os.Stderr, "%s\n", err) 1675 os.Exit(1) 1676 } 1677 includePaths[dir] = struct{}{} 1678 continue 1679 } 1680 1681 inputs = append(inputs, inputFile{ 1682 path: path, 1683 index: i + 1, 1684 }) 1685 } 1686 1687 var cppCommand []string 1688 if len(*ccPath) > 0 { 1689 cppCommand = append(cppCommand, *ccPath) 1690 cppCommand = append(cppCommand, strings.Fields(*ccFlags)...) 1691 // Some of ccFlags might be superfluous when running the 1692 // preprocessor, but we don't want the compiler complaining that 1693 // "argument unused during compilation". 1694 cppCommand = append(cppCommand, "-Wno-unused-command-line-argument") 1695 1696 for includePath := range includePaths { 1697 cppCommand = append(cppCommand, "-I"+includePath) 1698 } 1699 1700 // -E requests only preprocessing. 1701 cppCommand = append(cppCommand, "-E") 1702 } 1703 1704 if err := parseInputs(inputs, cppCommand); err != nil { 1705 fmt.Fprintf(os.Stderr, "%s\n", err) 1706 os.Exit(1) 1707 } 1708 1709 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1710 if err != nil { 1711 panic(err) 1712 } 1713 defer out.Close() 1714 1715 if err := transform(out, inputs); err != nil { 1716 fmt.Fprintf(os.Stderr, "%s\n", err) 1717 os.Exit(1) 1718 } 1719} 1720 1721func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1722 if node == nil { 1723 return 1724 } 1725 1726 if len(rules) == 0 { 1727 cb(node) 1728 return 1729 } 1730 1731 rule := rules[0] 1732 childRules := rules[1:] 1733 1734 for ; node != nil; node = node.next { 1735 if node.pegRule != rule { 1736 continue 1737 } 1738 1739 if len(childRules) == 0 { 1740 cb(node) 1741 } else { 1742 forEachPath(node.up, cb, childRules...) 
1743 } 1744 } 1745} 1746 1747func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1748 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1749 } 1750 return node 1751} 1752 1753func skipWS(node *node32) *node32 { 1754 return skipNodes(node, ruleWS) 1755} 1756 1757func assertNodeType(node *node32, expected pegRule) { 1758 if rule := node.pegRule; rule != expected { 1759 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1760 } 1761} 1762 1763type wrapperFunc func(func()) 1764 1765type wrapperStack []wrapperFunc 1766 1767func (w *wrapperStack) do(baseCase func()) { 1768 if len(*w) == 0 { 1769 baseCase() 1770 return 1771 } 1772 1773 wrapper := (*w)[0] 1774 *w = (*w)[1:] 1775 wrapper(func() { w.do(baseCase) }) 1776} 1777 1778// localTargetName returns the name of the local target label for a global 1779// symbol named name. 1780func localTargetName(name string) string { 1781 return ".L" + name + "_local_target" 1782} 1783 1784func isSynthesized(symbol string) bool { 1785 return strings.HasSuffix(symbol, "_bss_get") || 1786 symbol == "OPENSSL_ia32cap_get" || 1787 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1788} 1789 1790func redirectorName(symbol string) string { 1791 return "bcm_redirector_" + symbol 1792} 1793 1794// sectionType returns the type of a section. I.e. a section called “.text.foo” 1795// is a “.text” section. 1796func sectionType(section string) (string, bool) { 1797 if len(section) == 0 || section[0] != '.' { 1798 return "", false 1799 } 1800 1801 i := strings.Index(section[1:], ".") 1802 if i != -1 { 1803 section = section[:i+1] 1804 } 1805 1806 if strings.HasPrefix(section, ".debug_") { 1807 return ".debug", true 1808 } 1809 1810 return section, true 1811} 1812 1813// accessorName returns the name of the accessor function for a BSS symbol 1814// named name. 
1815func accessorName(name string) string { 1816 return name + "_bss_get" 1817} 1818 1819func (d *delocation) mapLocalSymbol(symbol string) string { 1820 if d.currentInput.index == 0 { 1821 return symbol 1822 } 1823 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1824} 1825 1826func detectProcessor(input inputFile) processorType { 1827 for statement := input.ast.up; statement != nil; statement = statement.next { 1828 node := skipNodes(statement.up, ruleWS) 1829 if node == nil || node.pegRule != ruleInstruction { 1830 continue 1831 } 1832 1833 instruction := node.up 1834 instructionName := input.contents[instruction.begin:instruction.end] 1835 1836 switch instructionName { 1837 case "movq", "call", "leaq": 1838 return x86_64 1839 case "str", "bl", "ldr", "st1": 1840 return aarch64 1841 } 1842 } 1843 1844 panic("processed entire input and didn't recognise any instructions.") 1845} 1846 1847func sortedSet(m map[string]struct{}) []string { 1848 ret := make([]string, 0, len(m)) 1849 for key := range m { 1850 ret = append(ret, key) 1851 } 1852 sort.Strings(ret) 1853 return ret 1854} 1855