// Copyright (c) 2017, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// delocate performs several transformations of textual assembly code. See
// crypto/fipsmodule/FIPS.md for an overview.
package main

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"boringssl.googlesource.com/boringssl/util/ar"
	"boringssl.googlesource.com/boringssl/util/fipstools/fipscommon"
)

// inputFile represents a textual assembly file.
type inputFile struct {
	// path is the location of the file.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}

// stringWriter is the only capability that delocation requires of its
// output: the ability to append strings to it.
type stringWriter interface {
	WriteString(string) (int, error)
}

// processorType identifies the instruction set of the assembly being
// processed. It selects between the Intel and Aarch64 instruction handlers.
type processorType int

const (
	// The values start at one, so the zero value of processorType does not
	// name a valid processor.
	x86_64 processorType = iota + 1
	aarch64
)

// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects which instruction handler is used.
	processor processorType
	// output receives the transformed assembly.
	output stringWriter
	// commentIndicator starts a comment, e.g. "//" or "#"
	commentIndicator string

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. “memcpy” ->
	// “bcm_redirector_memcpy”.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. “P384_data_storage” ->
	// “P384_data_storage”. (Symbols declared with .comm/.lcomm are
	// recorded as mapping to themselves.)
	bssAccessorsNeeded map[string]string
	// gotExternalsNeeded is a set of symbol names for which we need
	// “delta” symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}
	// gotDeltaNeeded is true if the code needs to load the value of
	// _GLOBAL_OFFSET_TABLE_.
	gotDeltaNeeded bool
	// gotOffsetsNeeded contains the symbols whose @GOT offsets are needed.
	gotOffsetsNeeded map[string]struct{}
	// gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed.
	gotOffOffsetsNeeded map[string]struct{}

	// currentInput is the file being processed. AST node offsets are
	// interpreted relative to its contents.
	currentInput inputFile
}

// contents returns the text of the current input that is spanned by |node|.
func (d *delocation) contents(node *node32) string {
	return d.currentInput.contents[node.begin:node.end]
}

// writeNode writes out an AST node.
98func (d *delocation) writeNode(node *node32) { 99 if _, err := d.output.WriteString(d.contents(node)); err != nil { 100 panic(err) 101 } 102} 103 104func (d *delocation) writeCommentedNode(node *node32) { 105 line := d.contents(node) 106 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 107 panic(err) 108 } 109} 110 111func locateError(err error, with *node32, in inputFile) error { 112 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 113 var line int 114 for _, pos := range posMap { 115 line = pos.line 116 } 117 118 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 119} 120 121func (d *delocation) processInput(input inputFile) (err error) { 122 d.currentInput = input 123 124 var origStatement *node32 125 defer func() { 126 if err := recover(); err != nil { 127 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 128 } 129 }() 130 131 for statement := input.ast.up; statement != nil; statement = statement.next { 132 assertNodeType(statement, ruleStatement) 133 origStatement = statement 134 135 node := skipWS(statement.up) 136 if node == nil { 137 d.writeNode(statement) 138 continue 139 } 140 141 switch node.pegRule { 142 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 143 d.writeNode(statement) 144 case ruleDirective: 145 statement, err = d.processDirective(statement, node.up) 146 case ruleLabelContainingDirective: 147 statement, err = d.processLabelContainingDirective(statement, node.up) 148 case ruleLabel: 149 statement, err = d.processLabel(statement, node.up) 150 case ruleInstruction: 151 switch d.processor { 152 case x86_64: 153 statement, err = d.processIntelInstruction(statement, node.up) 154 case aarch64: 155 statement, err = d.processAarch64Instruction(statement, node.up) 156 default: 157 panic("unknown processor") 158 } 159 default: 160 panic(fmt.Sprintf("unknown top-level statement type 
%q", rul3s[node.pegRule])) 161 } 162 163 if err != nil { 164 return locateError(err, origStatement, input) 165 } 166 } 167 168 return nil 169} 170 171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 172 assertNodeType(directive, ruleDirectiveName) 173 directiveName := d.contents(directive) 174 175 var args []string 176 forEachPath(directive, func(arg *node32) { 177 // If the argument is a quoted string, use the raw contents. 178 // (Note that this doesn't unescape the string, but that's not 179 // needed so far. 180 if arg.up != nil { 181 arg = arg.up 182 assertNodeType(arg, ruleQuotedArg) 183 if arg.up == nil { 184 args = append(args, "") 185 return 186 } 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedText) 189 } 190 args = append(args, d.contents(arg)) 191 }, ruleArgs, ruleArg) 192 193 switch directiveName { 194 case "comm", "lcomm": 195 if len(args) < 1 { 196 return nil, errors.New("comm directive has no arguments") 197 } 198 d.bssAccessorsNeeded[args[0]] = args[0] 199 d.writeNode(statement) 200 201 case "data": 202 // ASAN and some versions of MSAN are adding a .data section, 203 // and adding references to symbols within it to the code. We 204 // will have to work around this in the future. 205 return nil, errors.New(".data section found in module") 206 207 case "section": 208 section := args[0] 209 210 if section == ".data.rel.ro" { 211 // In a normal build, this is an indication of a 212 // problem but any references from the module to this 213 // section will result in a relocation and thus will 214 // break the integrity check. ASAN can generate these 215 // sections and so we will likely have to work around 216 // that in the future. 217 return nil, errors.New(".data.rel.ro section found in module") 218 } 219 220 sectionType, ok := sectionType(section) 221 if !ok { 222 // Unknown sections are permitted in order to be robust 223 // to different compiler modes. 
224 d.writeNode(statement) 225 break 226 } 227 228 switch sectionType { 229 case ".rodata", ".text": 230 // Move .rodata to .text so it may be accessed without 231 // a relocation. GCC with -fmerge-constants will place 232 // strings into separate sections, so we move all 233 // sections named like .rodata. Also move .text.startup 234 // so the self-test function is also in the module. 235 d.writeCommentedNode(statement) 236 d.output.WriteString(".text\n") 237 238 case ".data": 239 // See above about .data 240 return nil, errors.New(".data section found in module") 241 242 case ".init_array", ".fini_array", ".ctors", ".dtors": 243 // init_array/ctors/dtors contains function 244 // pointers to constructor/destructor 245 // functions. These contain relocations, but 246 // they're in a different section anyway. 247 d.writeNode(statement) 248 break 249 250 case ".debug", ".note": 251 d.writeNode(statement) 252 break 253 254 case ".bss": 255 d.writeNode(statement) 256 return d.handleBSS(statement) 257 } 258 259 default: 260 d.writeNode(statement) 261 } 262 263 return statement, nil 264} 265 266func (d *delocation) processSymbolExpr(expr *node32, b *strings.Builder) bool { 267 changed := false 268 assertNodeType(expr, ruleSymbolExpr) 269 270 for expr != nil { 271 atom := expr.up 272 assertNodeType(atom, ruleSymbolAtom) 273 274 for term := atom.up; term != nil; term = skipWS(term.next) { 275 if term.pegRule == ruleSymbolExpr { 276 changed = d.processSymbolExpr(term, b) || changed 277 continue 278 } 279 280 if term.pegRule != ruleLocalSymbol { 281 b.WriteString(d.contents(term)) 282 continue 283 } 284 285 oldSymbol := d.contents(term) 286 newSymbol := d.mapLocalSymbol(oldSymbol) 287 if newSymbol != oldSymbol { 288 changed = true 289 } 290 291 b.WriteString(newSymbol) 292 } 293 294 next := skipWS(atom.next) 295 if next == nil { 296 break 297 } 298 assertNodeType(next, ruleSymbolOperator) 299 b.WriteString(d.contents(next)) 300 next = skipWS(next.next) 301 
assertNodeType(next, ruleSymbolExpr) 302 expr = next 303 } 304 return changed 305} 306 307func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 308 // The symbols within directives need to be mapped so that local 309 // symbols in two different .s inputs don't collide. 310 changed := false 311 assertNodeType(directive, ruleLabelContainingDirectiveName) 312 name := d.contents(directive) 313 314 node := directive.next 315 assertNodeType(node, ruleWS) 316 317 node = node.next 318 assertNodeType(node, ruleSymbolArgs) 319 320 var args []string 321 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 322 assertNodeType(node, ruleSymbolArg) 323 arg := node.up 324 assertNodeType(arg, ruleSymbolExpr) 325 326 var b strings.Builder 327 changed = d.processSymbolExpr(arg, &b) || changed 328 329 args = append(args, b.String()) 330 } 331 332 if !changed { 333 d.writeNode(statement) 334 } else { 335 d.writeCommentedNode(statement) 336 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 337 } 338 339 return statement, nil 340} 341 342func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 343 symbol := d.contents(label) 344 345 switch label.pegRule { 346 case ruleLocalLabel: 347 d.output.WriteString(symbol + ":\n") 348 case ruleLocalSymbol: 349 // symbols need to be mapped so that local symbols from two 350 // different .s inputs don't collide. 351 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 352 case ruleSymbolName: 353 d.output.WriteString(localTargetName(symbol) + ":\n") 354 d.writeNode(statement) 355 default: 356 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 357 } 358 359 return statement, nil 360} 361 362// instructionArgs collects all the arguments to an instruction. 
363func instructionArgs(node *node32) (argNodes []*node32) { 364 for node = skipWS(node); node != nil; node = skipWS(node.next) { 365 assertNodeType(node, ruleInstructionArg) 366 argNodes = append(argNodes, node.up) 367 } 368 369 return argNodes 370} 371 372// Aarch64 support 373 374// gotHelperName returns the name of a synthesised function that returns an 375// address from the GOT. 376func gotHelperName(symbol string) string { 377 return ".Lboringssl_loadgot_" + symbol 378} 379 380// loadAarch64Address emits instructions to put the address of |symbol| 381// (optionally adjusted by |offsetStr|) into |targetReg|. 382func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) { 383 // There are two paths here: either the symbol is known to be local in which 384 // case adr is used to get the address (within 1MiB), or a GOT reference is 385 // really needed in which case the code needs to jump to a helper function. 386 // 387 // A helper function is needed because using code appears to be the only way 388 // to load a GOT value. On other platforms we have ".quad foo@GOT" outside of 389 // the module, but on Aarch64 that results in a "COPY" relocation and linker 390 // comments suggest it's a weird hack. So, for each GOT symbol needed, we emit 391 // a function outside of the module that returns the address from the GOT in 392 // x0. 
393 394 d.writeCommentedNode(statement) 395 396 _, isKnown := d.symbols[symbol] 397 isLocal := strings.HasPrefix(symbol, ".L") 398 if isKnown || isLocal || isSynthesized(symbol) { 399 if isLocal { 400 symbol = d.mapLocalSymbol(symbol) 401 } else if isKnown { 402 symbol = localTargetName(symbol) 403 } 404 405 d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n") 406 407 return statement, nil 408 } 409 410 if len(offsetStr) != 0 { 411 panic("non-zero offset for helper-based reference") 412 } 413 414 var helperFunc string 415 if symbol == "OPENSSL_armcap_P" { 416 helperFunc = ".LOPENSSL_armcap_P_addr" 417 } else { 418 // GOT helpers also dereference the GOT entry, thus the subsequent ldr 419 // instruction, which would normally do the dereferencing, needs to be 420 // dropped. GOT helpers have to include the dereference because the 421 // assembler doesn't support ":got_lo12:foo" offsets except in an ldr 422 // instruction. 423 d.gotExternalsNeeded[symbol] = struct{}{} 424 helperFunc = gotHelperName(symbol) 425 } 426 427 // Clear the red-zone. I can't find a definitive answer about whether Linux 428 // Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a 429 // 128-byte one. Thus conservatively clear a 128-byte red-zone. 430 d.output.WriteString("\tsub sp, sp, 128\n") 431 432 // Save x0 (which will be stomped by the return value) and the link register 433 // to the stack. Then save the program counter into the link register and 434 // jump to the helper function. 435 d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n") 436 d.output.WriteString("\tbl " + helperFunc + "\n") 437 438 if targetReg == "x0" { 439 // If the target happens to be x0 then restore the link register from the 440 // stack and send the saved value of x0 to the zero register. 441 d.output.WriteString("\tldp xzr, lr, [sp], #16\n") 442 } else { 443 // Otherwise move the result into place and restore registers. 
444 d.output.WriteString("\tmov " + targetReg + ", x0\n") 445 d.output.WriteString("\tldp x0, lr, [sp], #16\n") 446 } 447 448 // Revert the red-zone adjustment. 449 d.output.WriteString("\tadd sp, sp, 128\n") 450 451 return statement, nil 452} 453 454func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) { 455 assertNodeType(instruction, ruleInstructionName) 456 instructionName := d.contents(instruction) 457 458 argNodes := instructionArgs(instruction.next) 459 460 switch instructionName { 461 case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg": 462 // These functions are special because they take a condition-code name as 463 // an argument and that looks like a symbol reference. 464 d.writeNode(statement) 465 return statement, nil 466 467 case "mrs": 468 // Functions that take special register names also look like a symbol 469 // reference to the parser. 470 d.writeNode(statement) 471 return statement, nil 472 473 case "adrp": 474 // adrp always generates a relocation, even when the target symbol is in the 475 // same segment, because the page-offset of the code isn't known until link 476 // time. Thus adrp instructions are turned into either adr instructions 477 // (limiting the module to 1MiB offsets) or calls to helper functions, both of 478 // which load the full address. Later instructions, which add the low 12 bits 479 // of offset, are tweaked to remove the offset since it's already included. 480 // Loads of GOT symbols are slightly more complex because it's not possible to 481 // avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr 482 // instruction, which would normally do the dereferencing, is dropped 483 // completely. (Or turned into a mov if it targets a different register.) 
484 assertNodeType(argNodes[0], ruleRegisterOrConstant) 485 targetReg := d.contents(argNodes[0]) 486 if !strings.HasPrefix(targetReg, "x") { 487 panic("adrp targetting register " + targetReg + ", which has the wrong size") 488 } 489 490 var symbol, offset string 491 switch argNodes[1].pegRule { 492 case ruleGOTSymbolOffset: 493 symbol = d.contents(argNodes[1].up) 494 case ruleMemoryRef: 495 assertNodeType(argNodes[1].up, ruleSymbolRef) 496 node, empty := d.gatherOffsets(argNodes[1].up.up, "") 497 if len(empty) != 0 { 498 panic("prefix offsets found for adrp") 499 } 500 symbol = d.contents(node) 501 _, offset = d.gatherOffsets(node.next, "") 502 default: 503 panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule]) 504 } 505 506 return d.loadAarch64Address(statement, targetReg, symbol, offset) 507 } 508 509 var args []string 510 changed := false 511 512 for _, arg := range argNodes { 513 fullArg := arg 514 515 switch arg.pegRule { 516 case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak: 517 args = append(args, d.contents(fullArg)) 518 519 case ruleGOTSymbolOffset: 520 // These should only be arguments to adrp and thus unreachable. 521 panic("unreachable") 522 523 case ruleMemoryRef: 524 ref := arg.up 525 526 switch ref.pegRule { 527 case ruleSymbolRef: 528 // This is a branch. Either the target needs to be written to a local 529 // version of the symbol to ensure that no relocations are emitted, or 530 // it needs to jump to a redirector function. 
531 symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up) 532 changed = didChange 533 534 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 535 symbol = localTargetName(symbol) 536 changed = true 537 } else if !symbolIsLocal && !isSynthesized(symbol) { 538 redirector := redirectorName(symbol) 539 d.redirectors[symbol] = redirector 540 symbol = redirector 541 changed = true 542 } else if didChange && symbolIsLocal && len(offset) > 0 { 543 // didChange is set when the inputFile index is not 0; which is the index of the 544 // first file copied to the output, which is the generated assembly of bcm.c. 545 // In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index) 546 // in order to ensure they don't collide. `index` gets incremented per file. 547 // If there is offset after the symbol, append the `offset`. 548 symbol = symbol + offset 549 } 550 551 args = append(args, symbol) 552 553 case ruleARMBaseIndexScale: 554 parts := ref.up 555 assertNodeType(parts, ruleARMRegister) 556 baseAddrReg := d.contents(parts) 557 parts = skipWS(parts.next) 558 559 // Only two forms need special handling. First there's memory references 560 // like "[x*, :got_lo12:foo]". The base register here will have been the 561 // target of an adrp instruction to load the page address, but the adrp 562 // will have turned into loading the full address *and dereferencing it*, 563 // above. Thus this instruction needs to be dropped otherwise we'll be 564 // dereferencing twice. 565 // 566 // Second there are forms like "[x*, :lo12:foo]" where the code has used 567 // adrp to load the page address into x*. That adrp will have been turned 568 // into loading the full address so just the offset needs to be dropped. 
569 570 if parts != nil { 571 if parts.pegRule == ruleARMGOTLow12 { 572 if instructionName != "ldr" { 573 panic("Symbol reference outside of ldr instruction") 574 } 575 576 if skipWS(parts.next) != nil || parts.up.next != nil { 577 panic("can't handle tweak or post-increment with symbol references") 578 } 579 580 // The GOT helper already dereferenced the entry so, at most, just a mov 581 // is needed to put things in the right register. 582 d.writeCommentedNode(statement) 583 if baseAddrReg != args[0] { 584 d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n") 585 } 586 return statement, nil 587 } else if parts.pegRule == ruleLow12BitsSymbolRef { 588 if instructionName != "ldr" { 589 panic("Symbol reference outside of ldr instruction") 590 } 591 592 if skipWS(parts.next) != nil || parts.up.next != nil { 593 panic("can't handle tweak or post-increment with symbol references") 594 } 595 596 // Suppress the offset; adrp loaded the full address. 597 args = append(args, "["+baseAddrReg+"]") 598 changed = true 599 continue 600 } 601 } 602 603 args = append(args, d.contents(fullArg)) 604 605 case ruleLow12BitsSymbolRef: 606 // These are the second instruction in a pair: 607 // adrp x0, symbol // Load the page address into x0 608 // add x1, x0, :lo12:symbol // Adds the page offset. 609 // 610 // The adrp instruction will have been turned into a sequence that loads 611 // the full address, above, thus the offset is turned into zero. If that 612 // results in the instruction being a nop, then it is deleted. 
613 if instructionName != "add" { 614 panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) 615 } 616 617 if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") { 618 panic("address arithmetic with incorrectly sized register") 619 } 620 621 if args[0] == args[1] { 622 d.writeCommentedNode(statement) 623 return statement, nil 624 } 625 626 args = append(args, "#0") 627 changed = true 628 629 default: 630 panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule])) 631 } 632 633 default: 634 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 635 } 636 } 637 638 if changed { 639 d.writeCommentedNode(statement) 640 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 641 d.output.WriteString(replacement) 642 } else { 643 d.writeNode(statement) 644 } 645 646 return statement, nil 647} 648 649func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) { 650 for symRef != nil && symRef.pegRule == ruleOffset { 651 offset := d.contents(symRef) 652 if offset[0] != '+' && offset[0] != '-' { 653 offset = "+" + offset 654 } 655 offsets = offsets + offset 656 symRef = symRef.next 657 } 658 return symRef, offsets 659} 660 661func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) { 662 if memRef.pegRule != ruleSymbolRef { 663 return "", "", "", false, false, memRef 664 } 665 666 symRef := memRef.up 667 nextRef = memRef.next 668 669 // (Offset* '+')? 
670 symRef, offset = d.gatherOffsets(symRef, offset) 671 672 // (LocalSymbol / SymbolName) 673 symbol = d.contents(symRef) 674 if symRef.pegRule == ruleLocalSymbol { 675 symbolIsLocal = true 676 mapped := d.mapLocalSymbol(symbol) 677 if mapped != symbol { 678 symbol = mapped 679 didChange = true 680 } 681 } 682 symRef = symRef.next 683 684 // Offset* 685 symRef, offset = d.gatherOffsets(symRef, offset) 686 687 // ('@' Section / Offset*)? 688 if symRef != nil { 689 assertNodeType(symRef, ruleSection) 690 section = d.contents(symRef) 691 symRef = symRef.next 692 693 symRef, offset = d.gatherOffsets(symRef, offset) 694 } 695 696 if symRef != nil { 697 panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule])) 698 } 699 700 return 701} 702 703/* Intel */ 704 705type instructionType int 706 707const ( 708 instrPush instructionType = iota 709 instrMove 710 // instrTransformingMove is essentially a move, but it performs some 711 // transformation of the data during the process. 712 instrTransformingMove 713 instrJump 714 instrConditionalMove 715 // instrCombine merges the source and destination in some fashion, for example 716 // a 2-operand bitwise operation. 717 instrCombine 718 // instrMemoryVectorCombine is similer to instrCombine, but the source 719 // register must be a memory reference and the destination register 720 // must be a vector register. 721 instrMemoryVectorCombine 722 // instrThreeArg merges two sources into a destination in some fashion. 723 instrThreeArg 724 // instrCompare takes two arguments and writes outputs to the flags register. 
725 instrCompare 726 instrOther 727) 728 729func classifyInstruction(instr string, args []*node32) instructionType { 730 switch instr { 731 case "push", "pushq": 732 if len(args) == 1 { 733 return instrPush 734 } 735 736 case "mov", "movq", "vmovq", "movsd", "vmovsd": 737 if len(args) == 2 { 738 return instrMove 739 } 740 741 case "cmovneq", "cmoveq": 742 if len(args) == 2 { 743 return instrConditionalMove 744 } 745 746 case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo": 747 if len(args) == 1 { 748 return instrJump 749 } 750 751 case "orq", "andq", "xorq": 752 if len(args) == 2 { 753 return instrCombine 754 } 755 756 case "cmpq": 757 if len(args) == 2 { 758 return instrCompare 759 } 760 761 case "sarxq", "shlxq", "shrxq": 762 if len(args) == 3 { 763 return instrThreeArg 764 } 765 766 case "vpbroadcastq": 767 if len(args) == 2 { 768 return instrTransformingMove 769 } 770 771 case "movlps", "movhps": 772 if len(args) == 2 { 773 return instrMemoryVectorCombine 774 } 775 } 776 777 return instrOther 778} 779 780func push(w stringWriter) wrapperFunc { 781 return func(k func()) { 782 w.WriteString("\tpushq %rax\n") 783 k() 784 w.WriteString("\txchg %rax, (%rsp)\n") 785 } 786} 787 788func compare(w stringWriter, instr, a, b string) wrapperFunc { 789 return func(k func()) { 790 k() 791 w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b)) 792 } 793} 794 795func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc { 796 d.gotExternalsNeeded[symbol+"@"+section] = struct{}{} 797 798 return func(k func()) { 799 if !redzoneCleared { 800 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
801 } 802 w.WriteString("\tpushf\n") 803 w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination)) 804 w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination)) 805 w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination)) 806 w.WriteString("\tpopf\n") 807 if !redzoneCleared { 808 w.WriteString("\tleaq\t128(%rsp), %rsp\n") 809 } 810 } 811} 812 813func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { 814 return func(k func()) { 815 if !redzoneCleared { 816 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 817 defer w.WriteString("\tleaq 128(%rsp), %rsp\n") 818 } 819 w.WriteString("\tpushfq\n") 820 k() 821 w.WriteString("\tpopfq\n") 822 } 823} 824 825func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) { 826 candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"} 827 828 var reg string 829NextCandidate: 830 for _, candidate := range candidates { 831 for _, avoid := range avoidRegs { 832 if candidate == avoid { 833 continue NextCandidate 834 } 835 } 836 837 reg = candidate 838 break 839 } 840 841 if len(reg) == 0 { 842 panic("too many excluded registers") 843 } 844 845 return func(k func()) { 846 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
847 w.WriteString("\tpushq " + reg + "\n") 848 k() 849 w.WriteString("\tpopq " + reg + "\n") 850 w.WriteString("\tleaq 128(%rsp), %rsp\n") 851 }, reg 852} 853 854func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { 855 return func(k func()) { 856 k() 857 prefix := "" 858 if isAVX { 859 prefix = "v" 860 } 861 w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") 862 } 863} 864 865func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc { 866 return func(k func()) { 867 k() 868 w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") 869 } 870} 871 872func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 873 return func(k func()) { 874 k() 875 w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") 876 } 877} 878 879func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc { 880 return func(k func()) { 881 k() 882 w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n") 883 } 884} 885 886func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 887 return func(k func()) { 888 k() 889 // These instructions can only read from memory, so push 890 // tempReg and read from the stack. Note we assume the red zone 891 // was previously cleared by saveRegister(). 
892 w.WriteString("\tpushq " + source + "\n") 893 w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n") 894 w.WriteString("\tleaq 8(%rsp), %rsp\n") 895 } 896} 897 898func isValidLEATarget(reg string) bool { 899 return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") 900} 901 902func undoConditionalMove(w stringWriter, instr string) wrapperFunc { 903 var invertedCondition string 904 905 switch instr { 906 case "cmoveq": 907 invertedCondition = "ne" 908 case "cmovneq": 909 invertedCondition = "e" 910 default: 911 panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr)) 912 } 913 914 return func(k func()) { 915 w.WriteString("\tj" + invertedCondition + " 999f\n") 916 k() 917 w.WriteString("999:\n") 918 } 919} 920 921func (d *delocation) isRIPRelative(node *node32) bool { 922 return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)" 923} 924 925func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) { 926 assertNodeType(instruction, ruleInstructionName) 927 instructionName := d.contents(instruction) 928 929 argNodes := instructionArgs(instruction.next) 930 931 var wrappers wrapperStack 932 var args []string 933 changed := false 934 935Args: 936 for i, arg := range argNodes { 937 fullArg := arg 938 isIndirect := false 939 940 if arg.pegRule == ruleIndirectionIndicator { 941 arg = arg.next 942 isIndirect = true 943 } 944 945 switch arg.pegRule { 946 case ruleRegisterOrConstant, ruleLocalLabelRef: 947 args = append(args, d.contents(fullArg)) 948 949 case ruleMemoryRef: 950 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 951 changed = didChange 952 953 if symbol == "OPENSSL_ia32cap_P" && section == "" { 954 if instructionName != "leaq" { 955 return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName) 956 } 957 958 if i != 0 || 
len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 959 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 960 } 961 962 target := argNodes[1] 963 assertNodeType(target, ruleRegisterOrConstant) 964 reg := d.contents(target) 965 966 if !strings.HasPrefix(reg, "%r") { 967 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 968 } 969 970 changed = true 971 972 // Flag-altering instructions (i.e. addq) are going to be used so the 973 // flags need to be preserved. 974 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 975 976 wrappers = append(wrappers, func(k func()) { 977 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 978 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 979 }) 980 981 break Args 982 } 983 984 switch section { 985 case "": 986 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 987 symbol = localTargetName(symbol) 988 changed = true 989 } 990 991 case "PLT": 992 if classifyInstruction(instructionName, argNodes) != instrJump { 993 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 994 } 995 996 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 997 symbol = localTargetName(symbol) 998 changed = true 999 } else if !symbolIsLocal && !isSynthesized(symbol) { 1000 // Unknown symbol via PLT is an 1001 // out-call from the module, e.g. 1002 // memcpy. 
1003 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1004 symbol = redirectorName(symbol) 1005 } 1006 1007 changed = true 1008 1009 case "GOTPCREL": 1010 if len(offset) > 0 { 1011 return nil, errors.New("loading from GOT with offset is unsupported") 1012 } 1013 if !d.isRIPRelative(memRef) { 1014 return nil, errors.New("GOT access must be IP-relative") 1015 } 1016 1017 useGOT := false 1018 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1019 symbol = localTargetName(symbol) 1020 changed = true 1021 } else if !isSynthesized(symbol) { 1022 useGOT = true 1023 } 1024 1025 classification := classifyInstruction(instructionName, argNodes) 1026 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1027 return nil, errors.New("GOT access must be source operand") 1028 } 1029 1030 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 1031 var targetReg string 1032 var redzoneCleared bool 1033 switch classification { 1034 case instrPush: 1035 wrappers = append(wrappers, push(d.output)) 1036 targetReg = "%rax" 1037 case instrConditionalMove: 1038 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1039 fallthrough 1040 case instrMove: 1041 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1042 targetReg = d.contents(argNodes[1]) 1043 case instrCompare: 1044 otherSource := d.contents(argNodes[i^1]) 1045 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1046 redzoneCleared = true 1047 wrappers = append(wrappers, saveRegWrapper) 1048 if i == 0 { 1049 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1050 } else { 1051 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1052 } 1053 targetReg = tempReg 1054 case instrTransformingMove: 1055 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1056 targetReg = d.contents(argNodes[1]) 1057 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 
1058 if isValidLEATarget(targetReg) { 1059 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1060 } 1061 case instrCombine: 1062 targetReg = d.contents(argNodes[1]) 1063 if !isValidLEATarget(targetReg) { 1064 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1065 } 1066 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1067 redzoneCleared = true 1068 wrappers = append(wrappers, saveRegWrapper) 1069 1070 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1071 targetReg = tempReg 1072 case instrMemoryVectorCombine: 1073 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1074 targetReg = d.contents(argNodes[1]) 1075 if isValidLEATarget(targetReg) { 1076 return nil, errors.New("target register must be an XMM register") 1077 } 1078 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1079 wrappers = append(wrappers, saveRegWrapper) 1080 redzoneCleared = true 1081 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1082 targetReg = tempReg 1083 case instrThreeArg: 1084 if n := len(argNodes); n != 3 { 1085 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1086 } 1087 if i != 0 && i != 1 { 1088 return nil, errors.New("GOT access must be from source operand") 1089 } 1090 targetReg = d.contents(argNodes[2]) 1091 1092 otherSource := d.contents(argNodes[1]) 1093 if i == 1 { 1094 otherSource = d.contents(argNodes[0]) 1095 } 1096 1097 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1098 redzoneCleared = true 1099 wrappers = append(wrappers, saveRegWrapper) 1100 1101 if i == 0 { 1102 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1103 } else { 1104 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1105 } 1106 targetReg = tempReg 1107 default: 1108 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1109 } 1110 1111 if !isValidLEATarget(targetReg) { 1112 // Sometimes the compiler will load from the GOT to an 1113 // XMM register, which is not a valid target of an LEA 1114 // instruction. 1115 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1116 wrappers = append(wrappers, saveRegWrapper) 1117 isAVX := strings.HasPrefix(instructionName, "v") 1118 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1119 targetReg = tempReg 1120 if redzoneCleared { 1121 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1122 } 1123 redzoneCleared = true 1124 } 1125 1126 if symbol == "OPENSSL_ia32cap_P" { 1127 // Flag-altering instructions (i.e. addq) are going to be used so the 1128 // flags need to be preserved. 1129 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1130 wrappers = append(wrappers, func(k func()) { 1131 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1132 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1133 }) 1134 } else if useGOT { 1135 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1136 } else { 1137 wrappers = append(wrappers, func(k func()) { 1138 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1139 }) 1140 } 1141 changed = true 1142 break Args 1143 1144 default: 1145 return nil, fmt.Errorf("Unknown section type %q", section) 1146 } 1147 1148 if !changed && len(section) > 0 { 1149 panic("section was not handled") 1150 } 1151 section = "" 1152 1153 argStr := "" 1154 if isIndirect { 1155 argStr += "*" 1156 } 1157 argStr += symbol 1158 argStr += offset 1159 1160 for ; memRef != nil; memRef = memRef.next { 1161 argStr += d.contents(memRef) 1162 } 1163 1164 args = 
append(args, argStr) 1165 1166 case ruleGOTLocation: 1167 if instructionName != "movabsq" { 1168 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1169 } 1170 if i != 0 || len(argNodes) != 2 { 1171 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1172 } 1173 1174 d.gotDeltaNeeded = true 1175 changed = true 1176 instructionName = "movq" 1177 assertNodeType(arg.up, ruleLocalSymbol) 1178 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1179 targetReg := d.contents(argNodes[1]) 1180 args = append(args, ".Lboringssl_got_delta(%rip)") 1181 wrappers = append(wrappers, func(k func()) { 1182 k() 1183 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1184 }) 1185 1186 case ruleGOTSymbolOffset: 1187 if instructionName != "movabsq" { 1188 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1189 } 1190 if i != 0 || len(argNodes) != 2 { 1191 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1192 } 1193 1194 assertNodeType(arg.up, ruleSymbolName) 1195 symbol := d.contents(arg.up) 1196 if strings.HasPrefix(symbol, ".L") { 1197 symbol = d.mapLocalSymbol(symbol) 1198 } 1199 targetReg := d.contents(argNodes[1]) 1200 1201 var prefix string 1202 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1203 if isGOTOFF { 1204 prefix = "gotoff" 1205 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1206 } else { 1207 prefix = "got" 1208 d.gotOffsetsNeeded[symbol] = struct{}{} 1209 } 1210 changed = true 1211 1212 wrappers = append(wrappers, func(k func()) { 1213 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1214 // of writing) emits 64-bit relocations anyway, so the following four bytes 1215 // get stomped. Thus we use 64-bit offsets. 
1216 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1217 }) 1218 1219 default: 1220 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1221 } 1222 } 1223 1224 if changed { 1225 d.writeCommentedNode(statement) 1226 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1227 wrappers.do(func() { 1228 d.output.WriteString(replacement) 1229 }) 1230 } else { 1231 d.writeNode(statement) 1232 } 1233 1234 return statement, nil 1235} 1236 1237func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1238 lastStatement := statement 1239 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1240 node := skipWS(statement.up) 1241 if node == nil { 1242 d.writeNode(statement) 1243 continue 1244 } 1245 1246 switch node.pegRule { 1247 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1248 d.writeNode(statement) 1249 1250 case ruleDirective: 1251 directive := node.up 1252 assertNodeType(directive, ruleDirectiveName) 1253 directiveName := d.contents(directive) 1254 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1255 return lastStatement, nil 1256 } 1257 d.writeNode(statement) 1258 1259 case ruleLabel: 1260 label := node.up 1261 d.writeNode(statement) 1262 1263 if label.pegRule != ruleLocalSymbol { 1264 symbol := d.contents(label) 1265 localSymbol := localTargetName(symbol) 1266 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1267 1268 d.bssAccessorsNeeded[symbol] = localSymbol 1269 } 1270 1271 case ruleLabelContainingDirective: 1272 var err error 1273 statement, err = d.processLabelContainingDirective(statement, node.up) 1274 if err != nil { 1275 return nil, err 1276 } 1277 1278 default: 1279 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1280 } 1281 } 1282 1283 return 
lastStatement, nil 1284} 1285 1286func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1287 w.WriteString(".p2align 2\n") 1288 w.WriteString(".hidden " + funcName + "\n") 1289 w.WriteString(".type " + funcName + ", @function\n") 1290 w.WriteString(funcName + ":\n") 1291 w.WriteString(".cfi_startproc\n") 1292 // We insert a landing pad (`bti c` instruction) unconditionally at the beginning of 1293 // every generated function so that they can be called indirectly (with `blr` or 1294 // `br x16/x17`). The instruction is encoded in the HINT space as `hint #34` and is 1295 // a no-op on machines or program states not supporting BTI (Branch Target Identification). 1296 // None of the generated function bodies call other functions (with bl or blr), so we only 1297 // insert a landing pad instead of signing and validating $lr with `paciasp` and `autiasp`. 1298 // Normally we would also generate a .note.gnu.property section to annotate the assembly 1299 // file as BTI-compatible, but if the input assembly files are BTI-compatible, they should 1300 // already have those sections so there is no need to add an extra one ourselves. 1301 w.WriteString("\thint #34 // bti c\n") 1302 writeContents(w) 1303 w.WriteString(".cfi_endproc\n") 1304 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1305} 1306 1307func transform(w stringWriter, inputs []inputFile) error { 1308 // symbols contains all defined symbols. 1309 symbols := make(map[string]struct{}) 1310 // fileNumbers is the set of IDs seen in .file directives. 1311 fileNumbers := make(map[int]struct{}) 1312 // maxObservedFileNumber contains the largest seen file number in a 1313 // .file directive. Zero is not a valid number. 1314 maxObservedFileNumber := 0 1315 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1316 // checksums in .file directives. If it does so, then this script needs 1317 // to match that behaviour otherwise warnings result. 
1318 fileDirectivesContainMD5 := false 1319 1320 // OPENSSL_ia32cap_get will be synthesized by this script. 1321 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1322 1323 for _, input := range inputs { 1324 forEachPath(input.ast.up, func(node *node32) { 1325 symbol := input.contents[node.begin:node.end] 1326 if _, ok := symbols[symbol]; ok { 1327 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1328 } 1329 symbols[symbol] = struct{}{} 1330 }, ruleStatement, ruleLabel, ruleSymbolName) 1331 1332 forEachPath(input.ast.up, func(node *node32) { 1333 assertNodeType(node, ruleLocationDirective) 1334 directive := input.contents[node.begin:node.end] 1335 if !strings.HasPrefix(directive, ".file") { 1336 return 1337 } 1338 parts := strings.Fields(directive) 1339 if len(parts) == 2 { 1340 // This is a .file directive with just a 1341 // filename. Clang appears to generate just one 1342 // of these at the beginning of the output for 1343 // the compilation unit. Ignore it. 1344 return 1345 } 1346 fileNo, err := strconv.Atoi(parts[1]) 1347 if err != nil { 1348 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1349 } 1350 1351 if _, ok := fileNumbers[fileNo]; ok { 1352 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1353 } 1354 fileNumbers[fileNo] = struct{}{} 1355 1356 if fileNo > maxObservedFileNumber { 1357 maxObservedFileNumber = fileNo 1358 } 1359 1360 for _, token := range parts[2:] { 1361 if token == "md5" { 1362 fileDirectivesContainMD5 = true 1363 } 1364 } 1365 }, ruleStatement, ruleLocationDirective) 1366 } 1367 1368 processor := x86_64 1369 if len(inputs) > 0 { 1370 processor = detectProcessor(inputs[0]) 1371 } 1372 1373 commentIndicator := "#" 1374 if processor == aarch64 { 1375 commentIndicator = "//" 1376 } 1377 1378 d := &delocation{ 1379 symbols: symbols, 1380 processor: processor, 1381 commentIndicator: commentIndicator, 1382 output: w, 1383 redirectors: make(map[string]string), 1384 
bssAccessorsNeeded: make(map[string]string), 1385 gotExternalsNeeded: make(map[string]struct{}), 1386 gotOffsetsNeeded: make(map[string]struct{}), 1387 gotOffOffsetsNeeded: make(map[string]struct{}), 1388 } 1389 1390 w.WriteString(".text\n") 1391 var fileTrailing string 1392 if fileDirectivesContainMD5 { 1393 fileTrailing = " md5 0x00000000000000000000000000000000" 1394 } 1395 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1396 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1397 w.WriteString("BORINGSSL_bcm_text_start:\n") 1398 1399 for _, input := range inputs { 1400 if err := d.processInput(input); err != nil { 1401 return err 1402 } 1403 } 1404 1405 w.WriteString(".text\n") 1406 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1407 w.WriteString("BORINGSSL_bcm_text_end:\n") 1408 1409 // Emit redirector functions. Each is a single jump instruction. 1410 var redirectorNames []string 1411 for name := range d.redirectors { 1412 redirectorNames = append(redirectorNames, name) 1413 } 1414 sort.Strings(redirectorNames) 1415 1416 for _, name := range redirectorNames { 1417 redirector := d.redirectors[name] 1418 switch d.processor { 1419 case aarch64: 1420 writeAarch64Function(w, redirector, func(w stringWriter) { 1421 w.WriteString("\tb " + name + "\n") 1422 }) 1423 1424 case x86_64: 1425 w.WriteString(".type " + redirector + ", @function\n") 1426 w.WriteString(redirector + ":\n") 1427 w.WriteString("\tjmp\t" + name + "\n") 1428 } 1429 } 1430 1431 var accessorNames []string 1432 for accessor := range d.bssAccessorsNeeded { 1433 accessorNames = append(accessorNames, accessor) 1434 } 1435 sort.Strings(accessorNames) 1436 1437 // Emit BSS accessor functions. Each is a single LEA followed by RET. 
1438 for _, name := range accessorNames { 1439 funcName := accessorName(name) 1440 target := d.bssAccessorsNeeded[name] 1441 1442 switch d.processor { 1443 case x86_64: 1444 w.WriteString(".type " + funcName + ", @function\n") 1445 w.WriteString(funcName + ":\n") 1446 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1447 1448 case aarch64: 1449 writeAarch64Function(w, funcName, func(w stringWriter) { 1450 w.WriteString("\tadrp x0, " + target + "\n") 1451 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1452 w.WriteString("\tret\n") 1453 }) 1454 } 1455 } 1456 1457 switch d.processor { 1458 case aarch64: 1459 externalNames := sortedSet(d.gotExternalsNeeded) 1460 for _, symbol := range externalNames { 1461 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1462 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1463 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1464 w.WriteString("\tret\n") 1465 }) 1466 } 1467 1468 writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1469 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1470 w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n") 1471 w.WriteString("\tret\n") 1472 }) 1473 1474 case x86_64: 1475 externalNames := sortedSet(d.gotExternalsNeeded) 1476 for _, name := range externalNames { 1477 parts := strings.SplitN(name, "@", 2) 1478 symbol, section := parts[0], parts[1] 1479 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1480 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1481 w.WriteString(symbol + "_" + section + "_external:\n") 1482 // Ideally this would be .quad foo@GOTPCREL, but clang's 1483 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1484 // we manually sign-extend the value, knowing that the GOT is 1485 // always at the end, thus foo@GOTPCREL has a positive value. 
1486 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1487 w.WriteString("\t.long 0\n") 1488 } 1489 1490 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1491 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1492 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1493 w.WriteString("OPENSSL_ia32cap_get:\n") 1494 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1495 w.WriteString("\tret\n") 1496 1497 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1498 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1499 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1500 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1501 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1502 1503 if d.gotDeltaNeeded { 1504 w.WriteString(".Lboringssl_got_delta:\n") 1505 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1506 } 1507 1508 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1509 w.WriteString(".Lboringssl_got_" + name + ":\n") 1510 w.WriteString("\t.quad " + name + "@GOT\n") 1511 } 1512 for _, name := range sortedSet(d.gotOffOffsetsNeeded) { 1513 w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1514 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1515 } 1516 } 1517 1518 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1519 w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n") 1520 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1521 for _, b := range fipscommon.UninitHashValue { 1522 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1523 } 1524 1525 return nil 1526} 1527 1528// preprocess runs source through the C preprocessor. 1529func preprocess(cppCommand []string, path string) ([]byte, error) { 1530 var args []string 1531 args = append(args, cppCommand...) 1532 args = append(args, path) 1533 1534 cpp := exec.Command(args[0], args[1:]...) 
1535 cpp.Stderr = os.Stderr 1536 var result bytes.Buffer 1537 cpp.Stdout = &result 1538 1539 if err := cpp.Run(); err != nil { 1540 return nil, err 1541 } 1542 1543 return result.Bytes(), nil 1544} 1545 1546func parseInputs(inputs []inputFile, cppCommand []string) error { 1547 for i, input := range inputs { 1548 var contents string 1549 1550 if input.isArchive { 1551 arFile, err := os.Open(input.path) 1552 if err != nil { 1553 return err 1554 } 1555 defer arFile.Close() 1556 1557 ar, err := ar.ParseAR(arFile) 1558 if err != nil { 1559 return err 1560 } 1561 1562 if len(ar) != 1 { 1563 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1564 } 1565 1566 for _, c := range ar { 1567 contents = string(c) 1568 } 1569 } else { 1570 var inBytes []byte 1571 var err error 1572 1573 if len(cppCommand) > 0 { 1574 inBytes, err = preprocess(cppCommand, input.path) 1575 } else { 1576 inBytes, err = os.ReadFile(input.path) 1577 } 1578 if err != nil { 1579 return err 1580 } 1581 1582 contents = string(inBytes) 1583 } 1584 1585 asm := Asm{Buffer: contents, Pretty: true} 1586 asm.Init() 1587 if err := asm.Parse(); err != nil { 1588 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1589 } 1590 ast := asm.AST() 1591 1592 inputs[i].contents = contents 1593 inputs[i].ast = ast 1594 } 1595 1596 return nil 1597} 1598 1599// includePathFromHeaderFilePath returns an include directory path based on the 1600// path of a specific header file. It walks up the path and assumes that the 1601// include files are rooted in a directory called "openssl". 
1602func includePathFromHeaderFilePath(path string) (string, error) { 1603 dir := path 1604 for { 1605 var file string 1606 dir, file = filepath.Split(dir) 1607 1608 if file == "openssl" { 1609 return dir, nil 1610 } 1611 1612 if len(dir) == 0 { 1613 break 1614 } 1615 dir = dir[:len(dir)-1] 1616 } 1617 1618 return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path) 1619} 1620 1621func main() { 1622 // The .a file, if given, is expected to be an archive of textual 1623 // assembly sources. That's odd, but CMake really wants to create 1624 // archive files so it's the only way that we can make it work. 1625 arInput := flag.String("a", "", "Path to a .a file containing assembly sources") 1626 outFile := flag.String("o", "", "Path to output assembly") 1627 ccPath := flag.String("cc", "", "Path to the C compiler for preprocessing inputs") 1628 ccFlags := flag.String("cc-flags", "", "Flags for the C compiler when preprocessing") 1629 1630 flag.Parse() 1631 1632 if len(*outFile) == 0 { 1633 fmt.Fprintf(os.Stderr, "Must give argument to -o.\n") 1634 os.Exit(1) 1635 } 1636 1637 var inputs []inputFile 1638 if len(*arInput) > 0 { 1639 inputs = append(inputs, inputFile{ 1640 path: *arInput, 1641 index: 0, 1642 isArchive: true, 1643 }) 1644 } 1645 1646 includePaths := make(map[string]struct{}) 1647 1648 for i, path := range flag.Args() { 1649 if len(path) == 0 { 1650 continue 1651 } 1652 1653 // Header files are not processed but their path is remembered 1654 // and passed as -I arguments when invoking the preprocessor. 
1655 if strings.HasSuffix(path, ".h") { 1656 dir, err := includePathFromHeaderFilePath(path) 1657 if err != nil { 1658 fmt.Fprintf(os.Stderr, "%s\n", err) 1659 os.Exit(1) 1660 } 1661 includePaths[dir] = struct{}{} 1662 continue 1663 } 1664 1665 inputs = append(inputs, inputFile{ 1666 path: path, 1667 index: i + 1, 1668 }) 1669 } 1670 1671 var cppCommand []string 1672 if len(*ccPath) > 0 { 1673 cppCommand = append(cppCommand, *ccPath) 1674 cppCommand = append(cppCommand, strings.Fields(*ccFlags)...) 1675 // Some of ccFlags might be superfluous when running the 1676 // preprocessor, but we don't want the compiler complaining that 1677 // "argument unused during compilation". 1678 cppCommand = append(cppCommand, "-Wno-unused-command-line-argument") 1679 1680 for includePath := range includePaths { 1681 cppCommand = append(cppCommand, "-I"+includePath) 1682 } 1683 1684 // -E requests only preprocessing. 1685 cppCommand = append(cppCommand, "-E") 1686 } 1687 1688 if err := parseInputs(inputs, cppCommand); err != nil { 1689 fmt.Fprintf(os.Stderr, "%s\n", err) 1690 os.Exit(1) 1691 } 1692 1693 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1694 if err != nil { 1695 panic(err) 1696 } 1697 defer out.Close() 1698 1699 if err := transform(out, inputs); err != nil { 1700 fmt.Fprintf(os.Stderr, "%s\n", err) 1701 os.Exit(1) 1702 } 1703} 1704 1705func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1706 if node == nil { 1707 return 1708 } 1709 1710 if len(rules) == 0 { 1711 cb(node) 1712 return 1713 } 1714 1715 rule := rules[0] 1716 childRules := rules[1:] 1717 1718 for ; node != nil; node = node.next { 1719 if node.pegRule != rule { 1720 continue 1721 } 1722 1723 if len(childRules) == 0 { 1724 cb(node) 1725 } else { 1726 forEachPath(node.up, cb, childRules...) 
1727 } 1728 } 1729} 1730 1731func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1732 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1733 } 1734 return node 1735} 1736 1737func skipWS(node *node32) *node32 { 1738 return skipNodes(node, ruleWS) 1739} 1740 1741func assertNodeType(node *node32, expected pegRule) { 1742 if rule := node.pegRule; rule != expected { 1743 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1744 } 1745} 1746 1747type wrapperFunc func(func()) 1748 1749type wrapperStack []wrapperFunc 1750 1751func (w *wrapperStack) do(baseCase func()) { 1752 if len(*w) == 0 { 1753 baseCase() 1754 return 1755 } 1756 1757 wrapper := (*w)[0] 1758 *w = (*w)[1:] 1759 wrapper(func() { w.do(baseCase) }) 1760} 1761 1762// localTargetName returns the name of the local target label for a global 1763// symbol named name. 1764func localTargetName(name string) string { 1765 return ".L" + name + "_local_target" 1766} 1767 1768func isSynthesized(symbol string) bool { 1769 return strings.HasSuffix(symbol, "_bss_get") || 1770 symbol == "OPENSSL_ia32cap_get" || 1771 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1772} 1773 1774func redirectorName(symbol string) string { 1775 return "bcm_redirector_" + symbol 1776} 1777 1778// sectionType returns the type of a section. I.e. a section called “.text.foo” 1779// is a “.text” section. 1780func sectionType(section string) (string, bool) { 1781 if len(section) == 0 || section[0] != '.' { 1782 return "", false 1783 } 1784 1785 i := strings.Index(section[1:], ".") 1786 if i != -1 { 1787 section = section[:i+1] 1788 } 1789 1790 if strings.HasPrefix(section, ".debug_") { 1791 return ".debug", true 1792 } 1793 1794 return section, true 1795} 1796 1797// accessorName returns the name of the accessor function for a BSS symbol 1798// named name. 
1799func accessorName(name string) string { 1800 return name + "_bss_get" 1801} 1802 1803func (d *delocation) mapLocalSymbol(symbol string) string { 1804 if d.currentInput.index == 0 { 1805 return symbol 1806 } 1807 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1808} 1809 1810func detectProcessor(input inputFile) processorType { 1811 for statement := input.ast.up; statement != nil; statement = statement.next { 1812 node := skipNodes(statement.up, ruleWS) 1813 if node == nil || node.pegRule != ruleInstruction { 1814 continue 1815 } 1816 1817 instruction := node.up 1818 instructionName := input.contents[instruction.begin:instruction.end] 1819 1820 switch instructionName { 1821 case "movq", "call", "leaq": 1822 return x86_64 1823 case "str", "bl", "ldr", "st1": 1824 return aarch64 1825 } 1826 } 1827 1828 panic("processed entire input and didn't recognise any instructions.") 1829} 1830 1831func sortedSet(m map[string]struct{}) []string { 1832 ret := make([]string, 0, len(m)) 1833 for key := range m { 1834 ret = append(ret, key) 1835 } 1836 sort.Strings(ret) 1837 return ret 1838} 1839