1// Copyright (c) 2017, Google Inc. 2// 3// Permission to use, copy, modify, and/or distribute this software for any 4// purpose with or without fee is hereby granted, provided that the above 5// copyright notice and this permission notice appear in all copies. 6// 7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 14 15// delocate performs several transformations of textual assembly code. See 16// crypto/fipsmodule/FIPS.md for an overview. 17package main 18 19import ( 20 "errors" 21 "flag" 22 "fmt" 23 "io/ioutil" 24 "os" 25 "sort" 26 "strconv" 27 "strings" 28 29 "boringssl.googlesource.com/boringssl/util/ar" 30 "boringssl.googlesource.com/boringssl/util/fipstools/fipscommon" 31) 32 33// inputFile represents a textual assembly file. 34type inputFile struct { 35 path string 36 // index is a unique identifer given to this file. It's used for 37 // mapping local symbols. 38 index int 39 // isArchive indicates that the input should be processed as an ar 40 // file. 41 isArchive bool 42 // contents contains the contents of the file. 43 contents string 44 // ast points to the head of the syntax tree. 45 ast *node32 46} 47 48type stringWriter interface { 49 WriteString(string) (int, error) 50} 51 52type processorType int 53 54const ( 55 ppc64le processorType = iota + 1 56 x86_64 57 aarch64 58) 59 60// delocation holds the state needed during a delocation operation. 61type delocation struct { 62 processor processorType 63 output stringWriter 64 // commentIndicator starts a comment, e.g. 
"//" or "#" 65 commentIndicator string 66 67 // symbols is the set of symbols defined in the module. 68 symbols map[string]struct{} 69 // localEntrySymbols is the set of symbols with .localentry directives. 70 localEntrySymbols map[string]struct{} 71 // redirectors maps from out-call symbol name to the name of a 72 // redirector function for that symbol. E.g. “memcpy” -> 73 // “bcm_redirector_memcpy”. 74 redirectors map[string]string 75 // bssAccessorsNeeded maps from a BSS symbol name to the symbol that 76 // should be used to reference it. E.g. “P384_data_storage” -> 77 // “P384_data_storage”. 78 bssAccessorsNeeded map[string]string 79 // tocLoaders is a set of symbol names for which TOC helper functions 80 // are required. (ppc64le only.) 81 tocLoaders map[string]struct{} 82 // gotExternalsNeeded is a set of symbol names for which we need 83 // “delta” symbols: symbols that contain the offset from their location 84 // to the memory in question. 85 gotExternalsNeeded map[string]struct{} 86 // gotDeltaNeeded is true if the code needs to load the value of 87 // _GLOBAL_OFFSET_TABLE_. 88 gotDeltaNeeded bool 89 // gotOffsetsNeeded contains the symbols whose @GOT offsets are needed. 90 gotOffsetsNeeded map[string]struct{} 91 // gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed. 92 gotOffOffsetsNeeded map[string]struct{} 93 94 currentInput inputFile 95} 96 97func (d *delocation) contents(node *node32) string { 98 return d.currentInput.contents[node.begin:node.end] 99} 100 101// writeNode writes out an AST node. 
102func (d *delocation) writeNode(node *node32) { 103 if _, err := d.output.WriteString(d.contents(node)); err != nil { 104 panic(err) 105 } 106} 107 108func (d *delocation) writeCommentedNode(node *node32) { 109 line := d.contents(node) 110 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 111 panic(err) 112 } 113} 114 115func locateError(err error, with *node32, in inputFile) error { 116 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 117 var line int 118 for _, pos := range posMap { 119 line = pos.line 120 } 121 122 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 123} 124 125func (d *delocation) processInput(input inputFile) (err error) { 126 d.currentInput = input 127 128 var origStatement *node32 129 defer func() { 130 if err := recover(); err != nil { 131 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 132 } 133 }() 134 135 for statement := input.ast.up; statement != nil; statement = statement.next { 136 assertNodeType(statement, ruleStatement) 137 origStatement = statement 138 139 node := skipWS(statement.up) 140 if node == nil { 141 d.writeNode(statement) 142 continue 143 } 144 145 switch node.pegRule { 146 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 147 d.writeNode(statement) 148 case ruleDirective: 149 statement, err = d.processDirective(statement, node.up) 150 case ruleLabelContainingDirective: 151 statement, err = d.processLabelContainingDirective(statement, node.up) 152 case ruleLabel: 153 statement, err = d.processLabel(statement, node.up) 154 case ruleInstruction: 155 switch d.processor { 156 case x86_64: 157 statement, err = d.processIntelInstruction(statement, node.up) 158 case ppc64le: 159 statement, err = d.processPPCInstruction(statement, node.up) 160 case aarch64: 161 statement, err = d.processAarch64Instruction(statement, node.up) 162 default: 163 panic("unknown 
processor") 164 } 165 default: 166 panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule])) 167 } 168 169 if err != nil { 170 return locateError(err, origStatement, input) 171 } 172 } 173 174 return nil 175} 176 177func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 178 assertNodeType(directive, ruleDirectiveName) 179 directiveName := d.contents(directive) 180 181 var args []string 182 forEachPath(directive, func(arg *node32) { 183 // If the argument is a quoted string, use the raw contents. 184 // (Note that this doesn't unescape the string, but that's not 185 // needed so far. 186 if arg.up != nil { 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedArg) 189 if arg.up == nil { 190 args = append(args, "") 191 return 192 } 193 arg = arg.up 194 assertNodeType(arg, ruleQuotedText) 195 } 196 args = append(args, d.contents(arg)) 197 }, ruleArgs, ruleArg) 198 199 switch directiveName { 200 case "comm", "lcomm": 201 if len(args) < 1 { 202 return nil, errors.New("comm directive has no arguments") 203 } 204 d.bssAccessorsNeeded[args[0]] = args[0] 205 d.writeNode(statement) 206 207 case "data": 208 // ASAN and some versions of MSAN are adding a .data section, 209 // and adding references to symbols within it to the code. We 210 // will have to work around this in the future. 211 return nil, errors.New(".data section found in module") 212 213 case "section": 214 section := args[0] 215 216 if section == ".data.rel.ro" { 217 // In a normal build, this is an indication of a 218 // problem but any references from the module to this 219 // section will result in a relocation and thus will 220 // break the integrity check. ASAN can generate these 221 // sections and so we will likely have to work around 222 // that in the future. 
223 return nil, errors.New(".data.rel.ro section found in module") 224 } 225 226 sectionType, ok := sectionType(section) 227 if !ok { 228 // Unknown sections are permitted in order to be robust 229 // to different compiler modes. 230 d.writeNode(statement) 231 break 232 } 233 234 switch sectionType { 235 case ".rodata", ".text": 236 // Move .rodata to .text so it may be accessed without 237 // a relocation. GCC with -fmerge-constants will place 238 // strings into separate sections, so we move all 239 // sections named like .rodata. Also move .text.startup 240 // so the self-test function is also in the module. 241 d.writeCommentedNode(statement) 242 d.output.WriteString(".text\n") 243 244 case ".data": 245 // See above about .data 246 return nil, errors.New(".data section found in module") 247 248 case ".init_array", ".fini_array", ".ctors", ".dtors": 249 // init_array/ctors/dtors contains function 250 // pointers to constructor/destructor 251 // functions. These contain relocations, but 252 // they're in a different section anyway. 253 d.writeNode(statement) 254 break 255 256 case ".debug", ".note", ".toc": 257 d.writeNode(statement) 258 break 259 260 case ".bss": 261 d.writeNode(statement) 262 return d.handleBSS(statement) 263 } 264 265 default: 266 d.writeNode(statement) 267 } 268 269 return statement, nil 270} 271 272func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 273 // The symbols within directives need to be mapped so that local 274 // symbols in two different .s inputs don't collide. 
275 changed := false 276 assertNodeType(directive, ruleLabelContainingDirectiveName) 277 name := d.contents(directive) 278 279 node := directive.next 280 assertNodeType(node, ruleWS) 281 282 node = node.next 283 assertNodeType(node, ruleSymbolArgs) 284 285 var args []string 286 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 287 assertNodeType(node, ruleSymbolArg) 288 arg := node.up 289 var mapped string 290 291 for term := arg; term != nil; term = term.next { 292 if term.pegRule != ruleLocalSymbol { 293 mapped += d.contents(term) 294 continue 295 } 296 297 oldSymbol := d.contents(term) 298 newSymbol := d.mapLocalSymbol(oldSymbol) 299 if newSymbol != oldSymbol { 300 changed = true 301 } 302 303 mapped += newSymbol 304 } 305 306 args = append(args, mapped) 307 } 308 309 if !changed { 310 d.writeNode(statement) 311 } else { 312 d.writeCommentedNode(statement) 313 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 314 } 315 316 if name == ".localentry" { 317 d.output.WriteString(localEntryName(args[0]) + ":\n") 318 } 319 320 return statement, nil 321} 322 323func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 324 symbol := d.contents(label) 325 326 switch label.pegRule { 327 case ruleLocalLabel: 328 d.output.WriteString(symbol + ":\n") 329 case ruleLocalSymbol: 330 // symbols need to be mapped so that local symbols from two 331 // different .s inputs don't collide. 332 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 333 case ruleSymbolName: 334 d.output.WriteString(localTargetName(symbol) + ":\n") 335 d.writeNode(statement) 336 default: 337 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 338 } 339 340 return statement, nil 341} 342 343// instructionArgs collects all the arguments to an instruction. 
344func instructionArgs(node *node32) (argNodes []*node32) { 345 for node = skipWS(node); node != nil; node = skipWS(node.next) { 346 assertNodeType(node, ruleInstructionArg) 347 argNodes = append(argNodes, node.up) 348 } 349 350 return argNodes 351} 352 353// Aarch64 support 354 355// gotHelperName returns the name of a synthesised function that returns an 356// address from the GOT. 357func gotHelperName(symbol string) string { 358 return ".Lboringssl_loadgot_" + symbol 359} 360 361// loadAarch64Address emits instructions to put the address of |symbol| 362// (optionally adjusted by |offsetStr|) into |targetReg|. 363func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) { 364 // There are two paths here: either the symbol is known to be local in which 365 // case adr is used to get the address (within 1MiB), or a GOT reference is 366 // really needed in which case the code needs to jump to a helper function. 367 // 368 // A helper function is needed because using code appears to be the only way 369 // to load a GOT value. On other platforms we have ".quad foo@GOT" outside of 370 // the module, but on Aarch64 that results in a "COPY" relocation and linker 371 // comments suggest it's a weird hack. So, for each GOT symbol needed, we emit 372 // a function outside of the module that returns the address from the GOT in 373 // x0. 
374 375 d.writeCommentedNode(statement) 376 377 _, isKnown := d.symbols[symbol] 378 isLocal := strings.HasPrefix(symbol, ".L") 379 if isKnown || isLocal || isSynthesized(symbol) { 380 if isLocal { 381 symbol = d.mapLocalSymbol(symbol) 382 } else if isKnown { 383 symbol = localTargetName(symbol) 384 } 385 386 d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n") 387 388 return statement, nil 389 } 390 391 if len(offsetStr) != 0 { 392 panic("non-zero offset for helper-based reference") 393 } 394 395 var helperFunc string 396 if symbol == "OPENSSL_armcap_P" { 397 helperFunc = ".LOPENSSL_armcap_P_addr" 398 } else { 399 // GOT helpers also dereference the GOT entry, thus the subsequent ldr 400 // instruction, which would normally do the dereferencing, needs to be 401 // dropped. GOT helpers have to include the dereference because the 402 // assembler doesn't support ":got_lo12:foo" offsets except in an ldr 403 // instruction. 404 d.gotExternalsNeeded[symbol] = struct{}{} 405 helperFunc = gotHelperName(symbol) 406 } 407 408 // Clear the red-zone. I can't find a definitive answer about whether Linux 409 // Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a 410 // 128-byte one. Thus conservatively clear a 128-byte red-zone. 411 d.output.WriteString("\tsub sp, sp, 128\n") 412 413 // Save x0 (which will be stomped by the return value) and the link register 414 // to the stack. Then save the program counter into the link register and 415 // jump to the helper function. 416 d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n") 417 d.output.WriteString("\tbl " + helperFunc + "\n") 418 419 if targetReg == "x0" { 420 // If the target happens to be x0 then restore the link register from the 421 // stack and send the saved value of x0 to the zero register. 422 d.output.WriteString("\tldp xzr, lr, [sp], #16\n") 423 } else { 424 // Otherwise move the result into place and restore registers. 
425 d.output.WriteString("\tmov " + targetReg + ", x0\n") 426 d.output.WriteString("\tldp x0, lr, [sp], #16\n") 427 } 428 429 // Revert the red-zone adjustment. 430 d.output.WriteString("\tadd sp, sp, 128\n") 431 432 return statement, nil 433} 434 435func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) { 436 assertNodeType(instruction, ruleInstructionName) 437 instructionName := d.contents(instruction) 438 439 argNodes := instructionArgs(instruction.next) 440 441 switch instructionName { 442 case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg": 443 // These functions are special because they take a condition-code name as 444 // an argument and that looks like a symbol reference. 445 d.writeNode(statement) 446 return statement, nil 447 448 case "mrs": 449 // Functions that take special register names also look like a symbol 450 // reference to the parser. 451 d.writeNode(statement) 452 return statement, nil 453 454 case "adrp": 455 // adrp always generates a relocation, even when the target symbol is in the 456 // same segment, because the page-offset of the code isn't known until link 457 // time. Thus adrp instructions are turned into either adr instructions 458 // (limiting the module to 1MiB offsets) or calls to helper functions, both of 459 // which load the full address. Later instructions, which add the low 12 bits 460 // of offset, are tweaked to remove the offset since it's already included. 461 // Loads of GOT symbols are slightly more complex because it's not possible to 462 // avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr 463 // instruction, which would normally do the dereferencing, is dropped 464 // completely. (Or turned into a mov if it targets a different register.) 
465 assertNodeType(argNodes[0], ruleRegisterOrConstant) 466 targetReg := d.contents(argNodes[0]) 467 if !strings.HasPrefix(targetReg, "x") { 468 panic("adrp targetting register " + targetReg + ", which has the wrong size") 469 } 470 471 var symbol, offset string 472 switch argNodes[1].pegRule { 473 case ruleGOTSymbolOffset: 474 symbol = d.contents(argNodes[1].up) 475 case ruleMemoryRef: 476 assertNodeType(argNodes[1].up, ruleSymbolRef) 477 node, empty := d.gatherOffsets(argNodes[1].up.up, "") 478 if len(empty) != 0 { 479 panic("prefix offsets found for adrp") 480 } 481 symbol = d.contents(node) 482 _, offset = d.gatherOffsets(node.next, "") 483 default: 484 panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule]) 485 } 486 487 return d.loadAarch64Address(statement, targetReg, symbol, offset) 488 } 489 490 var args []string 491 changed := false 492 493 for _, arg := range argNodes { 494 fullArg := arg 495 496 switch arg.pegRule { 497 case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak: 498 args = append(args, d.contents(fullArg)) 499 500 case ruleGOTSymbolOffset: 501 // These should only be arguments to adrp and thus unreachable. 502 panic("unreachable") 503 504 case ruleMemoryRef: 505 ref := arg.up 506 507 switch ref.pegRule { 508 case ruleSymbolRef: 509 // This is a branch. Either the target needs to be written to a local 510 // version of the symbol to ensure that no relocations are emitted, or 511 // it needs to jump to a redirector function. 
512 symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up) 513 changed = didChange 514 515 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 516 symbol = localTargetName(symbol) 517 changed = true 518 } else if !symbolIsLocal && !isSynthesized(symbol) { 519 redirector := redirectorName(symbol) 520 d.redirectors[symbol] = redirector 521 symbol = redirector 522 changed = true 523 } else if didChange && symbolIsLocal && len(offset) > 0 { 524 // didChange is set when the inputFile index is not 0; which is the index of the 525 // first file copied to the output, which is the generated assembly of bcm.c. 526 // In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index) 527 // in order to ensure they don't collide. `index` gets incremented per file. 528 // If there is offset after the symbol, append the `offset`. 529 symbol = symbol + offset 530 } 531 532 args = append(args, symbol) 533 534 case ruleARMBaseIndexScale: 535 parts := ref.up 536 assertNodeType(parts, ruleARMRegister) 537 baseAddrReg := d.contents(parts) 538 parts = skipWS(parts.next) 539 540 // Only two forms need special handling. First there's memory references 541 // like "[x*, :got_lo12:foo]". The base register here will have been the 542 // target of an adrp instruction to load the page address, but the adrp 543 // will have turned into loading the full address *and dereferencing it*, 544 // above. Thus this instruction needs to be dropped otherwise we'll be 545 // dereferencing twice. 546 // 547 // Second there are forms like "[x*, :lo12:foo]" where the code has used 548 // adrp to load the page address into x*. That adrp will have been turned 549 // into loading the full address so just the offset needs to be dropped. 
550 551 if parts != nil { 552 if parts.pegRule == ruleARMGOTLow12 { 553 if instructionName != "ldr" { 554 panic("Symbol reference outside of ldr instruction") 555 } 556 557 if skipWS(parts.next) != nil || parts.up.next != nil { 558 panic("can't handle tweak or post-increment with symbol references") 559 } 560 561 // The GOT helper already dereferenced the entry so, at most, just a mov 562 // is needed to put things in the right register. 563 d.writeCommentedNode(statement) 564 if baseAddrReg != args[0] { 565 d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n") 566 } 567 return statement, nil 568 } else if parts.pegRule == ruleLow12BitsSymbolRef { 569 if instructionName != "ldr" { 570 panic("Symbol reference outside of ldr instruction") 571 } 572 573 if skipWS(parts.next) != nil || parts.up.next != nil { 574 panic("can't handle tweak or post-increment with symbol references") 575 } 576 577 // Suppress the offset; adrp loaded the full address. 578 args = append(args, "["+baseAddrReg+"]") 579 changed = true 580 continue 581 } 582 } 583 584 args = append(args, d.contents(fullArg)) 585 586 case ruleLow12BitsSymbolRef: 587 // These are the second instruction in a pair: 588 // adrp x0, symbol // Load the page address into x0 589 // add x1, x0, :lo12:symbol // Adds the page offset. 590 // 591 // The adrp instruction will have been turned into a sequence that loads 592 // the full address, above, thus the offset is turned into zero. If that 593 // results in the instruction being a nop, then it is deleted. 
594 if instructionName != "add" { 595 panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName)) 596 } 597 598 if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") { 599 panic("address arithmetic with incorrectly sized register") 600 } 601 602 if args[0] == args[1] { 603 d.writeCommentedNode(statement) 604 return statement, nil 605 } 606 607 args = append(args, "#0") 608 changed = true 609 610 default: 611 panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule])) 612 } 613 614 default: 615 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 616 } 617 } 618 619 if changed { 620 d.writeCommentedNode(statement) 621 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 622 d.output.WriteString(replacement) 623 } else { 624 d.writeNode(statement) 625 } 626 627 return statement, nil 628} 629 630/* ppc64le 631 632[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st, 633 2017 634 635(Also useful is “Power ISA Version 2.07 B”. Note that version three of that 636document is /not/ good as that's POWER9 specific.) 637 638ppc64le doesn't have IP-relative addressing and does a lot to work around this. 639Rather than reference a PLT and GOT direction, it has a single structure called 640the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data, 641.got, .plt, .bss, etc sections [PABI;3.3]. 642 643A pointer to the TOC is maintained in r2 and the following pattern is used to 644load the address of an element into a register: 645 646 addis <address register>, 2, foo@toc@ha 647 addi <address register>, <address register>, foo@toc@l 648 649The “addis” instruction shifts a signed constant left 16 bits and adds the 650result to its second argument, saving the result in the first argument. The 651“addi” instruction does the same, but without shifting. 
Thus the “@toc@ha" 652suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means 653“the bottom 16 bits of the offset”. However, note that both values are signed, 654thus offsets in the top half of a 64KB chunk will have an @ha value that's one 655greater than expected and a negative @l value. 656 657The TOC is specific to a “module” (basically an executable or shared object). 658This means that there's not a single TOC in a process and that r2 needs to 659change as control moves between modules. Thus functions have two entry points: 660the “global” entry point and the “local” entry point. Jumps from within the 661same module can use the local entry while jumps from other modules must use the 662global entry. The global entry establishes the correct value of r2 before 663running the function and the local entry skips that code. 664 665The global entry point for a function is defined by its label. The local entry 666is a power-of-two number of bytes from the global entry, set by the 667“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset 668of 1 or 2 bytes is treated as an offset of zero.) 669 670In order to help the global entry code set r2 to point to the local TOC, r12 is 671set to the address of the global entry point when called [PABI;2.2.1.1]. Thus 672the global entry will typically use an addis+addi pair to add a known offset to 673r12 and store it in r2. For example: 674 675foo: 676 addis 2, 12, .TOC. - foo@ha 677 addi 2, 2, .TOC. - foo@l 678 679(It's worth noting that the '@' operator binds very loosely, so the 3rd 680arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.) 681 682When calling a function, the compiler doesn't know whether that function is in 683the same module or not. Thus it doesn't know whether r12 needs to be set nor 684whether r2 will be clobbered on return. 
Rather than always assume the worst, 685the linker fixes stuff up once it knows that a call is going out of module: 686 687Firstly, calling, say, memcpy (which we assume to be in a different module) 688won't actually jump directly to memcpy, or even a PLT resolution function. 689It'll call a synthesised function that: 690 a) saves r2 in the caller's stack frame 691 b) loads the address of memcpy@PLT into r12 692 c) jumps to r12. 693 694As this synthesised function loads memcpy@PLT, a call to memcpy from the 695compiled code just references “memcpy” directly, not “memcpy@PLT”. 696 697Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus 698calls must be followed by a nop. If the call ends up going out-of-module, the 699linker will rewrite that nop to load r2 from the stack. 700 701Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte 702red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be 703followed as called functions will write into their parent's stack frame. For 704example, the synthesised out-of-module trampolines will save r2 24 bytes into 705the caller's frame and all non-leaf functions save the return address 16 bytes 706into the caller's frame. 707 708A final point worth noting: some RISC ISAs have r0 wired to zero: all reads 709result in zero and all writes are discarded. POWER does something a little like 710that, but r0 is only special in certain argument positions for certain 711instructions. You just have to read the manual to know which they are. 712 713 714Delocation is easier than Intel because there's just TOC references, but it's 715also harder because there's no IP-relative addressing. 716 717Jumps are IP-relative however, and have a 24-bit immediate value. So we can 718jump to functions that set a register to the needed value. (r3 is the 719return-value register and so that's what is generally used here.) 
*/ 720 721// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of 722// source to relative and writing the result to target. 723func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) { 724 instruction := skipWS(statement.up).up 725 assertNodeType(instruction, ruleInstructionName) 726 name1 := d.contents(instruction) 727 args1 := instructionArgs(instruction.next) 728 729 statement = statement.next 730 instruction = skipWS(statement.up).up 731 assertNodeType(instruction, ruleInstructionName) 732 name2 := d.contents(instruction) 733 args2 := instructionArgs(instruction.next) 734 735 if name1 != "addis" || 736 len(args1) != 3 || 737 name2 != "addi" || 738 len(args2) != 3 { 739 return "", "", "", false 740 } 741 742 target = d.contents(args1[0]) 743 relative = d.contents(args1[1]) 744 source1 := d.contents(args1[2]) 745 source2 := d.contents(args2[2]) 746 747 if !strings.HasSuffix(source1, "@ha") || 748 !strings.HasSuffix(source2, "@l") || 749 source1[:len(source1)-3] != source2[:len(source2)-2] || 750 d.contents(args2[0]) != target || 751 d.contents(args2[1]) != target { 752 return "", "", "", false 753 } 754 755 source = source1[:len(source1)-3] 756 ok = true 757 return 758} 759 760// establishTOC writes the global entry prelude for a function. The standard 761// prelude involves relocations so this version moves the relocation outside 762// the integrity-checked area. 763func establishTOC(w stringWriter) { 764 w.WriteString("999:\n") 765 w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n") 766 w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n") 767 w.WriteString("\tld 12, 0(2)\n") 768 w.WriteString("\tadd 2, 2, 12\n") 769} 770 771// loadTOCFuncName returns the name of a synthesized function that sets r3 to 772// the value of “symbol+offset”. 
773func loadTOCFuncName(symbol, offset string) string { 774 symbol = strings.Replace(symbol, ".", "_dot_", -1) 775 ret := ".Lbcm_loadtoc_" + symbol 776 if len(offset) != 0 { 777 offset = strings.Replace(offset, "+", "_plus_", -1) 778 offset = strings.Replace(offset, "-", "_minus_", -1) 779 ret += "_" + offset 780 } 781 return ret 782} 783 784func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc { 785 d.tocLoaders[symbol+"\x00"+offset] = struct{}{} 786 787 return func(k func()) { 788 w.WriteString("\taddi 1, 1, -288\n") // Clear the red zone. 789 w.WriteString("\tmflr " + dest + "\n") // Stash the link register. 790 w.WriteString("\tstd " + dest + ", -8(1)\n") 791 // The TOC loader will use r3, so stash it if necessary. 792 if dest != "3" { 793 w.WriteString("\tstd 3, -16(1)\n") 794 } 795 796 // Because loadTOCFuncName returns a “.L” name, we don't need a 797 // nop after this call. 798 w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n") 799 800 // Cycle registers around. We need r3 -> destReg, -8(1) -> 801 // lr and, optionally, -16(1) -> r3. 
802 w.WriteString("\tstd 3, -24(1)\n") 803 w.WriteString("\tld 3, -8(1)\n") 804 w.WriteString("\tmtlr 3\n") 805 w.WriteString("\tld " + dest + ", -24(1)\n") 806 if dest != "3" { 807 w.WriteString("\tld 3, -16(1)\n") 808 } 809 w.WriteString("\taddi 1, 1, 288\n") 810 811 k() 812 } 813} 814 815func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) { 816 for symRef != nil && symRef.pegRule == ruleOffset { 817 offset := d.contents(symRef) 818 if offset[0] != '+' && offset[0] != '-' { 819 offset = "+" + offset 820 } 821 offsets = offsets + offset 822 symRef = symRef.next 823 } 824 return symRef, offsets 825} 826 827func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) { 828 if memRef.pegRule != ruleSymbolRef { 829 return "", "", "", false, false, memRef 830 } 831 832 symRef := memRef.up 833 nextRef = memRef.next 834 835 // (Offset* '+')? 836 symRef, offset = d.gatherOffsets(symRef, offset) 837 838 // (LocalSymbol / SymbolName) 839 symbol = d.contents(symRef) 840 if symRef.pegRule == ruleLocalSymbol { 841 symbolIsLocal = true 842 mapped := d.mapLocalSymbol(symbol) 843 if mapped != symbol { 844 symbol = mapped 845 didChange = true 846 } 847 } 848 symRef = symRef.next 849 850 // Offset* 851 symRef, offset = d.gatherOffsets(symRef, offset) 852 853 // ('@' Section / Offset*)? 
854 if symRef != nil { 855 assertNodeType(symRef, ruleSection) 856 section = d.contents(symRef) 857 symRef = symRef.next 858 859 symRef, offset = d.gatherOffsets(symRef, offset) 860 } 861 862 if symRef != nil { 863 panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule])) 864 } 865 866 return 867} 868 869func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) { 870 assertNodeType(instruction, ruleInstructionName) 871 instructionName := d.contents(instruction) 872 isBranch := instructionName[0] == 'b' 873 874 argNodes := instructionArgs(instruction.next) 875 876 var wrappers wrapperStack 877 var args []string 878 changed := false 879 880Args: 881 for i, arg := range argNodes { 882 fullArg := arg 883 isIndirect := false 884 885 if arg.pegRule == ruleIndirectionIndicator { 886 arg = arg.next 887 isIndirect = true 888 } 889 890 switch arg.pegRule { 891 case ruleRegisterOrConstant, ruleLocalLabelRef: 892 args = append(args, d.contents(fullArg)) 893 894 case ruleTOCRefLow: 895 return nil, errors.New("Found low TOC reference outside preamble pattern") 896 897 case ruleTOCRefHigh: 898 target, _, relative, ok := d.isPPC64LEAPair(statement) 899 if !ok { 900 return nil, errors.New("Found high TOC reference outside preamble pattern") 901 } 902 903 if relative != "12" { 904 return nil, fmt.Errorf("preamble is relative to %q, not r12", relative) 905 } 906 907 if target != "2" { 908 return nil, fmt.Errorf("preamble is setting %q, not r2", target) 909 } 910 911 statement = statement.next 912 establishTOC(d.output) 913 instructionName = "" 914 changed = true 915 break Args 916 917 case ruleMemoryRef: 918 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 919 changed = didChange 920 921 if len(symbol) > 0 { 922 if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch { 923 symbol = localEntryName(symbol) 924 changed = true 925 } else if _, knownSymbol := 
d.symbols[symbol]; knownSymbol { 926 symbol = localTargetName(symbol) 927 changed = true 928 } else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 { 929 changed = true 930 d.redirectors[symbol] = redirectorName(symbol) 931 symbol = redirectorName(symbol) 932 // TODO(davidben): This should sanity-check the next 933 // instruction is a nop and ideally remove it. 934 wrappers = append(wrappers, func(k func()) { 935 k() 936 // Like the linker's PLT stubs, redirector functions 937 // expect callers to restore r2. 938 d.output.WriteString("\tld 2, 24(1)\n") 939 }) 940 } 941 } 942 943 switch section { 944 case "": 945 946 case "tls": 947 // This section identifier just tells the 948 // assembler to use r13, the pointer to the 949 // thread-local data [PABI;3.7.3.3]. 950 951 case "toc@ha": 952 // Delete toc@ha instructions. Per 953 // [PABI;3.6.3], the linker is allowed to erase 954 // toc@ha instructions. We take advantage of 955 // this by unconditionally erasing the toc@ha 956 // instructions and doing the full lookup when 957 // processing toc@l. 958 // 959 // Note that any offset here applies before @ha 960 // and @l. That is, 42+foo@toc@ha is 961 // #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any 962 // corresponding toc@l references are required 963 // by the ABI to have the same offset. The 964 // offset will be incorporated in full when 965 // those are processed. 966 if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" { 967 return nil, errors.New("can't process toc@ha reference") 968 } 969 changed = true 970 instructionName = "" 971 break Args 972 973 case "toc@l": 974 // Per [PAB;3.6.3], this instruction must take 975 // as input a register which was the output of 976 // a toc@ha computation and compute the actual 977 // address of some symbol. The toc@ha 978 // computation was elided, so we ignore that 979 // input register and compute the address 980 // directly. 
981 changed = true 982 983 // For all supported toc@l instructions, the 984 // destination register is the first argument. 985 destReg := args[0] 986 987 wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg)) 988 switch instructionName { 989 case "addi": 990 // The original instruction was: 991 // addi destReg, tocHaReg, offset+symbol@toc@l 992 instructionName = "" 993 994 case "ld", "lhz", "lwz": 995 // The original instruction was: 996 // l?? destReg, offset+symbol@toc@l(tocHaReg) 997 // 998 // We transform that into the 999 // equivalent dereference of destReg: 1000 // l?? destReg, 0(destReg) 1001 origInstructionName := instructionName 1002 instructionName = "" 1003 1004 assertNodeType(memRef, ruleBaseIndexScale) 1005 assertNodeType(memRef.up, ruleRegisterOrConstant) 1006 if memRef.next != nil || memRef.up.next != nil { 1007 return nil, errors.New("expected single register in BaseIndexScale for ld argument") 1008 } 1009 1010 baseReg := destReg 1011 if baseReg == "0" { 1012 // Register zero is special as the base register for a load. 1013 // Avoid it by spilling and using r3 instead. 1014 baseReg = "3" 1015 wrappers = append(wrappers, func(k func()) { 1016 d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone. 1017 d.output.WriteString("\tstd " + baseReg + ", -8(1)\n") 1018 d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n") 1019 k() 1020 d.output.WriteString("\tld " + baseReg + ", -8(1)\n") 1021 d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone. 
1022 }) 1023 } 1024 1025 wrappers = append(wrappers, func(k func()) { 1026 d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n") 1027 }) 1028 default: 1029 return nil, fmt.Errorf("can't process TOC argument to %q", instructionName) 1030 } 1031 1032 default: 1033 return nil, fmt.Errorf("Unknown section type %q", section) 1034 } 1035 1036 argStr := "" 1037 if isIndirect { 1038 argStr += "*" 1039 } 1040 argStr += symbol 1041 if len(offset) > 0 { 1042 argStr += offset 1043 } 1044 if len(section) > 0 { 1045 argStr += "@" 1046 argStr += section 1047 } 1048 1049 for ; memRef != nil; memRef = memRef.next { 1050 argStr += d.contents(memRef) 1051 } 1052 1053 args = append(args, argStr) 1054 1055 default: 1056 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1057 } 1058 } 1059 1060 if changed { 1061 d.writeCommentedNode(statement) 1062 1063 var replacement string 1064 if len(instructionName) > 0 { 1065 replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1066 } 1067 1068 wrappers.do(func() { 1069 d.output.WriteString(replacement) 1070 }) 1071 } else { 1072 d.writeNode(statement) 1073 } 1074 1075 return statement, nil 1076} 1077 1078/* Intel */ 1079 1080type instructionType int 1081 1082const ( 1083 instrPush instructionType = iota 1084 instrMove 1085 // instrTransformingMove is essentially a move, but it performs some 1086 // transformation of the data during the process. 1087 instrTransformingMove 1088 instrJump 1089 instrConditionalMove 1090 // instrCombine merges the source and destination in some fashion, for example 1091 // a 2-operand bitwise operation. 1092 instrCombine 1093 // instrMemoryVectorCombine is similer to instrCombine, but the source 1094 // register must be a memory reference and the destination register 1095 // must be a vector register. 1096 instrMemoryVectorCombine 1097 // instrThreeArg merges two sources into a destination in some fashion. 
1098 instrThreeArg 1099 // instrCompare takes two arguments and writes outputs to the flags register. 1100 instrCompare 1101 instrOther 1102) 1103 1104func classifyInstruction(instr string, args []*node32) instructionType { 1105 switch instr { 1106 case "push", "pushq": 1107 if len(args) == 1 { 1108 return instrPush 1109 } 1110 1111 case "mov", "movq", "vmovq", "movsd", "vmovsd": 1112 if len(args) == 2 { 1113 return instrMove 1114 } 1115 1116 case "cmovneq", "cmoveq": 1117 if len(args) == 2 { 1118 return instrConditionalMove 1119 } 1120 1121 case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo": 1122 if len(args) == 1 { 1123 return instrJump 1124 } 1125 1126 case "orq", "andq", "xorq": 1127 if len(args) == 2 { 1128 return instrCombine 1129 } 1130 1131 case "cmpq": 1132 if len(args) == 2 { 1133 return instrCompare 1134 } 1135 1136 case "sarxq", "shlxq", "shrxq": 1137 if len(args) == 3 { 1138 return instrThreeArg 1139 } 1140 1141 case "vpbroadcastq": 1142 if len(args) == 2 { 1143 return instrTransformingMove 1144 } 1145 1146 case "movlps", "movhps": 1147 if len(args) == 2 { 1148 return instrMemoryVectorCombine 1149 } 1150 } 1151 1152 return instrOther 1153} 1154 1155func push(w stringWriter) wrapperFunc { 1156 return func(k func()) { 1157 w.WriteString("\tpushq %rax\n") 1158 k() 1159 w.WriteString("\txchg %rax, (%rsp)\n") 1160 } 1161} 1162 1163func compare(w stringWriter, instr, a, b string) wrapperFunc { 1164 return func(k func()) { 1165 k() 1166 w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b)) 1167 } 1168} 1169 1170func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc { 1171 d.gotExternalsNeeded[symbol+"@"+section] = struct{}{} 1172 1173 return func(k func()) { 1174 if !redzoneCleared { 1175 w.WriteString("\tleaq 
-128(%rsp), %rsp\n") // Clear the red zone. 1176 } 1177 w.WriteString("\tpushf\n") 1178 w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination)) 1179 w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination)) 1180 w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination)) 1181 w.WriteString("\tpopf\n") 1182 if !redzoneCleared { 1183 w.WriteString("\tleaq\t128(%rsp), %rsp\n") 1184 } 1185 } 1186} 1187 1188func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc { 1189 return func(k func()) { 1190 if !redzoneCleared { 1191 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 1192 defer w.WriteString("\tleaq 128(%rsp), %rsp\n") 1193 } 1194 w.WriteString("\tpushfq\n") 1195 k() 1196 w.WriteString("\tpopfq\n") 1197 } 1198} 1199 1200func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) { 1201 candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"} 1202 1203 var reg string 1204NextCandidate: 1205 for _, candidate := range candidates { 1206 for _, avoid := range avoidRegs { 1207 if candidate == avoid { 1208 continue NextCandidate 1209 } 1210 } 1211 1212 reg = candidate 1213 break 1214 } 1215 1216 if len(reg) == 0 { 1217 panic("too many excluded registers") 1218 } 1219 1220 return func(k func()) { 1221 w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone. 
1222 w.WriteString("\tpushq " + reg + "\n") 1223 k() 1224 w.WriteString("\tpopq " + reg + "\n") 1225 w.WriteString("\tleaq 128(%rsp), %rsp\n") 1226 }, reg 1227} 1228 1229func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc { 1230 return func(k func()) { 1231 k() 1232 prefix := "" 1233 if isAVX { 1234 prefix = "v" 1235 } 1236 w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n") 1237 } 1238} 1239 1240func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc { 1241 return func(k func()) { 1242 k() 1243 w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n") 1244 } 1245} 1246 1247func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 1248 return func(k func()) { 1249 k() 1250 w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n") 1251 } 1252} 1253 1254func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc { 1255 return func(k func()) { 1256 k() 1257 w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n") 1258 } 1259} 1260 1261func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc { 1262 return func(k func()) { 1263 k() 1264 // These instructions can only read from memory, so push 1265 // tempReg and read from the stack. Note we assume the red zone 1266 // was previously cleared by saveRegister(). 
1267 w.WriteString("\tpushq " + source + "\n") 1268 w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n") 1269 w.WriteString("\tleaq 8(%rsp), %rsp\n") 1270 } 1271} 1272 1273func isValidLEATarget(reg string) bool { 1274 return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm") 1275} 1276 1277func undoConditionalMove(w stringWriter, instr string) wrapperFunc { 1278 var invertedCondition string 1279 1280 switch instr { 1281 case "cmoveq": 1282 invertedCondition = "ne" 1283 case "cmovneq": 1284 invertedCondition = "e" 1285 default: 1286 panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr)) 1287 } 1288 1289 return func(k func()) { 1290 w.WriteString("\tj" + invertedCondition + " 999f\n") 1291 k() 1292 w.WriteString("999:\n") 1293 } 1294} 1295 1296func (d *delocation) isRIPRelative(node *node32) bool { 1297 return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)" 1298} 1299 1300func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) { 1301 assertNodeType(instruction, ruleInstructionName) 1302 instructionName := d.contents(instruction) 1303 1304 argNodes := instructionArgs(instruction.next) 1305 1306 var wrappers wrapperStack 1307 var args []string 1308 changed := false 1309 1310Args: 1311 for i, arg := range argNodes { 1312 fullArg := arg 1313 isIndirect := false 1314 1315 if arg.pegRule == ruleIndirectionIndicator { 1316 arg = arg.next 1317 isIndirect = true 1318 } 1319 1320 switch arg.pegRule { 1321 case ruleRegisterOrConstant, ruleLocalLabelRef: 1322 args = append(args, d.contents(fullArg)) 1323 1324 case ruleMemoryRef: 1325 symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up) 1326 changed = didChange 1327 1328 if symbol == "OPENSSL_ia32cap_P" && section == "" { 1329 if instructionName != "leaq" { 1330 return nil, fmt.Errorf("non-leaq instruction %q referenced 
OPENSSL_ia32cap_P directly", instructionName) 1331 } 1332 1333 if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 { 1334 return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName) 1335 } 1336 1337 target := argNodes[1] 1338 assertNodeType(target, ruleRegisterOrConstant) 1339 reg := d.contents(target) 1340 1341 if !strings.HasPrefix(reg, "%r") { 1342 return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg) 1343 } 1344 1345 changed = true 1346 1347 // Flag-altering instructions (i.e. addq) are going to be used so the 1348 // flags need to be preserved. 1349 wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */)) 1350 1351 wrappers = append(wrappers, func(k func()) { 1352 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n") 1353 d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n") 1354 }) 1355 1356 break Args 1357 } 1358 1359 switch section { 1360 case "": 1361 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1362 symbol = localTargetName(symbol) 1363 changed = true 1364 } 1365 1366 case "PLT": 1367 if classifyInstruction(instructionName, argNodes) != instrJump { 1368 return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName) 1369 } 1370 1371 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1372 symbol = localTargetName(symbol) 1373 changed = true 1374 } else if !symbolIsLocal && !isSynthesized(symbol) { 1375 // Unknown symbol via PLT is an 1376 // out-call from the module, e.g. 1377 // memcpy. 
1378 d.redirectors[symbol+"@"+section] = redirectorName(symbol) 1379 symbol = redirectorName(symbol) 1380 } 1381 1382 changed = true 1383 1384 case "GOTPCREL": 1385 if len(offset) > 0 { 1386 return nil, errors.New("loading from GOT with offset is unsupported") 1387 } 1388 if !d.isRIPRelative(memRef) { 1389 return nil, errors.New("GOT access must be IP-relative") 1390 } 1391 1392 useGOT := false 1393 if _, knownSymbol := d.symbols[symbol]; knownSymbol { 1394 symbol = localTargetName(symbol) 1395 changed = true 1396 } else if !isSynthesized(symbol) { 1397 useGOT = true 1398 } 1399 1400 classification := classifyInstruction(instructionName, argNodes) 1401 if classification != instrThreeArg && classification != instrCompare && i != 0 { 1402 return nil, errors.New("GOT access must be source operand") 1403 } 1404 1405 // Reduce the instruction to movq symbol@GOTPCREL, targetReg. 1406 var targetReg string 1407 var redzoneCleared bool 1408 switch classification { 1409 case instrPush: 1410 wrappers = append(wrappers, push(d.output)) 1411 targetReg = "%rax" 1412 case instrConditionalMove: 1413 wrappers = append(wrappers, undoConditionalMove(d.output, instructionName)) 1414 fallthrough 1415 case instrMove: 1416 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1417 targetReg = d.contents(argNodes[1]) 1418 case instrCompare: 1419 otherSource := d.contents(argNodes[i^1]) 1420 saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource}) 1421 redzoneCleared = true 1422 wrappers = append(wrappers, saveRegWrapper) 1423 if i == 0 { 1424 wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource)) 1425 } else { 1426 wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg)) 1427 } 1428 targetReg = tempReg 1429 case instrTransformingMove: 1430 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1431 targetReg = d.contents(argNodes[1]) 1432 wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg)) 
1433 if isValidLEATarget(targetReg) { 1434 return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.") 1435 } 1436 case instrCombine: 1437 targetReg = d.contents(argNodes[1]) 1438 if !isValidLEATarget(targetReg) { 1439 return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers") 1440 } 1441 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg}) 1442 redzoneCleared = true 1443 wrappers = append(wrappers, saveRegWrapper) 1444 1445 wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg)) 1446 targetReg = tempReg 1447 case instrMemoryVectorCombine: 1448 assertNodeType(argNodes[1], ruleRegisterOrConstant) 1449 targetReg = d.contents(argNodes[1]) 1450 if isValidLEATarget(targetReg) { 1451 return nil, errors.New("target register must be an XMM register") 1452 } 1453 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1454 wrappers = append(wrappers, saveRegWrapper) 1455 redzoneCleared = true 1456 wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg)) 1457 targetReg = tempReg 1458 case instrThreeArg: 1459 if n := len(argNodes); n != 3 { 1460 return nil, fmt.Errorf("three-argument instruction has %d arguments", n) 1461 } 1462 if i != 0 && i != 1 { 1463 return nil, errors.New("GOT access must be from source operand") 1464 } 1465 targetReg = d.contents(argNodes[2]) 1466 1467 otherSource := d.contents(argNodes[1]) 1468 if i == 1 { 1469 otherSource = d.contents(argNodes[0]) 1470 } 1471 1472 saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource}) 1473 redzoneCleared = true 1474 wrappers = append(wrappers, saveRegWrapper) 1475 1476 if i == 0 { 1477 wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg)) 1478 } else { 1479 wrappers = append(wrappers, threeArgCombineOp(d.output, 
instructionName, otherSource, tempReg, targetReg)) 1480 } 1481 targetReg = tempReg 1482 default: 1483 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1484 } 1485 1486 if !isValidLEATarget(targetReg) { 1487 // Sometimes the compiler will load from the GOT to an 1488 // XMM register, which is not a valid target of an LEA 1489 // instruction. 1490 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1491 wrappers = append(wrappers, saveRegWrapper) 1492 isAVX := strings.HasPrefix(instructionName, "v") 1493 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1494 targetReg = tempReg 1495 if redzoneCleared { 1496 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1497 } 1498 redzoneCleared = true 1499 } 1500 1501 if symbol == "OPENSSL_ia32cap_P" { 1502 // Flag-altering instructions (i.e. addq) are going to be used so the 1503 // flags need to be preserved. 1504 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1505 wrappers = append(wrappers, func(k func()) { 1506 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1507 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1508 }) 1509 } else if useGOT { 1510 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1511 } else { 1512 wrappers = append(wrappers, func(k func()) { 1513 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1514 }) 1515 } 1516 changed = true 1517 break Args 1518 1519 default: 1520 return nil, fmt.Errorf("Unknown section type %q", section) 1521 } 1522 1523 if !changed && len(section) > 0 { 1524 panic("section was not handled") 1525 } 1526 section = "" 1527 1528 argStr := "" 1529 if isIndirect { 1530 argStr += "*" 1531 } 1532 argStr += symbol 1533 argStr += offset 1534 1535 for ; memRef != nil; memRef = memRef.next { 1536 argStr += d.contents(memRef) 1537 } 1538 1539 args = 
append(args, argStr) 1540 1541 case ruleGOTLocation: 1542 if instructionName != "movabsq" { 1543 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1544 } 1545 if i != 0 || len(argNodes) != 2 { 1546 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1547 } 1548 1549 d.gotDeltaNeeded = true 1550 changed = true 1551 instructionName = "movq" 1552 assertNodeType(arg.up, ruleLocalSymbol) 1553 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1554 targetReg := d.contents(argNodes[1]) 1555 args = append(args, ".Lboringssl_got_delta(%rip)") 1556 wrappers = append(wrappers, func(k func()) { 1557 k() 1558 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1559 }) 1560 1561 case ruleGOTSymbolOffset: 1562 if instructionName != "movabsq" { 1563 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1564 } 1565 if i != 0 || len(argNodes) != 2 { 1566 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1567 } 1568 1569 assertNodeType(arg.up, ruleSymbolName) 1570 symbol := d.contents(arg.up) 1571 if strings.HasPrefix(symbol, ".L") { 1572 symbol = d.mapLocalSymbol(symbol) 1573 } 1574 targetReg := d.contents(argNodes[1]) 1575 1576 var prefix string 1577 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1578 if isGOTOFF { 1579 prefix = "gotoff" 1580 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1581 } else { 1582 prefix = "got" 1583 d.gotOffsetsNeeded[symbol] = struct{}{} 1584 } 1585 changed = true 1586 1587 wrappers = append(wrappers, func(k func()) { 1588 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1589 // of writing) emits 64-bit relocations anyway, so the following four bytes 1590 // get stomped. Thus we use 64-bit offsets. 
1591 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1592 }) 1593 1594 default: 1595 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1596 } 1597 } 1598 1599 if changed { 1600 d.writeCommentedNode(statement) 1601 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1602 wrappers.do(func() { 1603 d.output.WriteString(replacement) 1604 }) 1605 } else { 1606 d.writeNode(statement) 1607 } 1608 1609 return statement, nil 1610} 1611 1612func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1613 lastStatement := statement 1614 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1615 node := skipWS(statement.up) 1616 if node == nil { 1617 d.writeNode(statement) 1618 continue 1619 } 1620 1621 switch node.pegRule { 1622 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1623 d.writeNode(statement) 1624 1625 case ruleDirective: 1626 directive := node.up 1627 assertNodeType(directive, ruleDirectiveName) 1628 directiveName := d.contents(directive) 1629 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1630 return lastStatement, nil 1631 } 1632 d.writeNode(statement) 1633 1634 case ruleLabel: 1635 label := node.up 1636 d.writeNode(statement) 1637 1638 if label.pegRule != ruleLocalSymbol { 1639 symbol := d.contents(label) 1640 localSymbol := localTargetName(symbol) 1641 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1642 1643 d.bssAccessorsNeeded[symbol] = localSymbol 1644 } 1645 1646 case ruleLabelContainingDirective: 1647 var err error 1648 statement, err = d.processLabelContainingDirective(statement, node.up) 1649 if err != nil { 1650 return nil, err 1651 } 1652 1653 default: 1654 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1655 } 1656 } 1657 1658 return 
lastStatement, nil 1659} 1660 1661func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) { 1662 w.WriteString(".p2align 2\n") 1663 w.WriteString(".hidden " + funcName + "\n") 1664 w.WriteString(".type " + funcName + ", @function\n") 1665 w.WriteString(funcName + ":\n") 1666 w.WriteString(".cfi_startproc\n") 1667 writeContents(w) 1668 w.WriteString(".cfi_endproc\n") 1669 w.WriteString(".size " + funcName + ", .-" + funcName + "\n") 1670} 1671 1672func transform(w stringWriter, inputs []inputFile) error { 1673 // symbols contains all defined symbols. 1674 symbols := make(map[string]struct{}) 1675 // localEntrySymbols contains all symbols with a .localentry directive. 1676 localEntrySymbols := make(map[string]struct{}) 1677 // fileNumbers is the set of IDs seen in .file directives. 1678 fileNumbers := make(map[int]struct{}) 1679 // maxObservedFileNumber contains the largest seen file number in a 1680 // .file directive. Zero is not a valid number. 1681 maxObservedFileNumber := 0 1682 // fileDirectivesContainMD5 is true if the compiler is outputting MD5 1683 // checksums in .file directives. If it does so, then this script needs 1684 // to match that behaviour otherwise warnings result. 1685 fileDirectivesContainMD5 := false 1686 1687 // OPENSSL_ia32cap_get will be synthesized by this script. 
1688 symbols["OPENSSL_ia32cap_get"] = struct{}{} 1689 1690 for _, input := range inputs { 1691 forEachPath(input.ast.up, func(node *node32) { 1692 symbol := input.contents[node.begin:node.end] 1693 if _, ok := symbols[symbol]; ok { 1694 panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path)) 1695 } 1696 symbols[symbol] = struct{}{} 1697 }, ruleStatement, ruleLabel, ruleSymbolName) 1698 1699 forEachPath(input.ast.up, func(node *node32) { 1700 node = node.up 1701 assertNodeType(node, ruleLabelContainingDirectiveName) 1702 directive := input.contents[node.begin:node.end] 1703 if directive != ".localentry" { 1704 return 1705 } 1706 // Extract the first argument. 1707 node = skipWS(node.next) 1708 assertNodeType(node, ruleSymbolArgs) 1709 node = node.up 1710 assertNodeType(node, ruleSymbolArg) 1711 symbol := input.contents[node.begin:node.end] 1712 if _, ok := localEntrySymbols[symbol]; ok { 1713 panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path)) 1714 } 1715 localEntrySymbols[symbol] = struct{}{} 1716 }, ruleStatement, ruleLabelContainingDirective) 1717 1718 forEachPath(input.ast.up, func(node *node32) { 1719 assertNodeType(node, ruleLocationDirective) 1720 directive := input.contents[node.begin:node.end] 1721 if !strings.HasPrefix(directive, ".file") { 1722 return 1723 } 1724 parts := strings.Fields(directive) 1725 if len(parts) == 2 { 1726 // This is a .file directive with just a 1727 // filename. Clang appears to generate just one 1728 // of these at the beginning of the output for 1729 // the compilation unit. Ignore it. 
1730 return 1731 } 1732 fileNo, err := strconv.Atoi(parts[1]) 1733 if err != nil { 1734 panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive)) 1735 } 1736 1737 if _, ok := fileNumbers[fileNo]; ok { 1738 panic(fmt.Sprintf("Duplicate file number %d observed", fileNo)) 1739 } 1740 fileNumbers[fileNo] = struct{}{} 1741 1742 if fileNo > maxObservedFileNumber { 1743 maxObservedFileNumber = fileNo 1744 } 1745 1746 for _, token := range parts[2:] { 1747 if token == "md5" { 1748 fileDirectivesContainMD5 = true 1749 } 1750 } 1751 }, ruleStatement, ruleLocationDirective) 1752 } 1753 1754 processor := x86_64 1755 if len(inputs) > 0 { 1756 processor = detectProcessor(inputs[0]) 1757 } 1758 1759 commentIndicator := "#" 1760 if processor == aarch64 { 1761 commentIndicator = "//" 1762 } 1763 1764 d := &delocation{ 1765 symbols: symbols, 1766 localEntrySymbols: localEntrySymbols, 1767 processor: processor, 1768 commentIndicator: commentIndicator, 1769 output: w, 1770 redirectors: make(map[string]string), 1771 bssAccessorsNeeded: make(map[string]string), 1772 tocLoaders: make(map[string]struct{}), 1773 gotExternalsNeeded: make(map[string]struct{}), 1774 gotOffsetsNeeded: make(map[string]struct{}), 1775 gotOffOffsetsNeeded: make(map[string]struct{}), 1776 } 1777 1778 w.WriteString(".text\n") 1779 var fileTrailing string 1780 if fileDirectivesContainMD5 { 1781 fileTrailing = " md5 0x00000000000000000000000000000000" 1782 } 1783 w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing)) 1784 w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1)) 1785 w.WriteString("BORINGSSL_bcm_text_start:\n") 1786 1787 for _, input := range inputs { 1788 if err := d.processInput(input); err != nil { 1789 return err 1790 } 1791 } 1792 1793 w.WriteString(".text\n") 1794 w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1)) 1795 w.WriteString("BORINGSSL_bcm_text_end:\n") 1796 1797 // Emit redirector 
functions. Each is a single jump instruction. 1798 var redirectorNames []string 1799 for name := range d.redirectors { 1800 redirectorNames = append(redirectorNames, name) 1801 } 1802 sort.Strings(redirectorNames) 1803 1804 for _, name := range redirectorNames { 1805 redirector := d.redirectors[name] 1806 switch d.processor { 1807 case ppc64le: 1808 w.WriteString(".section \".toc\", \"aw\"\n") 1809 w.WriteString(".Lredirector_toc_" + name + ":\n") 1810 w.WriteString(".quad " + name + "\n") 1811 w.WriteString(".text\n") 1812 w.WriteString(".type " + redirector + ", @function\n") 1813 w.WriteString(redirector + ":\n") 1814 // |name| will clobber r2, so save it. This is matched by a restore in 1815 // redirector calls. 1816 w.WriteString("\tstd 2, 24(1)\n") 1817 // Load and call |name|'s global entry point. 1818 w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n") 1819 w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n") 1820 w.WriteString("\tmtctr 12\n") 1821 w.WriteString("\tbctr\n") 1822 1823 case aarch64: 1824 writeAarch64Function(w, redirector, func(w stringWriter) { 1825 w.WriteString("\tb " + name + "\n") 1826 }) 1827 1828 case x86_64: 1829 w.WriteString(".type " + redirector + ", @function\n") 1830 w.WriteString(redirector + ":\n") 1831 w.WriteString("\tjmp\t" + name + "\n") 1832 } 1833 } 1834 1835 var accessorNames []string 1836 for accessor := range d.bssAccessorsNeeded { 1837 accessorNames = append(accessorNames, accessor) 1838 } 1839 sort.Strings(accessorNames) 1840 1841 // Emit BSS accessor functions. Each is a single LEA followed by RET. 
1842 for _, name := range accessorNames { 1843 funcName := accessorName(name) 1844 target := d.bssAccessorsNeeded[name] 1845 1846 switch d.processor { 1847 case ppc64le: 1848 w.WriteString(".type " + funcName + ", @function\n") 1849 w.WriteString(funcName + ":\n") 1850 w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n") 1851 w.WriteString("\taddi 3, 3, " + target + "@toc@l\n") 1852 w.WriteString("\tblr\n") 1853 1854 case x86_64: 1855 w.WriteString(".type " + funcName + ", @function\n") 1856 w.WriteString(funcName + ":\n") 1857 w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n") 1858 1859 case aarch64: 1860 writeAarch64Function(w, funcName, func(w stringWriter) { 1861 w.WriteString("\tadrp x0, " + target + "\n") 1862 w.WriteString("\tadd x0, x0, :lo12:" + target + "\n") 1863 w.WriteString("\tret\n") 1864 }) 1865 } 1866 } 1867 1868 switch d.processor { 1869 case ppc64le: 1870 loadTOCNames := sortedSet(d.tocLoaders) 1871 for _, symbolAndOffset := range loadTOCNames { 1872 parts := strings.SplitN(symbolAndOffset, "\x00", 2) 1873 symbol, offset := parts[0], parts[1] 1874 1875 funcName := loadTOCFuncName(symbol, offset) 1876 ref := symbol + offset 1877 1878 w.WriteString(".type " + funcName[2:] + ", @function\n") 1879 w.WriteString(funcName[2:] + ":\n") 1880 w.WriteString(funcName + ":\n") 1881 w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n") 1882 w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n") 1883 w.WriteString("\tblr\n") 1884 } 1885 1886 w.WriteString(".LBORINGSSL_external_toc:\n") 1887 w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n") 1888 1889 case aarch64: 1890 externalNames := sortedSet(d.gotExternalsNeeded) 1891 for _, symbol := range externalNames { 1892 writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) { 1893 w.WriteString("\tadrp x0, :got:" + symbol + "\n") 1894 w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n") 1895 w.WriteString("\tret\n") 1896 }) 1897 } 1898 1899 writeAarch64Function(w, 
".LOPENSSL_armcap_P_addr", func(w stringWriter) { 1900 w.WriteString("\tadrp x0, OPENSSL_armcap_P\n") 1901 w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n") 1902 w.WriteString("\tret\n") 1903 }) 1904 1905 case x86_64: 1906 externalNames := sortedSet(d.gotExternalsNeeded) 1907 for _, name := range externalNames { 1908 parts := strings.SplitN(name, "@", 2) 1909 symbol, section := parts[0], parts[1] 1910 w.WriteString(".type " + symbol + "_" + section + "_external, @object\n") 1911 w.WriteString(".size " + symbol + "_" + section + "_external, 8\n") 1912 w.WriteString(symbol + "_" + section + "_external:\n") 1913 // Ideally this would be .quad foo@GOTPCREL, but clang's 1914 // assembler cannot emit a 64-bit GOTPCREL relocation. Instead, 1915 // we manually sign-extend the value, knowing that the GOT is 1916 // always at the end, thus foo@GOTPCREL has a positive value. 1917 w.WriteString("\t.long " + symbol + "@" + section + "\n") 1918 w.WriteString("\t.long 0\n") 1919 } 1920 1921 w.WriteString(".type OPENSSL_ia32cap_get, @function\n") 1922 w.WriteString(".globl OPENSSL_ia32cap_get\n") 1923 w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n") 1924 w.WriteString("OPENSSL_ia32cap_get:\n") 1925 w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n") 1926 w.WriteString("\tret\n") 1927 1928 w.WriteString(".extern OPENSSL_ia32cap_P\n") 1929 w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n") 1930 w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n") 1931 w.WriteString("OPENSSL_ia32cap_addr_delta:\n") 1932 w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n") 1933 1934 if d.gotDeltaNeeded { 1935 w.WriteString(".Lboringssl_got_delta:\n") 1936 w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n") 1937 } 1938 1939 for _, name := range sortedSet(d.gotOffsetsNeeded) { 1940 w.WriteString(".Lboringssl_got_" + name + ":\n") 1941 w.WriteString("\t.quad " + name + "@GOT\n") 1942 } 1943 for _, name := range 
sortedSet(d.gotOffOffsetsNeeded) {
			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOTOFF\n")
		}
	}

	// Emit the 32-byte BORINGSSL_bcm_text_hash object, one .byte directive
	// per byte of fipscommon.UninitHashValue.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range fipscommon.UninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}

// readInputContents returns the text of a single input. An archive input must
// contain exactly one member, whose bytes are returned; any other input is
// read directly from input.path.
func readInputContents(input inputFile) (string, error) {
	if !input.isArchive {
		inBytes, err := ioutil.ReadFile(input.path)
		if err != nil {
			return "", err
		}
		return string(inBytes), nil
	}

	arFile, err := os.Open(input.path)
	if err != nil {
		return "", err
	}
	// Deferring here, rather than inside parseInputs' loop, closes the
	// archive as soon as this one input has been read.
	defer arFile.Close()

	members, err := ar.ParseAR(arFile)
	if err != nil {
		return "", err
	}

	if len(members) != 1 {
		return "", fmt.Errorf("expected one file in archive, but found %d", len(members))
	}

	var contents string
	for _, c := range members {
		contents = string(c)
	}
	return contents, nil
}

// parseInputs reads and parses every element of inputs, storing the raw text
// in its contents field and the resulting syntax tree in its ast field. It
// stops at, and returns, the first read or parse error.
func parseInputs(inputs []inputFile) error {
	for i, input := range inputs {
		contents, err := readInputContents(input)
		if err != nil {
			return err
		}

		asm := Asm{Buffer: contents, Pretty: true}
		asm.Init()
		if err := asm.Parse(); err != nil {
			return fmt.Errorf("error while parsing %q: %s", input.path, err)
		}

		// input is a copy, so write the results back through the slice.
		inputs[i].contents = contents
		inputs[i].ast = asm.AST()
	}

	return nil
}

// main parses the command line, loads every input, and writes the transformed
// assembly to the file named by -o. Any failure is reported on stderr and
// results in exit status 1.
func main() {
	// The .a file, if given, is expected to be an archive of textual
	// assembly sources. That's odd, but CMake really wants to create
	// archive files so it's the only way that we can make it work.
	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
	outFile := flag.String("o", "", "Path to output assembly")

	flag.Parse()

	if len(*outFile) == 0 {
		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
		os.Exit(1)
	}

	var inputs []inputFile
	if len(*arInput) > 0 {
		// The archive always gets index zero.
		inputs = append(inputs, inputFile{
			path:      *arInput,
			index:     0,
			isArchive: true,
		})
	}

	for i, path := range flag.Args() {
		if len(path) == 0 {
			continue
		}

		// Positional inputs are numbered from one, by argument position.
		inputs = append(inputs, inputFile{
			path:  path,
			index: i + 1,
		})
	}

	if err := parseInputs(inputs); err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}

	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}

	// Close explicitly rather than with defer: os.Exit skips deferred calls,
	// and an error from Close on a freshly written file must not be lost.
	err = transform(out, inputs)
	if closeErr := out.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}
}

// forEachPath walks node and the siblings that follow it (via next). With no
// rules, cb is called once, on node itself. Otherwise every sibling whose
// pegRule equals rules[0] is either passed to cb, when that was the last rule,
// or has its children (up) searched with the remaining rules.
func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
	if node == nil {
		return
	}

	if len(rules) == 0 {
		cb(node)
		return
	}

	rule := rules[0]
	childRules := rules[1:]

	for ; node != nil; node = node.next {
		if node.pegRule != rule {
			continue
		}

		if len(childRules) == 0 {
			cb(node)
		} else {
			forEachPath(node.up, cb, childRules...)
		}
	}
}

// skipNodes returns the first node, starting at node and following next, whose
// pegRule is not ruleToSkip. It returns nil if there is no such node.
func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
	}
	return node
}

// skipWS is skipNodes specialised to ruleWS nodes.
func skipWS(node *node32) *node32 {
	return skipNodes(node, ruleWS)
}

// assertNodeType panics unless node's pegRule is expected.
func assertNodeType(node *node32, expected pegRule) {
	if rule := node.pegRule; rule != expected {
		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
	}
}

// wrapperFunc is a function that receives a continuation to invoke.
type wrapperFunc func(func())

// wrapperStack is an ordered list of wrapperFuncs.
type wrapperStack []wrapperFunc

// do removes the first wrapper from w and calls it with a continuation that
// processes the rest of w in the same way. Once w is empty, baseCase is
// called. w is consumed as the wrappers run.
func (w *wrapperStack) do(baseCase func()) {
	if len(*w) == 0 {
		baseCase()
		return
	}

	wrapper := (*w)[0]
	*w = (*w)[1:]
	wrapper(func() { w.do(baseCase) })
}

// localTargetName returns the name of the local target label for a global
// symbol named name.
func localTargetName(name string) string {
	return ".L" + name + "_local_target"
}

// localEntryName returns the name of the local entry label for a symbol named
// name.
func localEntryName(name string) string {
	return ".L" + name + "_local_entry"
}

// isSynthesized reports whether symbol ends in "_bss_get", is exactly
// "OPENSSL_ia32cap_get", or starts with "BORINGSSL_bcm_text_".
func isSynthesized(symbol string) bool {
	return strings.HasSuffix(symbol, "_bss_get") ||
		symbol == "OPENSSL_ia32cap_get" ||
		strings.HasPrefix(symbol, "BORINGSSL_bcm_text_")
}

// redirectorName returns the name of the redirector function for symbol.
func redirectorName(symbol string) string {
	return "bcm_redirector_" + symbol
}

// sectionType returns the type of a section. I.e. a section called “.text.foo”
// is a “.text” section. The second result is false if section is empty or does
// not begin with a dot.
func sectionType(section string) (string, bool) {
	if len(section) == 0 || section[0] != '.' {
		return "", false
	}

	// Look for a second dot; i is relative to section[1:], so the dot sits at
	// section[i+1] and everything before it is kept.
	i := strings.Index(section[1:], ".")
	if i != -1 {
		section = section[:i+1]
	}

	if strings.HasPrefix(section, ".debug_") {
		return ".debug", true
	}

	return section, true
}

// accessorName returns the name of the accessor function for a BSS symbol
// named name.
2154func accessorName(name string) string { 2155 return name + "_bss_get" 2156} 2157 2158func (d *delocation) mapLocalSymbol(symbol string) string { 2159 if d.currentInput.index == 0 { 2160 return symbol 2161 } 2162 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 2163} 2164 2165func detectProcessor(input inputFile) processorType { 2166 for statement := input.ast.up; statement != nil; statement = statement.next { 2167 node := skipNodes(statement.up, ruleWS) 2168 if node == nil || node.pegRule != ruleInstruction { 2169 continue 2170 } 2171 2172 instruction := node.up 2173 instructionName := input.contents[instruction.begin:instruction.end] 2174 2175 switch instructionName { 2176 case "movq", "call", "leaq": 2177 return x86_64 2178 case "addis", "addi", "mflr": 2179 return ppc64le 2180 case "str", "bl", "ldr", "st1": 2181 return aarch64 2182 } 2183 } 2184 2185 panic("processed entire input and didn't recognise any instructions.") 2186} 2187 2188func sortedSet(m map[string]struct{}) []string { 2189 ret := make([]string, 0, len(m)) 2190 for key := range m { 2191 ret = append(ret, key) 2192 } 2193 sort.Strings(ret) 2194 return ret 2195} 2196