1// Copyright (c) 2017, Google Inc. 2// 3// Permission to use, copy, modify, and/or distribute this software for any 4// purpose with or without fee is hereby granted, provided that the above 5// copyright notice and this permission notice appear in all copies. 6// 7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15// delocate performs several transformations of textual assembly code. See 16// crypto/fipsmodule/FIPS.md for an overview. 17package main 18 19import ( 20 "bytes" 21 "errors" 22 "flag" 23 "fmt" 24 "os" 25 "os/exec" 26 "path/filepath" 27 "sort" 28 "strconv" 29 "strings" 30 31 "boringssl.googlesource.com/boringssl/util/ar" 32 "boringssl.googlesource.com/boringssl/util/fipstools/fipscommon" 33) 34 35// inputFile represents a textual assembly file. 36type inputFile struct { 37 path string 38 // index is a unique identifer given to this file. It's used for 39 // mapping local symbols. 40 index int 41 // isArchive indicates that the input should be processed as an ar 42 // file. 43 isArchive bool 44 // contents contains the contents of the file. 45 contents string 46 // ast points to the head of the syntax tree. 47 ast *node32 48} 49 50type stringWriter interface { 51 WriteString(string) (int, error) 52} 53 54type processorType int 55 56const ( 57 x86_64 processorType = iota + 1 58 aarch64 59) 60 61// delocation holds the state needed during a delocation operation. 62type delocation struct { 63 processor processorType 64 output stringWriter 65 // commentIndicator starts a comment, e.g. 
"//" or "#" 66 commentIndicator string 67 68 // symbols is the set of symbols defined in the module. 69 symbols map[string]struct{} 70 // redirectors maps from out-call symbol name to the name of a 71 // redirector function for that symbol. E.g. “memcpy” -> 72 // “bcm_redirector_memcpy”. 73 redirectors map[string]string 74 // bssAccessorsNeeded maps from a BSS symbol name to the symbol that 75 // should be used to reference it. E.g. “P384_data_storage” -> 76 // “P384_data_storage”. 77 bssAccessorsNeeded map[string]string 78 // gotExternalsNeeded is a set of symbol names for which we need 79 // “delta” symbols: symbols that contain the offset from their location 80 // to the memory in question. 81 gotExternalsNeeded map[string]struct{} 82 // gotDeltaNeeded is true if the code needs to load the value of 83 // _GLOBAL_OFFSET_TABLE_. 84 gotDeltaNeeded bool 85 // gotOffsetsNeeded contains the symbols whose @GOT offsets are needed. 86 gotOffsetsNeeded map[string]struct{} 87 // gotOffOffsetsNeeded contains the symbols whose @GOTOFF offsets are needed. 88 gotOffOffsetsNeeded map[string]struct{} 89 90 currentInput inputFile 91} 92 93func (d *delocation) contents(node *node32) string { 94 return d.currentInput.contents[node.begin:node.end] 95} 96 97// writeNode writes out an AST node. 
98func (d *delocation) writeNode(node *node32) { 99 if _, err := d.output.WriteString(d.contents(node)); err != nil { 100 panic(err) 101 } 102} 103 104func (d *delocation) writeCommentedNode(node *node32) { 105 line := d.contents(node) 106 if _, err := d.output.WriteString(d.commentIndicator + " WAS " + strings.TrimSpace(line) + "\n"); err != nil { 107 panic(err) 108 } 109} 110 111func locateError(err error, with *node32, in inputFile) error { 112 posMap := translatePositions([]rune(in.contents), []int{int(with.begin)}) 113 var line int 114 for _, pos := range posMap { 115 line = pos.line 116 } 117 118 return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err) 119} 120 121func (d *delocation) processInput(input inputFile) (err error) { 122 d.currentInput = input 123 124 var origStatement *node32 125 defer func() { 126 if err := recover(); err != nil { 127 panic(locateError(fmt.Errorf("%s", err), origStatement, input)) 128 } 129 }() 130 131 for statement := input.ast.up; statement != nil; statement = statement.next { 132 assertNodeType(statement, ruleStatement) 133 origStatement = statement 134 135 node := skipWS(statement.up) 136 if node == nil { 137 d.writeNode(statement) 138 continue 139 } 140 141 switch node.pegRule { 142 case ruleGlobalDirective, ruleComment, ruleLocationDirective: 143 d.writeNode(statement) 144 case ruleDirective: 145 statement, err = d.processDirective(statement, node.up) 146 case ruleLabelContainingDirective: 147 statement, err = d.processLabelContainingDirective(statement, node.up) 148 case ruleLabel: 149 statement, err = d.processLabel(statement, node.up) 150 case ruleInstruction: 151 switch d.processor { 152 case x86_64: 153 statement, err = d.processIntelInstruction(statement, node.up) 154 case aarch64: 155 statement, err = d.processAarch64Instruction(statement, node.up) 156 default: 157 panic("unknown processor") 158 } 159 default: 160 panic(fmt.Sprintf("unknown top-level statement type 
%q", rul3s[node.pegRule])) 161 } 162 163 if err != nil { 164 return locateError(err, origStatement, input) 165 } 166 } 167 168 return nil 169} 170 171func (d *delocation) processDirective(statement, directive *node32) (*node32, error) { 172 assertNodeType(directive, ruleDirectiveName) 173 directiveName := d.contents(directive) 174 175 var args []string 176 forEachPath(directive, func(arg *node32) { 177 // If the argument is a quoted string, use the raw contents. 178 // (Note that this doesn't unescape the string, but that's not 179 // needed so far. 180 if arg.up != nil { 181 arg = arg.up 182 assertNodeType(arg, ruleQuotedArg) 183 if arg.up == nil { 184 args = append(args, "") 185 return 186 } 187 arg = arg.up 188 assertNodeType(arg, ruleQuotedText) 189 } 190 args = append(args, d.contents(arg)) 191 }, ruleArgs, ruleArg) 192 193 switch directiveName { 194 case "comm", "lcomm": 195 if len(args) < 1 { 196 return nil, errors.New("comm directive has no arguments") 197 } 198 d.bssAccessorsNeeded[args[0]] = args[0] 199 d.writeNode(statement) 200 201 case "data": 202 // ASAN and some versions of MSAN are adding a .data section, 203 // and adding references to symbols within it to the code. We 204 // will have to work around this in the future. 205 return nil, errors.New(".data section found in module") 206 207 case "section": 208 section := args[0] 209 210 if section == ".data.rel.ro" { 211 // In a normal build, this is an indication of a 212 // problem but any references from the module to this 213 // section will result in a relocation and thus will 214 // break the integrity check. ASAN can generate these 215 // sections and so we will likely have to work around 216 // that in the future. 217 return nil, errors.New(".data.rel.ro section found in module") 218 } 219 220 sectionType, ok := sectionType(section) 221 if !ok { 222 // Unknown sections are permitted in order to be robust 223 // to different compiler modes. 
224 d.writeNode(statement) 225 break 226 } 227 228 switch sectionType { 229 case ".rodata", ".text": 230 // Move .rodata to .text so it may be accessed without 231 // a relocation. GCC with -fmerge-constants will place 232 // strings into separate sections, so we move all 233 // sections named like .rodata. Also move .text.startup 234 // so the self-test function is also in the module. 235 d.writeCommentedNode(statement) 236 d.output.WriteString(".text\n") 237 238 case ".data": 239 // See above about .data 240 return nil, errors.New(".data section found in module") 241 242 case ".init_array", ".fini_array", ".ctors", ".dtors": 243 // init_array/ctors/dtors contains function 244 // pointers to constructor/destructor 245 // functions. These contain relocations, but 246 // they're in a different section anyway. 247 d.writeNode(statement) 248 break 249 250 case ".debug", ".note": 251 d.writeNode(statement) 252 break 253 254 case ".bss": 255 d.writeNode(statement) 256 return d.handleBSS(statement) 257 } 258 259 default: 260 d.writeNode(statement) 261 } 262 263 return statement, nil 264} 265 266func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) { 267 // The symbols within directives need to be mapped so that local 268 // symbols in two different .s inputs don't collide. 
269 changed := false 270 assertNodeType(directive, ruleLabelContainingDirectiveName) 271 name := d.contents(directive) 272 273 node := directive.next 274 assertNodeType(node, ruleWS) 275 276 node = node.next 277 assertNodeType(node, ruleSymbolArgs) 278 279 var args []string 280 for node = skipWS(node.up); node != nil; node = skipWS(node.next) { 281 assertNodeType(node, ruleSymbolArg) 282 arg := node.up 283 var mapped string 284 285 for term := arg; term != nil; term = term.next { 286 if term.pegRule != ruleLocalSymbol { 287 mapped += d.contents(term) 288 continue 289 } 290 291 oldSymbol := d.contents(term) 292 newSymbol := d.mapLocalSymbol(oldSymbol) 293 if newSymbol != oldSymbol { 294 changed = true 295 } 296 297 mapped += newSymbol 298 } 299 300 args = append(args, mapped) 301 } 302 303 if !changed { 304 d.writeNode(statement) 305 } else { 306 d.writeCommentedNode(statement) 307 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n") 308 } 309 310 return statement, nil 311} 312 313func (d *delocation) processLabel(statement, label *node32) (*node32, error) { 314 symbol := d.contents(label) 315 316 switch label.pegRule { 317 case ruleLocalLabel: 318 d.output.WriteString(symbol + ":\n") 319 case ruleLocalSymbol: 320 // symbols need to be mapped so that local symbols from two 321 // different .s inputs don't collide. 322 d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n") 323 case ruleSymbolName: 324 d.output.WriteString(localTargetName(symbol) + ":\n") 325 d.writeNode(statement) 326 default: 327 return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule]) 328 } 329 330 return statement, nil 331} 332 333// instructionArgs collects all the arguments to an instruction. 
func instructionArgs(node *node32) (argNodes []*node32) {
	// Every non-whitespace sibling must be an InstructionArg; the node that
	// is collected is its first child.
	for node = skipWS(node); node != nil; node = skipWS(node.next) {
		assertNodeType(node, ruleInstructionArg)
		argNodes = append(argNodes, node.up)
	}

	return argNodes
}

// Aarch64 support

// gotHelperName returns the name of a synthesised function that returns an
// address from the GOT.
func gotHelperName(symbol string) string {
	return ".Lboringssl_loadgot_" + symbol
}

// loadAarch64Address emits instructions to put the address of |symbol|
// (optionally adjusted by |offsetStr|) into |targetReg|.
func (d *delocation) loadAarch64Address(statement *node32, targetReg string, symbol string, offsetStr string) (*node32, error) {
	// There are two paths here: either the symbol is known to be local in which
	// case adr is used to get the address (within 1MiB), or a GOT reference is
	// really needed in which case the code needs to jump to a helper function.
	//
	// A helper function is needed because using code appears to be the only way
	// to load a GOT value. On other platforms we have ".quad foo@GOT" outside of
	// the module, but on Aarch64 that results in a "COPY" relocation and linker
	// comments suggest it's a weird hack. So, for each GOT symbol needed, we emit
	// a function outside of the module that returns the address from the GOT in
	// x0.

	d.writeCommentedNode(statement)

	_, isKnown := d.symbols[symbol]
	isLocal := strings.HasPrefix(symbol, ".L")
	if isKnown || isLocal || isSynthesized(symbol) {
		// Synthesized symbols are referenced under their own name; local and
		// known symbols are renamed first.
		if isLocal {
			symbol = d.mapLocalSymbol(symbol)
		} else if isKnown {
			symbol = localTargetName(symbol)
		}

		d.output.WriteString("\tadr " + targetReg + ", " + symbol + offsetStr + "\n")

		return statement, nil
	}

	if len(offsetStr) != 0 {
		panic("non-zero offset for helper-based reference")
	}

	var helperFunc string
	if symbol == "OPENSSL_armcap_P" {
		helperFunc = ".LOPENSSL_armcap_P_addr"
	} else {
		// GOT helpers also dereference the GOT entry, thus the subsequent ldr
		// instruction, which would normally do the dereferencing, needs to be
		// dropped. GOT helpers have to include the dereference because the
		// assembler doesn't support ":got_lo12:foo" offsets except in an ldr
		// instruction.
		d.gotExternalsNeeded[symbol] = struct{}{}
		helperFunc = gotHelperName(symbol)
	}

	// Clear the red-zone. I can't find a definitive answer about whether Linux
	// Aarch64 includes a red-zone, but Microsoft has a 16-byte one and Apple a
	// 128-byte one. Thus conservatively clear a 128-byte red-zone.
	d.output.WriteString("\tsub sp, sp, 128\n")

	// Save x0 (which will be stomped by the return value) and the link register
	// to the stack. Then save the program counter into the link register and
	// jump to the helper function.
	d.output.WriteString("\tstp x0, lr, [sp, #-16]!\n")
	d.output.WriteString("\tbl " + helperFunc + "\n")

	if targetReg == "x0" {
		// If the target happens to be x0 then restore the link register from the
		// stack and send the saved value of x0 to the zero register.
		d.output.WriteString("\tldp xzr, lr, [sp], #16\n")
	} else {
		// Otherwise move the result into place and restore registers.
		d.output.WriteString("\tmov " + targetReg + ", x0\n")
		d.output.WriteString("\tldp x0, lr, [sp], #16\n")
	}

	// Revert the red-zone adjustment.
	d.output.WriteString("\tadd sp, sp, 128\n")

	return statement, nil
}

// processAarch64Instruction rewrites a single AArch64 instruction statement.
// |instruction| is the InstructionName node of |statement|.
//
// Instructions whose operands merely look like symbol references are copied
// through. adrp is replaced via loadAarch64Address. For everything else the
// arguments are rebuilt: references to module symbols are pointed at their
// local target names, references to unknown non-local symbols are pointed at
// redirectors (which are recorded in d.redirectors), and :lo12: / :got_lo12:
// offsets are dropped. If nothing changed the statement is written verbatim;
// otherwise it is written as a comment followed by the replacement.
func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	switch instructionName {
	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg":
		// These functions are special because they take a condition-code name as
		// an argument and that looks like a symbol reference.
		d.writeNode(statement)
		return statement, nil

	case "mrs":
		// Functions that take special register names also look like a symbol
		// reference to the parser.
		d.writeNode(statement)
		return statement, nil

	case "adrp":
		// adrp always generates a relocation, even when the target symbol is in the
		// same segment, because the page-offset of the code isn't known until link
		// time. Thus adrp instructions are turned into either adr instructions
		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
		// which load the full address. Later instructions, which add the low 12 bits
		// of offset, are tweaked to remove the offset since it's already included.
		// Loads of GOT symbols are slightly more complex because it's not possible to
		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
		// instruction, which would normally do the dereferencing, is dropped
		// completely. (Or turned into a mov if it targets a different register.)
		assertNodeType(argNodes[0], ruleRegisterOrConstant)
		targetReg := d.contents(argNodes[0])
		if !strings.HasPrefix(targetReg, "x") {
			panic("adrp targetting register " + targetReg + ", which has the wrong size")
		}

		var symbol, offset string
		switch argNodes[1].pegRule {
		case ruleGOTSymbolOffset:
			symbol = d.contents(argNodes[1].up)
		case ruleMemoryRef:
			assertNodeType(argNodes[1].up, ruleSymbolRef)
			// Offsets are only accepted after the symbol, not before it.
			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
			if len(empty) != 0 {
				panic("prefix offsets found for adrp")
			}
			symbol = d.contents(node)
			_, offset = d.gatherOffsets(node.next, "")
		default:
			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
		}

		return d.loadAarch64Address(statement, targetReg, symbol, offset)
	}

	var args []string
	changed := false

	for _, arg := range argNodes {
		fullArg := arg

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
			args = append(args, d.contents(fullArg))

		case ruleGOTSymbolOffset:
			// These should only be arguments to adrp and thus unreachable.
			panic("unreachable")

		case ruleMemoryRef:
			ref := arg.up

			switch ref.pegRule {
			case ruleSymbolRef:
				// This is a branch. Either the target needs to be written to a local
				// version of the symbol to ensure that no relocations are emitted, or
				// it needs to jump to a redirector function.
				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(arg.up)
				// NOTE(review): this assignment overwrites a true value set by an
				// earlier argument of the same instruction — confirm that no
				// instruction form has a rewritten argument before a symbol
				// reference.
				changed = didChange

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					redirector := redirectorName(symbol)
					d.redirectors[symbol] = redirector
					symbol = redirector
					changed = true
				} else if didChange && symbolIsLocal && len(offset) > 0 {
					// didChange is set when the inputFile index is not 0; which is the index of the
					// first file copied to the output, which is the generated assembly of bcm.c.
					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
					// in order to ensure they don't collide. `index` gets incremented per file.
					// If there is offset after the symbol, append the `offset`.
					symbol = symbol + offset
				}

				args = append(args, symbol)

			case ruleARMBaseIndexScale:
				parts := ref.up
				assertNodeType(parts, ruleARMRegister)
				baseAddrReg := d.contents(parts)
				parts = skipWS(parts.next)

				// Only two forms need special handling. First there's memory references
				// like "[x*, :got_lo12:foo]". The base register here will have been the
				// target of an adrp instruction to load the page address, but the adrp
				// will have turned into loading the full address *and dereferencing it*,
				// above. Thus this instruction needs to be dropped otherwise we'll be
				// dereferencing twice.
				//
				// Second there are forms like "[x*, :lo12:foo]" where the code has used
				// adrp to load the page address into x*. That adrp will have been turned
				// into loading the full address so just the offset needs to be dropped.

				if parts != nil {
					if parts.pegRule == ruleARMGOTLow12 {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// The GOT helper already dereferenced the entry so, at most, just a mov
						// is needed to put things in the right register.
						d.writeCommentedNode(statement)
						if baseAddrReg != args[0] {
							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
						}
						return statement, nil
					} else if parts.pegRule == ruleLow12BitsSymbolRef {
						if instructionName != "ldr" {
							panic("Symbol reference outside of ldr instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						// Suppress the offset; adrp loaded the full address.
						args = append(args, "["+baseAddrReg+"]")
						changed = true
						continue
					}
				}

				args = append(args, d.contents(fullArg))

			case ruleLow12BitsSymbolRef:
				// These are the second instruction in a pair:
				//   adrp x0, symbol           // Load the page address into x0
				//   add x1, x0, :lo12:symbol  // Adds the page offset.
				//
				// The adrp instruction will have been turned into a sequence that loads
				// the full address, above, thus the offset is turned into zero. If that
				// results in the instruction being a nop, then it is deleted.
				if instructionName != "add" {
					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
				}

				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
					panic("address arithmetic with incorrectly sized register")
				}

				if args[0] == args[1] {
					// add xN, xN, #0 would be a nop, so only the comment is emitted.
					d.writeCommentedNode(statement)
					return statement, nil
				}

				args = append(args, "#0")
				changed = true

			default:
				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
			}

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		d.output.WriteString(replacement)
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}

// gatherOffsets consumes consecutive Offset nodes starting at symRef and
// appends the text of each to offsets, inserting a leading '+' for any offset
// that does not already begin with '+' or '-'. It returns the first node that
// is not an Offset (nil if the list was exhausted) and the accumulated string.
func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
	for symRef != nil && symRef.pegRule == ruleOffset {
		offset := d.contents(symRef)
		if offset[0] != '+' && offset[0] != '-' {
			offset = "+" + offset
		}
		offsets = offsets + offset
		symRef = symRef.next
	}
	return symRef, offsets
}

// parseMemRef decomposes a SymbolRef node.
//
// If memRef is not a SymbolRef, all results are zero except nextRef, which is
// memRef itself. Otherwise it returns the symbol name (local symbols are
// passed through mapLocalSymbol, and didChange reports whether that altered
// the name), the concatenation of all offsets found before and after the
// symbol and after the section, the section name if one is present, whether
// the symbol is a local symbol, and the node following memRef. It panics if
// unexpected nodes remain inside the SymbolRef.
func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
	if memRef.pegRule != ruleSymbolRef {
		return "", "", "", false, false, memRef
	}

	symRef := memRef.up
	nextRef = memRef.next

	// (Offset* '+')?
	symRef, offset = d.gatherOffsets(symRef, offset)

	// (LocalSymbol / SymbolName)
	symbol = d.contents(symRef)
	if symRef.pegRule == ruleLocalSymbol {
		symbolIsLocal = true
		mapped := d.mapLocalSymbol(symbol)
		if mapped != symbol {
			symbol = mapped
			didChange = true
		}
	}
	symRef = symRef.next

	// Offset*
	symRef, offset = d.gatherOffsets(symRef, offset)

	// ('@' Section / Offset*)?
	if symRef != nil {
		assertNodeType(symRef, ruleSection)
		section = d.contents(symRef)
		symRef = symRef.next

		symRef, offset = d.gatherOffsets(symRef, offset)
	}

	if symRef != nil {
		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
	}

	return
}

/* Intel */

// instructionType is the coarse classification of an x86-64 instruction that
// classifyInstruction produces.
type instructionType int

const (
	instrPush instructionType = iota
	instrMove
	// instrTransformingMove is essentially a move, but it performs some
	// transformation of the data during the process.
	instrTransformingMove
	instrJump
	instrConditionalMove
	// instrCombine merges the source and destination in some fashion, for example
	// a 2-operand bitwise operation.
	instrCombine
	// instrMemoryVectorCombine is similar to instrCombine, but the source
	// register must be a memory reference and the destination register
	// must be a vector register.
	instrMemoryVectorCombine
	// instrThreeArg merges two sources into a destination in some fashion.
	instrThreeArg
	// instrCompare takes two arguments and writes outputs to the flags register.
	instrCompare
	instrOther
)

// classifyInstruction maps an instruction mnemonic and its argument list to an
// instructionType. A mnemonic is only given its specific classification when
// it has the expected number of arguments; everything else is instrOther.
func classifyInstruction(instr string, args []*node32) instructionType {
	switch instr {
	case "push", "pushq":
		if len(args) == 1 {
			return instrPush
		}

	case "mov", "movq", "vmovq", "movsd", "vmovsd":
		if len(args) == 2 {
			return instrMove
		}

	case "cmovneq", "cmoveq":
		if len(args) == 2 {
			return instrConditionalMove
		}

	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
		if len(args) == 1 {
			return instrJump
		}

	case "orq", "andq", "xorq":
		if len(args) == 2 {
			return instrCombine
		}

	case "cmpq":
		if len(args) == 2 {
			return instrCompare
		}

	case "sarxq", "shlxq", "shrxq":
		if len(args) == 3 {
			return instrThreeArg
		}

	case "vpbroadcastq":
		if len(args) == 2 {
			return instrTransformingMove
		}

	case "movlps", "movhps":
		if len(args) == 2 {
			return instrMemoryVectorCombine
		}
	}

	return instrOther
}

// push returns a wrapper that emits "pushq %rax", runs the wrapped code and
// then emits "xchg %rax, (%rsp)".
func push(w stringWriter) wrapperFunc {
	return func(k func()) {
		w.WriteString("\tpushq %rax\n")
		k()
		w.WriteString("\txchg %rax, (%rsp)\n")
	}
}

// compare returns a wrapper that runs the wrapped code and then emits
// "instr a, b".
func compare(w stringWriter, instr, a, b string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString(fmt.Sprintf("\t%s %s, %s\n", instr, a, b))
	}
}

// loadFromGOT records symbol@section in d.gotExternalsNeeded and returns a
// wrapper that emits a flag-preserving sequence (pushf ... popf) which loads
// through the <symbol>_<section>_external delta symbol into destination.
// Unless redzoneCleared is set, the sequence is bracketed by instructions that
// move %rsp down and back up by 128 bytes.
//
// The recording happens when loadFromGOT is called, not when the wrapper
// runs. The returned wrapper does not invoke its argument.
func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}

	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		}
		w.WriteString("\tpushf\n")
		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
		w.WriteString("\tpopf\n")
		if !redzoneCleared {
			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
		}
	}
}

// saveFlags returns a wrapper that brackets the wrapped code with pushfq and
// popfq. Unless redzoneCleared is set, %rsp is first moved down by 128 bytes
// and, via the deferred write, moved back up after popfq has been emitted.
func saveFlags(w stringWriter, redzoneCleared bool) wrapperFunc {
	return func(k func()) {
		if !redzoneCleared {
			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
			defer w.WriteString("\tleaq 128(%rsp), %rsp\n")
		}
		w.WriteString("\tpushfq\n")
		k()
		w.WriteString("\tpopfq\n")
	}
}

// saveRegister picks the first of %rax, %rbx, %rcx and %rdx that is not listed
// in avoidRegs and returns it together with a wrapper that moves %rsp down by
// 128 bytes, pushes that register, runs the wrapped code, pops the register
// and moves %rsp back. It panics if every candidate is excluded.
func saveRegister(w stringWriter, avoidRegs []string) (wrapperFunc, string) {
	candidates := []string{"%rax", "%rbx", "%rcx", "%rdx"}

	var reg string
NextCandidate:
	for _, candidate := range candidates {
		for _, avoid := range avoidRegs {
			if candidate == avoid {
				continue NextCandidate
			}
		}

		reg = candidate
		break
	}

	if len(reg) == 0 {
		panic("too many excluded registers")
	}

	return func(k func()) {
		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
		w.WriteString("\tpushq " + reg + "\n")
		k()
		w.WriteString("\tpopq " + reg + "\n")
		w.WriteString("\tleaq 128(%rsp), %rsp\n")
	}, reg
}

// moveTo returns a wrapper that runs the wrapped code and then emits a movq
// (vmovq when isAVX is set) from source to target.
func moveTo(w stringWriter, target string, isAVX bool, source string) wrapperFunc {
	return func(k func()) {
		k()
		prefix := ""
		if isAVX {
			prefix = "v"
		}
		w.WriteString("\t" + prefix + "movq " + source + ", " + target + "\n")
	}
}

// finalTransform returns a wrapper that runs the wrapped code and then emits
// transformInstruction with reg as both source and destination.
func finalTransform(w stringWriter, transformInstruction, reg string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + transformInstruction + " " + reg + ", " + reg + "\n")
	}
}

// combineOp returns a wrapper that runs the wrapped code and then emits
// "instructionName source, dest".
func combineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source + ", " + dest + "\n")
	}
}

// threeArgCombineOp returns a wrapper that runs the wrapped code and then
// emits "instructionName source1, source2, dest".
func threeArgCombineOp(w stringWriter, instructionName, source1, source2, dest string) wrapperFunc {
	return func(k func()) {
		k()
		w.WriteString("\t" + instructionName + " " + source1 + ", " + source2 + ", " + dest + "\n")
	}
}

// memoryVectorCombineOp returns a wrapper that runs the wrapped code, pushes
// source, emits instructionName reading from the top of the stack into dest,
// and then moves %rsp back up by 8 bytes.
func memoryVectorCombineOp(w stringWriter, instructionName, source, dest string) wrapperFunc {
	return func(k func()) {
		k()
		// These instructions can only read from memory, so push
		// tempReg and read from the stack. Note we assume the red zone
		// was previously cleared by saveRegister().
		w.WriteString("\tpushq " + source + "\n")
		w.WriteString("\t" + instructionName + " (%rsp), " + dest + "\n")
		w.WriteString("\tleaq 8(%rsp), %rsp\n")
	}
}

// isValidLEATarget reports whether reg is something other than an %xmm, %ymm
// or %zmm register.
func isValidLEATarget(reg string) bool {
	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
}

// undoConditionalMove returns a wrapper that emits a jump on the inverse of
// instr's condition to the forward local label 999, runs the wrapped code and
// then emits that label, so the wrapped code is skipped when the condition of
// the original conditional move does not hold. Only cmoveq and cmovneq are
// supported; anything else panics.
func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
	var invertedCondition string

	switch instr {
	case "cmoveq":
		invertedCondition = "ne"
	case "cmovneq":
		invertedCondition = "e"
	default:
		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
	}

	return func(k func()) {
		w.WriteString("\tj" + invertedCondition + " 999f\n")
		k()
		w.WriteString("999:\n")
	}
}

// isRIPRelative reports whether node is a BaseIndexScale node whose text is
// exactly "(%rip)". A nil node yields false.
func (d *delocation) isRIPRelative(node *node32) bool {
	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
}

func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			changed = didChange

			if symbol == "OPENSSL_ia32cap_P" && section == "" {
				if instructionName != "leaq" {
					return nil, fmt.Errorf("non-leaq instruction %q referenced OPENSSL_ia32cap_P directly", instructionName)
				}

				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
				}

				target := argNodes[1]
				assertNodeType(target, ruleRegisterOrConstant)
				reg := d.contents(target)

				if !strings.HasPrefix(reg, "%r") {
					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
				}

				changed = true

				// Flag-altering instructions (i.e. addq) are going to be used so the
				// flags need to be preserved.
				wrappers = append(wrappers, saveFlags(d.output, false /* Red Zone not yet cleared */))

				wrappers = append(wrappers, func(k func()) {
					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
				})

				break Args
			}

			switch section {
			case "":
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				}

			case "PLT":
				if classifyInstruction(instructionName, argNodes) != instrJump {
					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
				}

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					// Unknown symbol via PLT is an
					// out-call from the module, e.g.
					// memcpy.
					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
					symbol = redirectorName(symbol)
				}

				changed = true

			case "GOTPCREL":
				if len(offset) > 0 {
					return nil, errors.New("loading from GOT with offset is unsupported")
				}
				if !d.isRIPRelative(memRef) {
					return nil, errors.New("GOT access must be IP-relative")
				}

				useGOT := false
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !isSynthesized(symbol) {
					useGOT = true
				}

				classification := classifyInstruction(instructionName, argNodes)
				if classification != instrThreeArg && classification != instrCompare && i != 0 {
					return nil, errors.New("GOT access must be source operand")
				}

				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
				var targetReg string
				var redzoneCleared bool
				switch classification {
				case instrPush:
					wrappers = append(wrappers, push(d.output))
					targetReg = "%rax"
				case instrConditionalMove:
					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
					fallthrough
				case instrMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
				case instrCompare:
					otherSource := d.contents(argNodes[i^1])
					saveRegWrapper, tempReg := saveRegister(d.output, []string{otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)
					if i == 0 {
						wrappers = append(wrappers, compare(d.output, instructionName, tempReg, otherSource))
					} else {
						wrappers = append(wrappers, compare(d.output, instructionName, otherSource, tempReg))
					}
					targetReg = tempReg
				case instrTransformingMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					wrappers = append(wrappers, finalTransform(d.output, instructionName, targetReg))
					if isValidLEATarget(targetReg) {
						return nil, errors.New("Currently transforming moves are assumed to target XMM registers. Otherwise we'll pop %rax before reading it to do the transform.")
					}
				case instrCombine:
					targetReg = d.contents(argNodes[1])
					if !isValidLEATarget(targetReg) {
						return nil, fmt.Errorf("cannot handle combining instructions targeting non-general registers")
					}
					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					wrappers = append(wrappers, combineOp(d.output, instructionName, tempReg, targetReg))
					targetReg = tempReg
				case instrMemoryVectorCombine:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
					if isValidLEATarget(targetReg) {
						return nil, errors.New("target register must be an XMM register")
					}
					saveRegWrapper, tempReg := saveRegister(d.output, nil)
					wrappers = append(wrappers, saveRegWrapper)
					redzoneCleared = true
					wrappers = append(wrappers, memoryVectorCombineOp(d.output, instructionName, tempReg, targetReg))
					targetReg = tempReg
				case instrThreeArg:
					if n := len(argNodes); n != 3 {
						return nil, fmt.Errorf("three-argument instruction has %d arguments", n)
					}
					if i != 0 && i != 1 {
						return nil, errors.New("GOT access must be from source operand")
					}
					targetReg = d.contents(argNodes[2])

					otherSource := d.contents(argNodes[1])
					if i == 1 {
						otherSource = d.contents(argNodes[0])
					}

					saveRegWrapper, tempReg := saveRegister(d.output, []string{targetReg, otherSource})
					redzoneCleared = true
					wrappers = append(wrappers, saveRegWrapper)

					if i == 0 {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName, tempReg, otherSource, targetReg))
					} else {
						wrappers = append(wrappers, threeArgCombineOp(d.output, instructionName,
otherSource, tempReg, targetReg)) 1076 } 1077 targetReg = tempReg 1078 default: 1079 return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName) 1080 } 1081 1082 if !isValidLEATarget(targetReg) { 1083 // Sometimes the compiler will load from the GOT to an 1084 // XMM register, which is not a valid target of an LEA 1085 // instruction. 1086 saveRegWrapper, tempReg := saveRegister(d.output, nil) 1087 wrappers = append(wrappers, saveRegWrapper) 1088 isAVX := strings.HasPrefix(instructionName, "v") 1089 wrappers = append(wrappers, moveTo(d.output, targetReg, isAVX, tempReg)) 1090 targetReg = tempReg 1091 if redzoneCleared { 1092 return nil, fmt.Errorf("internal error: Red Zone was already cleared") 1093 } 1094 redzoneCleared = true 1095 } 1096 1097 if symbol == "OPENSSL_ia32cap_P" { 1098 // Flag-altering instructions (i.e. addq) are going to be used so the 1099 // flags need to be preserved. 1100 wrappers = append(wrappers, saveFlags(d.output, redzoneCleared)) 1101 wrappers = append(wrappers, func(k func()) { 1102 d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + targetReg + "\n") 1103 d.output.WriteString("\taddq\t(" + targetReg + "), " + targetReg + "\n") 1104 }) 1105 } else if useGOT { 1106 wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared)) 1107 } else { 1108 wrappers = append(wrappers, func(k func()) { 1109 d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg)) 1110 }) 1111 } 1112 changed = true 1113 break Args 1114 1115 default: 1116 return nil, fmt.Errorf("Unknown section type %q", section) 1117 } 1118 1119 if !changed && len(section) > 0 { 1120 panic("section was not handled") 1121 } 1122 section = "" 1123 1124 argStr := "" 1125 if isIndirect { 1126 argStr += "*" 1127 } 1128 argStr += symbol 1129 argStr += offset 1130 1131 for ; memRef != nil; memRef = memRef.next { 1132 argStr += d.contents(memRef) 1133 } 1134 1135 args = append(args, 
argStr) 1136 1137 case ruleGOTLocation: 1138 if instructionName != "movabsq" { 1139 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ lookup didn't use movabsq") 1140 } 1141 if i != 0 || len(argNodes) != 2 { 1142 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ didn't expected form") 1143 } 1144 1145 d.gotDeltaNeeded = true 1146 changed = true 1147 instructionName = "movq" 1148 assertNodeType(arg.up, ruleLocalSymbol) 1149 baseSymbol := d.mapLocalSymbol(d.contents(arg.up)) 1150 targetReg := d.contents(argNodes[1]) 1151 args = append(args, ".Lboringssl_got_delta(%rip)") 1152 wrappers = append(wrappers, func(k func()) { 1153 k() 1154 d.output.WriteString(fmt.Sprintf("\taddq $.Lboringssl_got_delta-%s, %s\n", baseSymbol, targetReg)) 1155 }) 1156 1157 case ruleGOTSymbolOffset: 1158 if instructionName != "movabsq" { 1159 return nil, fmt.Errorf("_GLOBAL_OFFSET_TABLE_ offset didn't use movabsq") 1160 } 1161 if i != 0 || len(argNodes) != 2 { 1162 return nil, fmt.Errorf("movabs of _GLOBAL_OFFSET_TABLE_ offset didn't have expected form") 1163 } 1164 1165 assertNodeType(arg.up, ruleSymbolName) 1166 symbol := d.contents(arg.up) 1167 if strings.HasPrefix(symbol, ".L") { 1168 symbol = d.mapLocalSymbol(symbol) 1169 } 1170 targetReg := d.contents(argNodes[1]) 1171 1172 var prefix string 1173 isGOTOFF := strings.HasSuffix(d.contents(arg), "@GOTOFF") 1174 if isGOTOFF { 1175 prefix = "gotoff" 1176 d.gotOffOffsetsNeeded[symbol] = struct{}{} 1177 } else { 1178 prefix = "got" 1179 d.gotOffsetsNeeded[symbol] = struct{}{} 1180 } 1181 changed = true 1182 1183 wrappers = append(wrappers, func(k func()) { 1184 // Even if one tries to use 32-bit GOT offsets, Clang's linker (at the time 1185 // of writing) emits 64-bit relocations anyway, so the following four bytes 1186 // get stomped. Thus we use 64-bit offsets. 
1187 d.output.WriteString(fmt.Sprintf("\tmovq .Lboringssl_%s_%s(%%rip), %s\n", prefix, symbol, targetReg)) 1188 }) 1189 1190 default: 1191 panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule])) 1192 } 1193 } 1194 1195 if changed { 1196 d.writeCommentedNode(statement) 1197 replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n" 1198 wrappers.do(func() { 1199 d.output.WriteString(replacement) 1200 }) 1201 } else { 1202 d.writeNode(statement) 1203 } 1204 1205 return statement, nil 1206} 1207 1208func (d *delocation) handleBSS(statement *node32) (*node32, error) { 1209 lastStatement := statement 1210 for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next { 1211 node := skipWS(statement.up) 1212 if node == nil { 1213 d.writeNode(statement) 1214 continue 1215 } 1216 1217 switch node.pegRule { 1218 case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective: 1219 d.writeNode(statement) 1220 1221 case ruleDirective: 1222 directive := node.up 1223 assertNodeType(directive, ruleDirectiveName) 1224 directiveName := d.contents(directive) 1225 if directiveName == "text" || directiveName == "section" || directiveName == "data" { 1226 return lastStatement, nil 1227 } 1228 d.writeNode(statement) 1229 1230 case ruleLabel: 1231 label := node.up 1232 d.writeNode(statement) 1233 1234 if label.pegRule != ruleLocalSymbol { 1235 symbol := d.contents(label) 1236 localSymbol := localTargetName(symbol) 1237 d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol)) 1238 1239 d.bssAccessorsNeeded[symbol] = localSymbol 1240 } 1241 1242 case ruleLabelContainingDirective: 1243 var err error 1244 statement, err = d.processLabelContainingDirective(statement, node.up) 1245 if err != nil { 1246 return nil, err 1247 } 1248 1249 default: 1250 return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement)) 1251 } 1252 } 1253 1254 return 
// writeAarch64Function writes a complete AArch64 function definition named
// funcName to w: alignment, .hidden and .type directives, the label, and a
// .cfi_startproc/.cfi_endproc pair around whatever writeContents emits,
// followed by a .size directive.
func writeAarch64Function(w stringWriter, funcName string, writeContents func(stringWriter)) {
	w.WriteString(".p2align 2\n")
	w.WriteString(".hidden " + funcName + "\n")
	w.WriteString(".type " + funcName + ", @function\n")
	w.WriteString(funcName + ":\n")
	w.WriteString(".cfi_startproc\n")
	writeContents(w)
	w.WriteString(".cfi_endproc\n")
	w.WriteString(".size " + funcName + ", .-" + funcName + "\n")
}

// transform writes the delocated form of inputs to w.
//
// It first scans every input to collect the set of defined symbols and the
// file numbers used by .file directives, panicking on duplicates. It then
// emits, in order: the BORINGSSL_bcm_text_start marker, each processed input,
// the BORINGSSL_bcm_text_end marker, the redirector functions, the BSS
// accessor functions, the processor-specific helpers and data words, and
// finally the BORINGSSL_bcm_text_hash object filled with
// fipscommon.UninitHashValue.
//
// The processor is detected from the first input only. An error is returned
// if processing any input fails; errors from writing to w are not checked.
func transform(w stringWriter, inputs []inputFile) error {
	// symbols contains all defined symbols.
	symbols := make(map[string]struct{})
	// fileNumbers is the set of IDs seen in .file directives.
	fileNumbers := make(map[int]struct{})
	// maxObservedFileNumber contains the largest seen file number in a
	// .file directive. Zero is not a valid number.
	maxObservedFileNumber := 0
	// fileDirectivesContainMD5 is true if the compiler is outputting MD5
	// checksums in .file directives. If it does so, then this script needs
	// to match that behaviour otherwise warnings result.
	fileDirectivesContainMD5 := false

	// OPENSSL_ia32cap_get will be synthesized by this script.
	symbols["OPENSSL_ia32cap_get"] = struct{}{}

	for _, input := range inputs {
		// Record every labelled symbol name. A name defined twice, in
		// the same or in different inputs, is fatal.
		forEachPath(input.ast.up, func(node *node32) {
			symbol := input.contents[node.begin:node.end]
			if _, ok := symbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
			}
			symbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabel, ruleSymbolName)

		// Inspect the .file directives so that the file number used
		// for the code inserted below does not clash with any of them.
		forEachPath(input.ast.up, func(node *node32) {
			assertNodeType(node, ruleLocationDirective)
			directive := input.contents[node.begin:node.end]
			if !strings.HasPrefix(directive, ".file") {
				return
			}
			parts := strings.Fields(directive)
			if len(parts) == 2 {
				// This is a .file directive with just a
				// filename. Clang appears to generate just one
				// of these at the beginning of the output for
				// the compilation unit. Ignore it.
				return
			}
			fileNo, err := strconv.Atoi(parts[1])
			if err != nil {
				panic(fmt.Sprintf("Failed to parse file number from .file: %q", directive))
			}

			if _, ok := fileNumbers[fileNo]; ok {
				panic(fmt.Sprintf("Duplicate file number %d observed", fileNo))
			}
			fileNumbers[fileNo] = struct{}{}

			if fileNo > maxObservedFileNumber {
				maxObservedFileNumber = fileNo
			}

			for _, token := range parts[2:] {
				if token == "md5" {
					fileDirectivesContainMD5 = true
				}
			}
		}, ruleStatement, ruleLocationDirective)
	}

	// With no inputs there is nothing to inspect, so x86-64 is assumed.
	processor := x86_64
	if len(inputs) > 0 {
		processor = detectProcessor(inputs[0])
	}

	commentIndicator := "#"
	if processor == aarch64 {
		commentIndicator = "//"
	}

	// gotDeltaNeeded and currentInput are left at their zero values.
	d := &delocation{
		symbols:             symbols,
		processor:           processor,
		commentIndicator:    commentIndicator,
		output:              w,
		redirectors:         make(map[string]string),
		bssAccessorsNeeded:  make(map[string]string),
		gotExternalsNeeded:  make(map[string]struct{}),
		gotOffsetsNeeded:    make(map[string]struct{}),
		gotOffOffsetsNeeded: make(map[string]struct{}),
	}

	// The inserted code is attributed to a synthetic source file whose
	// number is one greater than any number seen in the inputs.
	w.WriteString(".text\n")
	var fileTrailing string
	if fileDirectivesContainMD5 {
		fileTrailing = " md5 0x00000000000000000000000000000000"
	}
	w.WriteString(fmt.Sprintf(".file %d \"inserted_by_delocate.c\"%s\n", maxObservedFileNumber+1, fileTrailing))
	w.WriteString(fmt.Sprintf(".loc %d 1 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_start:\n")

	for _, input := range inputs {
		if err := d.processInput(input); err != nil {
			return err
		}
	}

	w.WriteString(".text\n")
	w.WriteString(fmt.Sprintf(".loc %d 2 0\n", maxObservedFileNumber+1))
	w.WriteString("BORINGSSL_bcm_text_end:\n")

	// Emit redirector functions. Each is a single jump instruction.
	// The map keys are sorted so that the output is deterministic.
	var redirectorNames []string
	for name := range d.redirectors {
		redirectorNames = append(redirectorNames, name)
	}
	sort.Strings(redirectorNames)

	for _, name := range redirectorNames {
		// name is the jump target and redirector is the label that the
		// rewritten code refers to instead.
		redirector := d.redirectors[name]
		switch d.processor {
		case aarch64:
			writeAarch64Function(w, redirector, func(w stringWriter) {
				w.WriteString("\tb " + name + "\n")
			})

		case x86_64:
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			w.WriteString("\tjmp\t" + name + "\n")
		}
	}

	var accessorNames []string
	for accessor := range d.bssAccessorsNeeded {
		accessorNames = append(accessorNames, accessor)
	}
	sort.Strings(accessorNames)

	// Emit BSS accessor functions. Each is a single LEA followed by RET.
	for _, name := range accessorNames {
		funcName := accessorName(name)
		target := d.bssAccessorsNeeded[name]

		switch d.processor {
		case x86_64:
			w.WriteString(".type " + funcName + ", @function\n")
			w.WriteString(funcName + ":\n")
			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")

		case aarch64:
			// AArch64 forms the address with an ADRP/ADD pair
			// rather than a single LEA.
			writeAarch64Function(w, funcName, func(w stringWriter) {
				w.WriteString("\tadrp x0, " + target + "\n")
				w.WriteString("\tadd x0, x0, :lo12:" + target + "\n")
				w.WriteString("\tret\n")
			})
		}
	}

	switch d.processor {
	case aarch64:
		// One helper per external symbol, each loading the symbol's
		// GOT entry into x0.
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, symbol := range externalNames {
			writeAarch64Function(w, gotHelperName(symbol), func(w stringWriter) {
				w.WriteString("\tadrp x0, :got:" + symbol + "\n")
				w.WriteString("\tldr x0, [x0, :got_lo12:" + symbol + "]\n")
				w.WriteString("\tret\n")
			})
		}

		// This helper is emitted unconditionally.
		writeAarch64Function(w, ".LOPENSSL_armcap_P_addr", func(w stringWriter) {
			w.WriteString("\tadrp x0, OPENSSL_armcap_P\n")
			w.WriteString("\tadd x0, x0, :lo12:OPENSSL_armcap_P\n")
			w.WriteString("\tret\n")
		})

	case x86_64:
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, name := range externalNames {
			// Each entry is split at its first "@" into a symbol and
			// a section; an entry without "@" would panic here.
			parts := strings.SplitN(name, "@", 2)
			symbol, section := parts[0], parts[1]
			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
			w.WriteString(symbol + "_" + section + "_external:\n")
			// Ideally this would be .quad foo@GOTPCREL, but clang's
			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
			// we manually sign-extend the value, knowing that the GOT is
			// always at the end, thus foo@GOTPCREL has a positive value.
			w.WriteString("\t.long " + symbol + "@" + section + "\n")
			w.WriteString("\t.long 0\n")
		}

		// OPENSSL_ia32cap_get is defined under both its global name and
		// its local target name.
		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
		w.WriteString(".globl OPENSSL_ia32cap_get\n")
		w.WriteString(localTargetName("OPENSSL_ia32cap_get") + ":\n")
		w.WriteString("OPENSSL_ia32cap_get:\n")
		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
		w.WriteString("\tret\n")

		// OPENSSL_ia32cap_addr_delta holds the distance from itself to
		// OPENSSL_ia32cap_P.
		w.WriteString(".extern OPENSSL_ia32cap_P\n")
		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")

		if d.gotDeltaNeeded {
			w.WriteString(".Lboringssl_got_delta:\n")
			w.WriteString("\t.quad _GLOBAL_OFFSET_TABLE_-.Lboringssl_got_delta\n")
		}

		// 64-bit words holding the @GOT and @GOTOFF offsets that were
		// recorded while processing the inputs.
		for _, name := range sortedSet(d.gotOffsetsNeeded) {
			w.WriteString(".Lboringssl_got_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOT\n")
		}
		for _, name := range sortedSet(d.gotOffOffsetsNeeded) {
			w.WriteString(".Lboringssl_gotoff_" + name + ":\n")
			w.WriteString("\t.quad " + name + "@GOTOFF\n")
		}
	}

	// The hash object is filled with fipscommon.UninitHashValue, one
	// .byte directive per byte.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range fipscommon.UninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}
w.WriteString(".Lboringssl_gotoff_" + name + ":\n") 1475 w.WriteString("\t.quad " + name + "@GOTOFF\n") 1476 } 1477 } 1478 1479 w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n") 1480 w.WriteString(".size BORINGSSL_bcm_text_hash, 32\n") 1481 w.WriteString("BORINGSSL_bcm_text_hash:\n") 1482 for _, b := range fipscommon.UninitHashValue { 1483 w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n") 1484 } 1485 1486 return nil 1487} 1488 1489// preprocess runs source through the C preprocessor. 1490func preprocess(cppCommand []string, path string) ([]byte, error) { 1491 var args []string 1492 args = append(args, cppCommand...) 1493 args = append(args, path) 1494 1495 cpp := exec.Command(args[0], args[1:]...) 1496 cpp.Stderr = os.Stderr 1497 var result bytes.Buffer 1498 cpp.Stdout = &result 1499 1500 if err := cpp.Run(); err != nil { 1501 return nil, err 1502 } 1503 1504 return result.Bytes(), nil 1505} 1506 1507func parseInputs(inputs []inputFile, cppCommand []string) error { 1508 for i, input := range inputs { 1509 var contents string 1510 1511 if input.isArchive { 1512 arFile, err := os.Open(input.path) 1513 if err != nil { 1514 return err 1515 } 1516 defer arFile.Close() 1517 1518 ar, err := ar.ParseAR(arFile) 1519 if err != nil { 1520 return err 1521 } 1522 1523 if len(ar) != 1 { 1524 return fmt.Errorf("expected one file in archive, but found %d", len(ar)) 1525 } 1526 1527 for _, c := range ar { 1528 contents = string(c) 1529 } 1530 } else { 1531 var inBytes []byte 1532 var err error 1533 1534 if len(cppCommand) > 0 { 1535 inBytes, err = preprocess(cppCommand, input.path) 1536 } else { 1537 inBytes, err = os.ReadFile(input.path) 1538 } 1539 if err != nil { 1540 return err 1541 } 1542 1543 contents = string(inBytes) 1544 } 1545 1546 asm := Asm{Buffer: contents, Pretty: true} 1547 asm.Init() 1548 if err := asm.Parse(); err != nil { 1549 return fmt.Errorf("error while parsing %q: %s", input.path, err) 1550 } 1551 ast := asm.AST() 1552 1553 
// includePathFromHeaderFilePath derives an include directory from the path of
// a header file. It strips path elements from the right until it meets one
// named exactly "openssl" and returns everything before that element,
// including the trailing separator (which may be the empty string). An error
// is returned if no element of path is named "openssl".
func includePathFromHeaderFilePath(path string) (string, error) {
	rest := path
	for {
		parent, elem := filepath.Split(rest)
		if elem == "openssl" {
			return parent, nil
		}
		if parent == "" {
			return "", fmt.Errorf("failed to find 'openssl' path element in header file path %q", path)
		}

		// Split leaves the separator on the directory part. Drop it so
		// that the next round yields the preceding path element.
		rest = parent[:len(parent)-1]
	}
}
1616 if strings.HasSuffix(path, ".h") { 1617 dir, err := includePathFromHeaderFilePath(path) 1618 if err != nil { 1619 fmt.Fprintf(os.Stderr, "%s\n", err) 1620 os.Exit(1) 1621 } 1622 includePaths[dir] = struct{}{} 1623 continue 1624 } 1625 1626 inputs = append(inputs, inputFile{ 1627 path: path, 1628 index: i + 1, 1629 }) 1630 } 1631 1632 var cppCommand []string 1633 if len(*ccPath) > 0 { 1634 cppCommand = append(cppCommand, *ccPath) 1635 cppCommand = append(cppCommand, strings.Fields(*ccFlags)...) 1636 // Some of ccFlags might be superfluous when running the 1637 // preprocessor, but we don't want the compiler complaining that 1638 // "argument unused during compilation". 1639 cppCommand = append(cppCommand, "-Wno-unused-command-line-argument") 1640 // We are preprocessing for assembly output and need to simulate that 1641 // environment for arm_arch.h. 1642 cppCommand = append(cppCommand, "-D__ASSEMBLER__=1") 1643 1644 for includePath := range includePaths { 1645 cppCommand = append(cppCommand, "-I"+includePath) 1646 } 1647 1648 // -E requests only preprocessing. 1649 cppCommand = append(cppCommand, "-E") 1650 } 1651 1652 if err := parseInputs(inputs, cppCommand); err != nil { 1653 fmt.Fprintf(os.Stderr, "%s\n", err) 1654 os.Exit(1) 1655 } 1656 1657 out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 1658 if err != nil { 1659 panic(err) 1660 } 1661 defer out.Close() 1662 1663 if err := transform(out, inputs); err != nil { 1664 fmt.Fprintf(os.Stderr, "%s\n", err) 1665 os.Exit(1) 1666 } 1667} 1668 1669func forEachPath(node *node32, cb func(*node32), rules ...pegRule) { 1670 if node == nil { 1671 return 1672 } 1673 1674 if len(rules) == 0 { 1675 cb(node) 1676 return 1677 } 1678 1679 rule := rules[0] 1680 childRules := rules[1:] 1681 1682 for ; node != nil; node = node.next { 1683 if node.pegRule != rule { 1684 continue 1685 } 1686 1687 if len(childRules) == 0 { 1688 cb(node) 1689 } else { 1690 forEachPath(node.up, cb, childRules...) 
1691 } 1692 } 1693} 1694 1695func skipNodes(node *node32, ruleToSkip pegRule) *node32 { 1696 for ; node != nil && node.pegRule == ruleToSkip; node = node.next { 1697 } 1698 return node 1699} 1700 1701func skipWS(node *node32) *node32 { 1702 return skipNodes(node, ruleWS) 1703} 1704 1705func assertNodeType(node *node32, expected pegRule) { 1706 if rule := node.pegRule; rule != expected { 1707 panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected])) 1708 } 1709} 1710 1711type wrapperFunc func(func()) 1712 1713type wrapperStack []wrapperFunc 1714 1715func (w *wrapperStack) do(baseCase func()) { 1716 if len(*w) == 0 { 1717 baseCase() 1718 return 1719 } 1720 1721 wrapper := (*w)[0] 1722 *w = (*w)[1:] 1723 wrapper(func() { w.do(baseCase) }) 1724} 1725 1726// localTargetName returns the name of the local target label for a global 1727// symbol named name. 1728func localTargetName(name string) string { 1729 return ".L" + name + "_local_target" 1730} 1731 1732func isSynthesized(symbol string) bool { 1733 return strings.HasSuffix(symbol, "_bss_get") || 1734 symbol == "OPENSSL_ia32cap_get" || 1735 strings.HasPrefix(symbol, "BORINGSSL_bcm_text_") 1736} 1737 1738func redirectorName(symbol string) string { 1739 return "bcm_redirector_" + symbol 1740} 1741 1742// sectionType returns the type of a section. I.e. a section called “.text.foo” 1743// is a “.text” section. 1744func sectionType(section string) (string, bool) { 1745 if len(section) == 0 || section[0] != '.' { 1746 return "", false 1747 } 1748 1749 i := strings.Index(section[1:], ".") 1750 if i != -1 { 1751 section = section[:i+1] 1752 } 1753 1754 if strings.HasPrefix(section, ".debug_") { 1755 return ".debug", true 1756 } 1757 1758 return section, true 1759} 1760 1761// accessorName returns the name of the accessor function for a BSS symbol 1762// named name. 
1763func accessorName(name string) string { 1764 return name + "_bss_get" 1765} 1766 1767func (d *delocation) mapLocalSymbol(symbol string) string { 1768 if d.currentInput.index == 0 { 1769 return symbol 1770 } 1771 return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index) 1772} 1773 1774func detectProcessor(input inputFile) processorType { 1775 for statement := input.ast.up; statement != nil; statement = statement.next { 1776 node := skipNodes(statement.up, ruleWS) 1777 if node == nil || node.pegRule != ruleInstruction { 1778 continue 1779 } 1780 1781 instruction := node.up 1782 instructionName := input.contents[instruction.begin:instruction.end] 1783 1784 switch instructionName { 1785 case "movq", "call", "leaq": 1786 return x86_64 1787 case "str", "bl", "ldr", "st1": 1788 return aarch64 1789 } 1790 } 1791 1792 panic("processed entire input and didn't recognise any instructions.") 1793} 1794 1795func sortedSet(m map[string]struct{}) []string { 1796 ret := make([]string, 0, len(m)) 1797 for key := range m { 1798 ret = append(ret, key) 1799 } 1800 sort.Strings(ret) 1801 return ret 1802} 1803