//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class X86Subtarget;
class X86TargetMachine;

namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType : unsigned {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// Bit scan forward.
  BSF,
  /// Bit scan reverse.
  BSR,

  /// X86 funnel/double shift i16 instructions. These correspond to
  /// X86::SHLDW and X86::SHRDW instructions which have different amt
  /// modulo rules to generic funnel shifts.
  /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
  FSHL,
  FSHR,

  /// Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// Bitwise logical ANDNOT of floating point values. This
  /// corresponds to X86::ANDNPS or X86::ANDNPD.
  FANDN,

  /// These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information. In particular the
  /// operands of these nodes are:
  ///
  ///   #0 - The incoming token chain
  ///   #1 - The callee
  ///   #2 - The number of arg bytes the caller pushes on the stack.
  ///   #3 - The number of arg bytes the callee pops off the stack.
  ///   #4 - The value to pass in AL/AX/EAX (optional)
  ///   #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///   #0 - The outgoing token chain
  ///   #1 - The first register result value (optional)
  ///   #2 - The second register result value (optional)
  ///
  CALL,

  /// Same as call except it adds the NoTrack prefix.
  NT_CALL,

  /// X86 compare and logical compare instructions.
  CMP,
  FCMP,
  COMI,
  UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  /// X86 Select
  SELECTS,

  // Same as SETCC except it's materialized with a sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY, // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
  FSETCC,

  /// X86 FP SETCC, similar to above, but with output as an i1 mask and
  /// a version with SAE.
  FSETCCM,
  FSETCCM_SAE,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch if condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  NT_BRIND,

  /// Return with a flag operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_FLAG,

  /// Return from interrupt. Operand 0 is the number of bytes to pop.
  IRET,

  /// Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// A wrapper node for TargetConstantPool, TargetJumpTable,
  /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
  /// MCSymbol and TargetBlockAddress.
  Wrapper,

  /// Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.
  MOVDQ2Q,

  /// Copies a 32-bit value from the low word of an MMX
  /// vector to a GPR.
  MMX_MOVD2W,

  /// Copies a GPR into the low 32-bit word of an MMX vector
  /// and zero out the high word.
  MMX_MOVW2D,

  /// Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW,

  /// Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// Compute Sum of Absolute Differences.
  PSADBW,
  /// Compute Double Block Packed Sum-Absolute-Differences
  DBPSADBW,

  /// Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// Blend where the selector is an immediate.
  BLENDI,

  /// Dynamic (non-constant condition) vector blend where only the sign bits
  /// of the condition elements are used. This is used to enforce that the
  /// condition mask is not valid for generic VSELECT optimizations. This
  /// is also used to implement the intrinsics.
  /// Operands are in VSELECT order: MASK, TRUE, FALSE
  BLENDV,

  /// Combined add and sub on an FP vector.
  ADDSUB,

  // FP vector ops with rounding mode.
  FADD_RND,
  FADDS,
  FADDS_RND,
  FSUB_RND,
  FSUBS,
  FSUBS_RND,
  FMUL_RND,
  FMULS,
  FMULS_RND,
  FDIV_RND,
  FDIVS,
  FDIVS_RND,
  FMAX_SAE,
  FMAXS_SAE,
  FMIN_SAE,
  FMINS_SAE,
  FSQRT_RND,
  FSQRTS,
  FSQRTS_RND,

  // FP vector get exponent.
  FGETEXP,
  FGETEXP_SAE,
  FGETEXPS,
  FGETEXPS_SAE,
  // Extract Normalized Mantissas.
  VGETMANT,
  VGETMANT_SAE,
  VGETMANTS,
  VGETMANTS_SAE,
  // FP Scale.
  SCALEF,
  SCALEF_RND,
  SCALEFS,
  SCALEFS_RND,

  // Unsigned Integer average.
  AVG,

  /// Integer horizontal add/sub.
  HADD,
  HSUB,

  /// Floating point horizontal add/sub.
  FHADD,
  FHSUB,

  // Detect Conflicts Within a Vector
  CONFLICT,

  /// Floating point max and min.
  FMAX,
  FMIN,

  /// Commutative FMIN and FMAX.
  FMAXC,
  FMINC,

  /// Scalar intrinsic floating point max and min.
  FMAXS,
  FMINS,

  /// Floating point reciprocal-sqrt and reciprocal approximation.
  /// Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT,
  FRCP,

  // AVX-512 reciprocal approximations with a little more precision.
  RSQRT14,
  RSQRT14S,
  RCP14,
  RCP14S,

  // Thread Local Storage.
  TLSADDR,

  // Thread Local Storage. A call to get the start address
  // of the TLS block for the current module.
  TLSBASEADDR,

  // Thread Local Storage. When calling to an OS provided
  // thunk at the address from an earlier relocation.
  TLSCALL,

  // Exception Handling helpers.
  EH_RETURN,

  // SjLj exception handling setjmp.
  EH_SJLJ_SETJMP,

  // SjLj exception handling longjmp.
  EH_SJLJ_LONGJMP,

  // SjLj exception handling dispatch.
  EH_SJLJ_SETUP_DISPATCH,

  /// Tail call return. See X86TargetLowering::LowerCall for
  /// the list of operands.
  TC_RETURN,

  // Vector move to low scalar and zero higher vector elements.
  VZEXT_MOVL,

  // Vector integer truncate.
  VTRUNC,
  // Vector integer truncate with unsigned/signed saturation.
  VTRUNCUS,
  VTRUNCS,

  // Masked version of the above. Used when less than a 128-bit result is
  // produced since the mask only applies to the lower elements and can't
  // be represented by a select.
  // SRC, PASSTHRU, MASK
  VMTRUNC,
  VMTRUNCUS,
  VMTRUNCS,

  // Vector FP extend.
  VFPEXT,
  VFPEXT_SAE,
  VFPEXTS,
  VFPEXTS_SAE,

  // Vector FP round.
  VFPROUND,
  VFPROUND_RND,
  VFPROUNDS,
  VFPROUNDS_RND,

  // Masked version of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  VMFPROUND,

  // 128-bit vector logical left / right shift
  VSHLDQ,
  VSRLDQ,

  // Vector shift elements
  VSHL,
  VSRL,
  VSRA,

  // Vector variable shift
  VSHLV,
  VSRLV,
  VSRAV,

  // Vector shift elements by immediate
  VSHLI,
  VSRLI,
  VSRAI,

  // Shifts of mask registers.
  KSHIFTL,
  KSHIFTR,

  // Bit rotate by immediate
  VROTLI,
  VROTRI,

  // Vector packed double/float comparison.
  CMPP,

  // Vector integer comparisons.
  PCMPEQ,
  PCMPGT,

  // v8i16 Horizontal minimum and position.
  PHMINPOS,

  MULTISHIFT,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  CMPM,
  // Vector mask comparison generating mask bits for FP values.
  CMPMM,
  // Vector mask comparison with SAE for FP values.
  CMPMM_SAE,

  // Arithmetic operations with FLAGS results.
  ADD,
  SUB,
  ADC,
  SBB,
  SMUL,
  UMUL,
  OR,
  XOR,
  AND,

  // Bit field extract.
  BEXTR,
  BEXTRI,

  // Zero High Bits Starting with Specified Bit Position.
  BZHI,

  // Parallel extract and deposit.
  PDEP,
  PEXT,

  // X86-specific multiply by immediate.
  MUL_IMM,

  // Vector sign bit extraction.
  MOVMSK,

  // Vector bitwise comparisons.
  PTEST,

  // Vector packed fp sign bitwise comparisons.
  TESTP,

  // OR/AND test for masks.
  KORTEST,
  KTEST,

  // ADD for masks.
  KADD,

  // Several flavors of instructions with vector shuffle behaviors.
  // Saturated signed/unsigned packing.
  PACKSS,
  PACKUS,
  // Intra-lane alignr.
  PALIGNR,
  // AVX512 inter-lane alignr.
  VALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  SHUFP,
  // VBMI2 Concat & Shift.
  VSHLD,
  VSHRD,
  VSHLDV,
  VSHRDV,
  // Shuffle Packed Values at 128-bit granularity.
  SHUF128,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVLHPS,
  MOVHLPS,
  MOVSD,
  MOVSS,
  UNPCKL,
  UNPCKH,
  VPERMILPV,
  VPERMILPI,
  VPERMI,
  VPERM2X128,

  // Variable Permute (VPERM).
  // Res = VPERMV MaskV, V0
  VPERMV,

  // 3-op Variable Permute (VPERMT2).
  // Res = VPERMV3 V0, MaskV, V1
  VPERMV3,

  // Bitwise ternary logic.
  VPTERNLOG,
  // Fix Up Special Packed Float32/64 values.
  VFIXUPIMM,
  VFIXUPIMM_SAE,
  VFIXUPIMMS,
  VFIXUPIMMS_SAE,
  // Range Restriction Calculation For Packed Pairs of Float32/64 values.
  VRANGE,
  VRANGE_SAE,
  VRANGES,
  VRANGES_SAE,
  // Reduce - Perform Reduction Transformation on scalar/packed FP.
  VREDUCE,
  VREDUCE_SAE,
  VREDUCES,
  VREDUCES_SAE,
  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  VRNDSCALE,
  VRNDSCALE_SAE,
  VRNDSCALES,
  VRNDSCALES_SAE,
  // Tests Types Of FP Values for packed types.
  VFPCLASS,
  // Tests Types Of FP Values for scalar types.
  VFPCLASSS,

  // Broadcast (splat) scalar or element 0 of a vector. If the operand is
  // a vector, this node may change the vector length as part of the splat.
  VBROADCAST,
  // Broadcast mask to vector.
  VBROADCASTM,
  // Broadcast subvector to vector.
  SUBV_BROADCAST,

  /// SSE4A Extraction and Insertion.
  EXTRQI,
  INSERTQI,

  // XOP arithmetic/logical shifts.
  VPSHA,
  VPSHL,
  // XOP signed/unsigned integer comparisons.
  VPCOM,
  VPCOMU,
  // XOP packed permute bytes.
  VPPERM,
  // XOP two source permutation.
  VPERMIL2,

  // Vector multiply packed unsigned doubleword integers.
  PMULUDQ,
  // Vector multiply packed signed doubleword integers.
  PMULDQ,
  // Vector Multiply Packed Unsigned Integers with Round and Scale.
  MULHRS,

  // Multiply and Add Packed Integers.
  VPMADDUBSW,
  VPMADDWD,

  // AVX512IFMA multiply and add.
  // NOTE: These are different than the instruction and perform
  // op0 x op1 + op2.
  VPMADD52L,
  VPMADD52H,

  // VNNI
  VPDPBUSD,
  VPDPBUSDS,
  VPDPWSSD,
  VPDPWSSDS,

  // FMA nodes.
  // We use the target independent ISD::FMA for the non-inverted case.
  FNMADD,
  FMSUB,
  FNMSUB,
  FMADDSUB,
  FMSUBADD,

  // FMA with rounding mode.
  FMADD_RND,
  FNMADD_RND,
  FMSUB_RND,
  FNMSUB_RND,
  FMADDSUB_RND,
  FMSUBADD_RND,

  // Compress and expand.
  COMPRESS,
  EXPAND,

  // Bits shuffle
  VPSHUFBITQMB,

  // Convert Signed/Unsigned Integer to Floating-Point Value with rounding
  // mode.
  SINT_TO_FP_RND,
  UINT_TO_FP_RND,
  SCALAR_SINT_TO_FP,
  SCALAR_UINT_TO_FP,
  SCALAR_SINT_TO_FP_RND,
  SCALAR_UINT_TO_FP_RND,

  // Vector float/double to signed/unsigned integer.
  CVTP2SI,
  CVTP2UI,
  CVTP2SI_RND,
  CVTP2UI_RND,
  // Scalar float/double to signed/unsigned integer.
  CVTS2SI,
  CVTS2UI,
  CVTS2SI_RND,
  CVTS2UI_RND,

  // Vector float/double to signed/unsigned integer with truncation.
  CVTTP2SI,
  CVTTP2UI,
  CVTTP2SI_SAE,
  CVTTP2UI_SAE,
  // Scalar float/double to signed/unsigned integer with truncation.
  CVTTS2SI,
  CVTTS2UI,
  CVTTS2SI_SAE,
  CVTTS2UI_SAE,

  // Vector signed/unsigned integer to float/double.
  CVTSI2P,
  CVTUI2P,

  // Masked versions of above. Used for v2f64->v4f32.
  // SRC, PASSTHRU, MASK
  MCVTP2SI,
  MCVTP2UI,
  MCVTTP2SI,
  MCVTTP2UI,
  MCVTSI2P,
  MCVTUI2P,

  // Vector float to bfloat16.
  // Convert TWO packed single data to one packed BF16 data
  CVTNE2PS2BF16,
  // Convert packed single data to packed BF16 data
  CVTNEPS2BF16,
  // Masked version of above.
  // SRC, PASSTHRU, MASK
  MCVTNEPS2BF16,

  // Dot product of BF16 pairs accumulated into
  // packed single precision.
  DPBF16PS,

  // Save xmm argument registers to the stack, according to %al. An operator
  // is needed so that this can be expanded with control flow.
  VASTART_SAVE_XMM_REGS,

  // Windows's _chkstk call to do stack probing.
  WIN_ALLOCA,

  // For allocating variable amounts of stack space when using
  // segmented stacks. Check if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // For allocating stack space when using stack clash protector.
  // Allocation is performed by block, and each block is probed.
  PROBED_ALLOCA,

  // Memory barriers.
  MEMBARRIER,
  MFENCE,

  // Get a random integer and indicate whether it is valid in CF.
  RDRAND,

  // Get a NIST SP800-90B & C compliant random integer and
  // indicate whether it is valid in CF.
  RDSEED,

  // Protection keys
  // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
  // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
  // value for ECX.
  RDPKRU,
  WRPKRU,

  // SSE42 string comparisons.
  // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
  // will emit one or two instructions based on which results are used. If
  // flags and index/mask are both used, this allows us to use a single
  // instruction since we won't have to pick an opcode for flags. Instead we
  // can rely on the DAG to CSE everything and decide at isel.
  PCMPISTR,
  PCMPESTR,

  // Test if in transactional execution.
  XTEST,

  // ERI instructions.
  RSQRT28,
  RSQRT28_SAE,
  RSQRT28S,
  RSQRT28S_SAE,
  RCP28,
  RCP28_SAE,
  RCP28S,
  RCP28S_SAE,
  EXP2,
  EXP2_SAE,

  // Conversions between float and half-float.
  CVTPS2PH,
  CVTPH2PS,
  CVTPH2PS_SAE,

  // Masked version of above.
  // SRC, RND, PASSTHRU, MASK
  MCVTPS2PH,

  // Galois Field Arithmetic Instructions
  GF2P8AFFINEINVQB,
  GF2P8AFFINEQB,
  GF2P8MULB,

  // LWP insert record.
  LWPINS,

  // User level wait
  UMWAIT,
  TPAUSE,

  // Enqueue Stores Instructions
  ENQCMD,
  ENQCMDS,

  // For avx512-vp2intersect
  VP2INTERSECT,

  // User level interrupts - testui
  TESTUI,

  /// X86 strict FP compare instructions.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPS,

  // Vector packed double/float comparison.
  STRICT_CMPP,

  /// Vector comparison generating mask bits for fp and
  /// integer signed and unsigned data types.
  STRICT_CMPM,

  // Vector float/double to signed/unsigned integer with truncation.
  STRICT_CVTTP2SI,
  STRICT_CVTTP2UI,

  // Vector FP extend.
  STRICT_VFPEXT,

  // Vector FP round.
  STRICT_VFPROUND,

  // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
  // Also used by the legacy (V)ROUND intrinsics where we mask out the
  // scaling part of the immediate.
  STRICT_VRNDSCALE,

  // Vector signed/unsigned integer to float/double.
  STRICT_CVTSI2P,
  STRICT_CVTUI2P,

  // Strict FMA nodes.
  STRICT_FNMADD,
  STRICT_FMSUB,
  STRICT_FNMSUB,

  // Conversions between float and half-float.
  STRICT_CVTPS2PH,
  STRICT_CVTPH2PS,

  // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
  // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

  // Compare and swap.
  LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,
  LCMPXCHG16_SAVE_RBX_DAG,

  /// LOCK-prefixed arithmetic read-modify-write instructions.
  /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
  LADD,
  LSUB,
  LOR,
  LXOR,
  LAND,

  // Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // extract_vector_elt, store.
  VEXTRACT_STORE,

  // scalar broadcast from memory
  VBROADCAST_LOAD,

  // Store FP control word into i16 memory.
  FNSTCW16m,

  /// This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source. This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain). Memory VT specifies the type to store to.
  FP_TO_INT_IN_MEM,

  /// This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result. This corresponds to the
  /// X86::FILD*m instructions. It has two inputs (token chain and address)
  /// and two outputs (FP value and token chain). The integer source type is
  /// specified by the memory VT.
  FILD,

  /// This instruction implements a fp->int store from FP stack
  /// slots. This corresponds to the fist instruction. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FIST,

  /// This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, and ptr to load from. The memory VT specifies the type to
  /// load from.
  FLD,

  /// This instruction implements a truncating store from FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and glue. The memory VT
  /// specifies the type to store as.
  FST,

  /// This instruction grabs the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64,

  // Vector truncating store with unsigned/signed saturation
  VTRUNCSTOREUS,
  VTRUNCSTORES,
  // Vector truncating masked store with unsigned/signed saturation
  VMTRUNCSTOREUS,
  VMTRUNCSTORES,

  // X86 specific gather and scatter
  MGATHER,
  MSCATTER,

  // Key locker nodes that produce flags.
  AESENC128KL,
  AESDEC128KL,
  AESENC256KL,
  AESDEC256KL,
  AESENCWIDE128KL,
  AESDECWIDE128KL,
  AESENCWIDE256KL,
  AESDECWIDE256KL,

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be thought as target memory ops!
};
} // end namespace X86ISD

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset can fit into the displacement field of
/// the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement = true);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);
} // end namespace X86

//===--------------------------------------------------------------------===//
//  X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
  explicit X86TargetLowering(const X86TargetMachine &TM,
                             const X86Subtarget &STI);

  unsigned getJumpTableEncoding() const override;
  bool useSoftFloat() const override;

  void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                             ArgListTy &Args) const override;

  MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
    return MVT::i8;
  }

  const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                            const MachineBasicBlock *MBB, unsigned uid,
                            MCContext &Ctx) const override;

  /// Returns relocation base for the given PIC jumptable.
  SDValue getPICJumpTableRelocBase(SDValue Table,
                                   SelectionDAG &DAG) const override;
  const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const override;

  /// Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area. For X86, aggregates
  /// that contain SSE vectors are placed at 16-byte boundaries while the
  /// rest are at 4-byte boundaries.
  unsigned getByValTypeAlignment(Type *Ty,
                                 const DataLayout &DL) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  /// Returns true if it's safe to use load / store of the
  /// specified type to expand memcpy / memset inline. This is mostly true
  /// for all types except for some special cases. For example, on X86
  /// targets without SSE2 f64 load / store are done with fldl / fstpl which
  /// also does type conversion. Note the specified type doesn't have to be
  /// legal as the hook is used before type legalization.
  bool isSafeMemOpType(MVT VT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type. Returns whether it is "fast" in the last argument.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  /// Replace the results of a node with an illegal result
  /// type with new values built out of custom code.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// Return true if the target has native support for
  /// the specified value type and it is 'desirable' to use the type for the
  /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
  /// instruction encodings are longer and some i16 instructions are slow.
  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

  /// Return true if the target has native support for the
  /// specified value type and it is 'desirable' to use the type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
  /// do the negation.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override;

  bool isCheapToSpeculateCttz() const override;

  bool isCheapToSpeculateCtlz() const override;

  bool isCtlzFast() const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
  }

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer). Since the benefit is more blurred, we leave such pairs
    // out until we get a testcase to prove it is a win.
    return false;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue Y) const override;

  bool hasAndNot(SDValue Y) const override;

  bool hasBitTest(SDValue X, SDValue Y) const override;

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

  bool
  shouldTransformSignedTruncationCheck(EVT XVT,
                                       unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldSplatInsEltVarIndex(EVT VT) const override;

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }

  /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
  MVT hasFastEqualityCompare(unsigned NumBits) const override;

  /// Return the value type to use for ISD::SETCC.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  /// Determine which of the bits specified in Mask are known to be either
  /// zero or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  /// Determine the number of bits in the operation that are sign bits.
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &KnownUndef,
                                               APInt &KnownZero,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth) const override;

  bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                  const APInt &DemandedElts,
                                                  unsigned MaskIndex,
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth) const;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &DemandedBits,
                                         const APInt &DemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  SDValue unwrapAddress(SDValue N) const override;

  SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

  bool ExpandInlineAsm(CallInst *CI) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  /// Lower the specified operand into the Ops vector. If it is invalid, don't
  /// add anything to Ops. If hasMemory is true it means one of the asm
  /// constraints of the inline asm instruction being processed is 'm'.
  void LowerAsmOperandForConstraint(SDValue Op,
                                    std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "o")
      return InlineAsm::Constraint_o;
    else if (ConstraintCode == "v")
      return InlineAsm::Constraint_v;
    else if (ConstraintCode == "X")
      return InlineAsm::Constraint_X;
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  /// Given a physical register constraint
  /// (e.g. {edx}), return the register number and the register class for the
  /// register. This should only be used for C_Register constraints. On
  /// error, this returns a register number of 0.
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  /// Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return true if the specified immediate is a legal
  /// icmp immediate, that is the target has icmp instructions which can
  /// compare a register against the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalICmpImmediate(int64_t Imm) const override;

  /// Return true if the specified immediate is a legal
  /// add immediate, that is the target has add instructions which can
  /// add a register and the immediate without having to materialize
  /// the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const override;

  bool isLegalStoreImmediate(int64_t Imm) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// This is used to enable splatted operand transforms for vector shifts
  /// and vector funnel shifts.
  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  /// Add x86-specific opcodes to the default list.
  bool isBinOp(unsigned Opcode) const override;

  /// Returns true if the opcode is a commutative binary operation.
  bool isCommutativeBinOp(unsigned Opcode) const override;

  /// Return true if it's free to truncate a value of
  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
  /// register EAX to i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

  /// Return true if any actual instruction that defines a
  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
  /// register. This does not necessarily include registers defined in
  /// unknown ways, such as incoming arguments, or copies from unknown
  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
  /// all instructions that define 32-bit values implicitly zero-extend the
  /// result out to 64 bits.
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldConvertPhiType(Type *From, Type *To) const override;

  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  bool isVectorLoadExtDesirable(SDValue) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  /// Return true if it's profitable to narrow
  /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
  /// from i32 to i8 but not from i32 to i16.
  bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

  /// Given an intrinsic, checks if on the target the intrinsic will need to
  /// map to a MemIntrinsicNode (touches memory). If this is the case, it
  /// returns true and stores the intrinsic information into the IntrinsicInfo
  /// that was passed to the function.
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  /// Returns true if the target can instruction select the
  /// specified FP immediate natively. If false, the legalizer will
  /// materialize the FP immediate as a load from a constant pool.
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
  /// be legal.
  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

  /// Returns true if lowering to a jump table is allowed.
  bool areJTsAllowed(const Function *Fn) const override;

  /// If true, then instruction selection should
  /// seek to shrink the FP constant of the specified type to a smaller type
  /// in order to save space and / or reduce runtime.
  bool ShouldShrinkFPConstant(EVT VT) const override {
    // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
    // expensive than a straight movsd. On the other hand, it's important to
    // shrink long double fp constant since fldt is very slow.
    return !X86ScalarSSEf64 || VT == MVT::f80;
  }

  /// Return true if we believe it is correct and profitable to reduce the
  /// load node to a smaller type.
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  /// Return true if the specified scalar FP type is computed in an SSE
  /// register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

  bool convertSelectOfConstantsToMath(EVT VT) const override;

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  /// Scalar ops always have equal or better analysis/performance/power than
  /// the vector equivalent, so this always makes sense if the scalar op is
  /// supported.
  bool shouldScalarizeBinop(SDValue) const override;

  /// Extract of a scalar FP value from index 0 of a vector is free.
  bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
  }

  /// Overflow nodes should get combined/lowered to optimal instructions
  /// (they should allow eliminating explicit compares by getting flags from
  /// math ops).
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override;

  bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace more than 2 scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem > 2;
  }

  bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                               const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const override;

  /// Intel processors have a unified instruction and data cache
  const char *getClearCacheBuiltinName() const override {
    return nullptr; // nothing to do, move along.
  }

  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  virtual bool needsFixedCatchObjects() const override;

  /// This method returns a target specific FastISel object,
  /// or null if the target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  bool useLoadStackGuardNode() const override;
  bool useStackGuardXorFP() const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                              const SDLoc &DL) const override;

  /// Return true if the target stores SafeStack pointer at a fixed offset in
  /// some non-standard address space, and populates the address space and
  /// offset as appropriate.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                        SDValue Chain, SDValue Pointer,
                                        MachinePointerInfo PtrInfo,
                                        Align Alignment,
                                        SelectionDAG &DAG) const;

  /// Customize the preferred legalization strategy for certain types.
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

  bool softPromoteHalfType() const override { return true; }

  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool supportSwiftError() const override;

  bool hasStackProbeSymbol(MachineFunction &MF) const override;
  bool hasInlineStackProbe(MachineFunction &MF) const override;
  StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

  unsigned getStackProbeSize(MachineFunction &MF) const;

  bool hasVectorBlend() const override { return true; }

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  /// Lower interleaved load(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  /// Lower interleaved store(s) into target specific
  /// instructions/intrinsics.
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                 SDValue Addr,
                                 SelectionDAG &DAG) const override;

protected:
  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI,
                          MVT VT) const override;

private:
  /// Keep a reference to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget &Subtarget;

  /// Select between SSE or x87 floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf32;
  bool X86ScalarSSEf64;

  /// A list of legal FP immediates.
  std::vector<APFloat> LegalFPImmediates;

  /// Indicate that this x86 target can instruction
  /// select the specified FP immediate natively.
  void addLegalFPImmediate(const APFloat &Imm) {
    LegalFPImmediates.push_back(Imm);
  }

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &dl, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals,
                          uint32_t *RegMask) const;
  SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                           const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA, MachineFrameInfo &MFI,
                           unsigned i) const;
  SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                           const SDLoc &dl, SelectionDAG &DAG,
                           const CCValAssign &VA,
                           ISD::ArgFlagsTy Flags, bool isByval) const;

  // Call lowering helpers.

  /// Check whether the call is eligible for tail call optimization. Targets
  /// that want to do tail call optimization should implement this function.
  bool IsEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
  SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                  SDValue Chain, bool IsTailCall,
                                  bool Is64Bit, int FPDiff,
                                  const SDLoc &dl) const;

  unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                       SelectionDAG &DAG) const;

  unsigned getAddressSpace(void) const;

  SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                          SDValue &Chain) const;
  SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

  unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                const unsigned char OpFlags = 0) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

  /// Creates target global address or external symbol nodes for calls or
  /// other uses.
  SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                bool ForCall) const;

  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                        RTLIB::Libcall Call) const;

  SDValue
  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       const SDLoc &dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals,
                      const SDLoc &dl, SelectionDAG &DAG) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

  bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
  bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

  bool needsCmpXchgNb(Type *MemType) const;

  void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                              MachineBasicBlock *DispatchBB, int FI) const;

  // Utility function to emit the low-level va_arg code for X86-64.
  MachineBasicBlock *
  EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const;

  /// Utility function to emit the xmm reg save portion of va_start.
  MachineBasicBlock *
  EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                           MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                               MachineInstr &MI2,
                                               MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                       MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                             MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                        MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                              MachineBasicBlock *BB) const;

  MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;

  void emitSetJmpShadowStackFix(MachineInstr &MI,
                                MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const;

  MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const;

  /// Emit flags for the given setcc condition and operands. Also returns the
  /// corresponding X86 condition code constant in X86CC.
  SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) const;

  /// Check if replacement of SQRT with RSQRT should be disabled.
  bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

  /// Use rsqrt* to speed up sqrt calculations.
  SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;

  /// Use rcp* to speed up fdiv calculations.
  SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  /// Reassociate floating point divisions into multiply by reciprocal.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
};

namespace X86 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace X86

// X86 specific Gather/Scatter nodes.
// The class has the same order of operands as MaskedGatherScatterSDNode for
// convenience.
class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
public:
  // This is intended as a utility and should never be directly created.
  X86MaskedGatherScatterSDNode() = delete;
  ~X86MaskedGatherScatterSDNode() = delete;

  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER ||
           N->getOpcode() == X86ISD::MSCATTER;
  }
};

class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getPassThru() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MGATHER;
  }
};

class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
public:
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MSCATTER;
  }
};

/// Generate unpacklo/unpackhi shuffle mask.
void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H