1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/TargetLowering.h"
18 
19 namespace llvm {
20   class X86Subtarget;
21   class X86TargetMachine;
22 
23   namespace X86ISD {
24     // X86 Specific DAG Nodes
25   enum NodeType : unsigned {
26     // Start the numbering where the builtin ops leave off.
27     FIRST_NUMBER = ISD::BUILTIN_OP_END,
28 
29     /// Bit scan forward.
30     BSF,
31     /// Bit scan reverse.
32     BSR,
33 
34     /// X86 funnel/double shift i16 instructions. These correspond to
35     /// X86::SHLDW and X86::SHRDW instructions which have different amt
36     /// X86::SHLDW and X86::SHRDW instructions, which have different
37     /// shift-amount modulo rules from generic funnel shifts.
38     FSHL,
39     FSHR,
40 
41     /// Bitwise logical AND of floating point values. This corresponds
42     /// to X86::ANDPS or X86::ANDPD.
43     FAND,
44 
45     /// Bitwise logical OR of floating point values. This corresponds
46     /// to X86::ORPS or X86::ORPD.
47     FOR,
48 
49     /// Bitwise logical XOR of floating point values. This corresponds
50     /// to X86::XORPS or X86::XORPD.
51     FXOR,
52 
53     ///  Bitwise logical ANDNOT of floating point values. This
54     /// corresponds to X86::ANDNPS or X86::ANDNPD.
55     FANDN,
56 
57     /// These operations represent an abstract X86 call
58     /// instruction, which includes a bunch of information.  In particular the
59     /// operands of these node are:
60     ///
61     ///     #0 - The incoming token chain
62     ///     #1 - The callee
63     ///     #2 - The number of arg bytes the caller pushes on the stack.
64     ///     #3 - The number of arg bytes the callee pops off the stack.
65     ///     #4 - The value to pass in AL/AX/EAX (optional)
66     ///     #5 - The value to pass in DL/DX/EDX (optional)
67     ///
68     /// The result values of these nodes are:
69     ///
70     ///     #0 - The outgoing token chain
71     ///     #1 - The first register result value (optional)
72     ///     #2 - The second register result value (optional)
73     ///
74     CALL,
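    // A minimal illustrative sketch (not part of this header) of how the
    // operand list documented above maps onto node construction in a
    // lowering routine; `Chain`, `Callee`, `dl` and `DAG` are assumed to be
    // in scope, and the exact operand set built by LowerCall may differ:
    //
    //   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    //   SmallVector<SDValue, 8> Ops;
    //   Ops.push_back(Chain);   // #0 - incoming token chain
    //   Ops.push_back(Callee);  // #1 - callee
    //   // ... byte counts / register operands as required ...
    //   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);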
75 
76     /// Same as call except it adds the NoTrack prefix.
77     NT_CALL,
78 
79     /// X86 compare and logical compare instructions.
80     CMP,
81     FCMP,
82     COMI,
83     UCOMI,
84 
85     /// X86 bit-test instructions.
86     BT,
87 
88     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
89     /// operand, usually produced by a CMP instruction.
90     SETCC,
91 
92     /// X86 Select
93     SELECTS,
94 
95     // Same as SETCC except it's materialized with an sbb and the value is all
96     // ones or all zeros.
97     SETCC_CARRY, // R = carry_bit ? ~0 : 0
98 
99     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
100     /// Operands are two FP values to compare; result is a mask of
101     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
102     FSETCC,
103 
104     /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
105     /// a version with SAE.
106     FSETCCM,
107     FSETCCM_SAE,
108 
109     /// X86 conditional moves. Operand 0 and operand 1 are the two values
110     /// to select from. Operand 2 is the condition code, and operand 3 is the
111     /// flag operand produced by a CMP or TEST instruction.
112     CMOV,
113 
114     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
115     /// is the block to branch to if the condition is true, operand 2 is the
116     /// condition code, and operand 3 is the flag operand produced by a CMP
117     /// or TEST instruction.
118     BRCOND,
119 
120     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
121     /// operand 1 is the target address.
122     NT_BRIND,
123 
124     /// Return with a flag operand. Operand 0 is the chain operand, operand
125     /// 1 is the number of bytes of stack to pop.
126     RET_FLAG,
127 
128     /// Return from interrupt. Operand 0 is the number of bytes to pop.
129     IRET,
130 
131     /// Repeat fill, corresponds to X86::REP_STOSx.
132     REP_STOS,
133 
134     /// Repeat move, corresponds to X86::REP_MOVSx.
135     REP_MOVS,
136 
137     /// On Darwin, this node represents the result of the popl
138     /// at function entry, used for PIC code.
139     GlobalBaseReg,
140 
141     /// A wrapper node for TargetConstantPool, TargetJumpTable,
142     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
143     /// MCSymbol and TargetBlockAddress.
144     Wrapper,
145 
146     /// Special wrapper used under X86-64 PIC mode for RIP
147     /// relative displacements.
148     WrapperRIP,
149 
150     /// Copies a 64-bit value from an MMX vector to the low word
151     /// of an XMM vector, with the high word zero filled.
152     MOVQ2DQ,
153 
154     /// Copies a 64-bit value from the low word of an XMM vector
155     /// to an MMX vector.
156     MOVDQ2Q,
157 
158     /// Copies a 32-bit value from the low word of an MMX
159     /// vector to a GPR.
160     MMX_MOVD2W,
161 
162     /// Copies a GPR into the low 32-bit word of an MMX vector
163     /// and zeros out the high word.
164     MMX_MOVW2D,
165 
166     /// Extract an 8-bit value from a vector and zero extend it to
167     /// i32, corresponds to X86::PEXTRB.
168     PEXTRB,
169 
170     /// Extract a 16-bit value from a vector and zero extend it to
171     /// i32, corresponds to X86::PEXTRW.
172     PEXTRW,
173 
174     /// Insert any element of a 4 x float vector into any element
175     /// of a destination 4 x float vector.
176     INSERTPS,
177 
178     /// Insert the lower 8 bits of a 32-bit value into a vector,
179     /// corresponds to X86::PINSRB.
180     PINSRB,
181 
182     /// Insert the lower 16 bits of a 32-bit value into a vector,
183     /// corresponds to X86::PINSRW.
184     PINSRW,
185 
186     /// Shuffle 16 8-bit values within a vector.
187     PSHUFB,
188 
189     /// Compute Sum of Absolute Differences.
190     PSADBW,
191     /// Compute Double Block Packed Sum-Absolute-Differences
192     DBPSADBW,
193 
194     /// Bitwise Logical AND NOT of Packed FP values.
195     ANDNP,
196 
197     /// Blend where the selector is an immediate.
198     BLENDI,
199 
200     /// Dynamic (non-constant condition) vector blend where only the sign bits
201     /// of the condition elements are used. This is used to enforce that the
202     /// condition mask is not valid for generic VSELECT optimizations. This
203     /// is also used to implement the intrinsics.
204     /// Operands are in VSELECT order: MASK, TRUE, FALSE
205     BLENDV,
206 
207     /// Combined add and sub on an FP vector.
208     ADDSUB,
209 
210     //  FP vector ops with rounding mode.
211     FADD_RND,
212     FADDS,
213     FADDS_RND,
214     FSUB_RND,
215     FSUBS,
216     FSUBS_RND,
217     FMUL_RND,
218     FMULS,
219     FMULS_RND,
220     FDIV_RND,
221     FDIVS,
222     FDIVS_RND,
223     FMAX_SAE,
224     FMAXS_SAE,
225     FMIN_SAE,
226     FMINS_SAE,
227     FSQRT_RND,
228     FSQRTS,
229     FSQRTS_RND,
230 
231     // FP vector get exponent.
232     FGETEXP,
233     FGETEXP_SAE,
234     FGETEXPS,
235     FGETEXPS_SAE,
236     // Extract Normalized Mantissas.
237     VGETMANT,
238     VGETMANT_SAE,
239     VGETMANTS,
240     VGETMANTS_SAE,
241     // FP Scale.
242     SCALEF,
243     SCALEF_RND,
244     SCALEFS,
245     SCALEFS_RND,
246 
247     // Unsigned Integer average.
248     AVG,
249 
250     /// Integer horizontal add/sub.
251     HADD,
252     HSUB,
253 
254     /// Floating point horizontal add/sub.
255     FHADD,
256     FHSUB,
257 
258     // Detect Conflicts Within a Vector
259     CONFLICT,
260 
261     /// Floating point max and min.
262     FMAX,
263     FMIN,
264 
265     /// Commutative FMIN and FMAX.
266     FMAXC,
267     FMINC,
268 
269     /// Scalar intrinsic floating point max and min.
270     FMAXS,
271     FMINS,
272 
273     /// Floating point reciprocal-sqrt and reciprocal approximation.
274     /// Note that these typically require refinement
275     /// in order to obtain suitable precision.
276     FRSQRT,
277     FRCP,
278 
279     // AVX-512 reciprocal approximations with a little more precision.
280     RSQRT14,
281     RSQRT14S,
282     RCP14,
283     RCP14S,
284 
285     // Thread Local Storage.
286     TLSADDR,
287 
288     // Thread Local Storage. A call to get the start address
289     // of the TLS block for the current module.
290     TLSBASEADDR,
291 
292     // Thread Local Storage. A call to an OS-provided
293     // thunk at the address from an earlier relocation.
294     TLSCALL,
295 
296     // Exception Handling helpers.
297     EH_RETURN,
298 
299     // SjLj exception handling setjmp.
300     EH_SJLJ_SETJMP,
301 
302     // SjLj exception handling longjmp.
303     EH_SJLJ_LONGJMP,
304 
305     // SjLj exception handling dispatch.
306     EH_SJLJ_SETUP_DISPATCH,
307 
308     /// Tail call return. See X86TargetLowering::LowerCall for
309     /// the list of operands.
310     TC_RETURN,
311 
312     // Vector move to low scalar and zero higher vector elements.
313     VZEXT_MOVL,
314 
315     // Vector integer truncate.
316     VTRUNC,
317     // Vector integer truncate with unsigned/signed saturation.
318     VTRUNCUS,
319     VTRUNCS,
320 
321     // Masked version of the above. Used when less than a 128-bit result is
322     // produced since the mask only applies to the lower elements and can't
323     // be represented by a select.
324     // SRC, PASSTHRU, MASK
325     VMTRUNC,
326     VMTRUNCUS,
327     VMTRUNCS,
328 
329     // Vector FP extend.
330     VFPEXT,
331     VFPEXT_SAE,
332     VFPEXTS,
333     VFPEXTS_SAE,
334 
335     // Vector FP round.
336     VFPROUND,
337     VFPROUND_RND,
338     VFPROUNDS,
339     VFPROUNDS_RND,
340 
341     // Masked version of above. Used for v2f64->v4f32.
342     // SRC, PASSTHRU, MASK
343     VMFPROUND,
344 
345     // 128-bit vector logical left / right shift
346     VSHLDQ,
347     VSRLDQ,
348 
349     // Vector shift elements
350     VSHL,
351     VSRL,
352     VSRA,
353 
354     // Vector variable shift
355     VSHLV,
356     VSRLV,
357     VSRAV,
358 
359     // Vector shift elements by immediate
360     VSHLI,
361     VSRLI,
362     VSRAI,
363 
364     // Shifts of mask registers.
365     KSHIFTL,
366     KSHIFTR,
367 
368     // Bit rotate by immediate
369     VROTLI,
370     VROTRI,
371 
372     // Vector packed double/float comparison.
373     CMPP,
374 
375     // Vector integer comparisons.
376     PCMPEQ,
377     PCMPGT,
378 
379     // v8i16 Horizontal minimum and position.
380     PHMINPOS,
381 
382     MULTISHIFT,
383 
384     /// Vector comparison generating mask bits for fp and
385     /// integer signed and unsigned data types.
386     CMPM,
387     // Vector mask comparison generating mask bits for FP values.
388     CMPMM,
389     // Vector mask comparison with SAE for FP values.
390     CMPMM_SAE,
391 
392     // Arithmetic operations with FLAGS results.
393     ADD,
394     SUB,
395     ADC,
396     SBB,
397     SMUL,
398     UMUL,
399     OR,
400     XOR,
401     AND,
402 
403     // Bit field extract.
404     BEXTR,
405     BEXTRI,
406 
407     // Zero High Bits Starting with Specified Bit Position.
408     BZHI,
409 
410     // Parallel extract and deposit.
411     PDEP,
412     PEXT,
413 
414     // X86-specific multiply by immediate.
415     MUL_IMM,
416 
417     // Vector sign bit extraction.
418     MOVMSK,
419 
420     // Vector bitwise comparisons.
421     PTEST,
422 
423     // Vector packed fp sign bitwise comparisons.
424     TESTP,
425 
426     // OR/AND test for masks.
427     KORTEST,
428     KTEST,
429 
430     // ADD for masks.
431     KADD,
432 
433     // Several flavors of instructions with vector shuffle behaviors.
434     // Saturated signed/unsigned packing.
435     PACKSS,
436     PACKUS,
437     // Intra-lane alignr.
438     PALIGNR,
439     // AVX512 inter-lane alignr.
440     VALIGN,
441     PSHUFD,
442     PSHUFHW,
443     PSHUFLW,
444     SHUFP,
445     // VBMI2 Concat & Shift.
446     VSHLD,
447     VSHRD,
448     VSHLDV,
449     VSHRDV,
450     // Shuffle Packed Values at 128-bit granularity.
451     SHUF128,
452     MOVDDUP,
453     MOVSHDUP,
454     MOVSLDUP,
455     MOVLHPS,
456     MOVHLPS,
457     MOVSD,
458     MOVSS,
459     UNPCKL,
460     UNPCKH,
461     VPERMILPV,
462     VPERMILPI,
463     VPERMI,
464     VPERM2X128,
465 
466     // Variable Permute (VPERM).
467     // Res = VPERMV MaskV, V0
468     VPERMV,
469 
470     // 3-op Variable Permute (VPERMT2).
471     // Res = VPERMV3 V0, MaskV, V1
472     VPERMV3,
473 
474     // Bitwise ternary logic.
475     VPTERNLOG,
476     // Fix Up Special Packed Float32/64 values.
477     VFIXUPIMM,
478     VFIXUPIMM_SAE,
479     VFIXUPIMMS,
480     VFIXUPIMMS_SAE,
481     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
482     VRANGE,
483     VRANGE_SAE,
484     VRANGES,
485     VRANGES_SAE,
486     // Reduce - Perform Reduction Transformation on scalar/packed FP.
487     VREDUCE,
488     VREDUCE_SAE,
489     VREDUCES,
490     VREDUCES_SAE,
491     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
492     // Also used by the legacy (V)ROUND intrinsics where we mask out the
493     // scaling part of the immediate.
494     VRNDSCALE,
495     VRNDSCALE_SAE,
496     VRNDSCALES,
497     VRNDSCALES_SAE,
498     // Tests types of packed FP values.
499     VFPCLASS,
500     // Tests types of scalar FP values.
501     VFPCLASSS,
502 
503     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
504     // a vector, this node may change the vector length as part of the splat.
505     VBROADCAST,
506     // Broadcast mask to vector.
507     VBROADCASTM,
508     // Broadcast subvector to vector.
509     SUBV_BROADCAST,
510 
511     /// SSE4A Extraction and Insertion.
512     EXTRQI,
513     INSERTQI,
514 
515     // XOP arithmetic/logical shifts.
516     VPSHA,
517     VPSHL,
518     // XOP signed/unsigned integer comparisons.
519     VPCOM,
520     VPCOMU,
521     // XOP packed permute bytes.
522     VPPERM,
523     // XOP two source permutation.
524     VPERMIL2,
525 
526     // Vector multiply packed unsigned doubleword integers.
527     PMULUDQ,
528     // Vector multiply packed signed doubleword integers.
529     PMULDQ,
530     // Vector Multiply Packed Unsigned Integers with Round and Scale.
531     MULHRS,
532 
533     // Multiply and Add Packed Integers.
534     VPMADDUBSW,
535     VPMADDWD,
536 
537     // AVX512IFMA multiply and add.
538     // NOTE: These are different from the instruction and perform
539     // op0 x op1 + op2.
540     VPMADD52L,
541     VPMADD52H,
542 
543     // VNNI
544     VPDPBUSD,
545     VPDPBUSDS,
546     VPDPWSSD,
547     VPDPWSSDS,
548 
549     // FMA nodes.
550     // We use the target independent ISD::FMA for the non-inverted case.
551     FNMADD,
552     FMSUB,
553     FNMSUB,
554     FMADDSUB,
555     FMSUBADD,
556 
557     // FMA with rounding mode.
558     FMADD_RND,
559     FNMADD_RND,
560     FMSUB_RND,
561     FNMSUB_RND,
562     FMADDSUB_RND,
563     FMSUBADD_RND,
564 
565     // Compress and expand.
566     COMPRESS,
567     EXPAND,
568 
569     // Bits shuffle
570     VPSHUFBITQMB,
571 
572     // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
573     SINT_TO_FP_RND,
574     UINT_TO_FP_RND,
575     SCALAR_SINT_TO_FP,
576     SCALAR_UINT_TO_FP,
577     SCALAR_SINT_TO_FP_RND,
578     SCALAR_UINT_TO_FP_RND,
579 
580     // Vector float/double to signed/unsigned integer.
581     CVTP2SI,
582     CVTP2UI,
583     CVTP2SI_RND,
584     CVTP2UI_RND,
585     // Scalar float/double to signed/unsigned integer.
586     CVTS2SI,
587     CVTS2UI,
588     CVTS2SI_RND,
589     CVTS2UI_RND,
590 
591     // Vector float/double to signed/unsigned integer with truncation.
592     CVTTP2SI,
593     CVTTP2UI,
594     CVTTP2SI_SAE,
595     CVTTP2UI_SAE,
596     // Scalar float/double to signed/unsigned integer with truncation.
597     CVTTS2SI,
598     CVTTS2UI,
599     CVTTS2SI_SAE,
600     CVTTS2UI_SAE,
601 
602     // Vector signed/unsigned integer to float/double.
603     CVTSI2P,
604     CVTUI2P,
605 
606     // Masked versions of above. Used for v2f64->v4f32.
607     // SRC, PASSTHRU, MASK
608     MCVTP2SI,
609     MCVTP2UI,
610     MCVTTP2SI,
611     MCVTTP2UI,
612     MCVTSI2P,
613     MCVTUI2P,
614 
615     // Vector float to bfloat16.
616     // Convert TWO packed single data to one packed BF16 data
617     CVTNE2PS2BF16,
618     // Convert packed single data to packed BF16 data
619     CVTNEPS2BF16,
620     // Masked version of above.
621     // SRC, PASSTHRU, MASK
622     MCVTNEPS2BF16,
623 
624     // Dot product of BF16 pairs accumulated into
625     // packed single precision.
626     DPBF16PS,
627 
628     // Save xmm argument registers to the stack, according to %al. An operator
629     // is needed so that this can be expanded with control flow.
630     VASTART_SAVE_XMM_REGS,
631 
632     // Windows's _chkstk call to do stack probing.
633     WIN_ALLOCA,
634 
635     // For allocating variable amounts of stack space when using
636     // segmented stacks. Checks if the current stacklet has enough space, and
637     // falls back to heap allocation if not.
638     SEG_ALLOCA,
639 
640     // For allocating stack space when using stack clash protector.
641     // Allocation is performed by block, and each block is probed.
642     PROBED_ALLOCA,
643 
644     // Memory barriers.
645     MEMBARRIER,
646     MFENCE,
647 
648     // Get a random integer and indicate whether it is valid in CF.
649     RDRAND,
650 
651     // Get a NIST SP800-90B & C compliant random integer and
652     // indicate whether it is valid in CF.
653     RDSEED,
654 
655     // Protection keys
656     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
657     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
658     // value for ECX.
659     RDPKRU,
660     WRPKRU,
661 
662     // SSE42 string comparisons.
663     // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
664     // will emit one or two instructions based on which results are used. If
665     // flags and index/mask are used, this allows us to use a single instruction
666     // since we won't have to pick an opcode for flags. Instead we can rely on the
667     // DAG to CSE everything and decide at isel.
668     PCMPISTR,
669     PCMPESTR,
670 
671     // Test if in transactional execution.
672     XTEST,
673 
674     // ERI instructions.
675     RSQRT28,
676     RSQRT28_SAE,
677     RSQRT28S,
678     RSQRT28S_SAE,
679     RCP28,
680     RCP28_SAE,
681     RCP28S,
682     RCP28S_SAE,
683     EXP2,
684     EXP2_SAE,
685 
686     // Conversions between float and half-float.
687     CVTPS2PH,
688     CVTPH2PS,
689     CVTPH2PS_SAE,
690 
691     // Masked version of above.
692     // SRC, RND, PASSTHRU, MASK
693     MCVTPS2PH,
694 
695     // Galois Field Arithmetic Instructions
696     GF2P8AFFINEINVQB,
697     GF2P8AFFINEQB,
698     GF2P8MULB,
699 
700     // LWP insert record.
701     LWPINS,
702 
703     // User level wait
704     UMWAIT,
705     TPAUSE,
706 
707     // Enqueue Stores Instructions
708     ENQCMD,
709     ENQCMDS,
710 
711     // For avx512-vp2intersect
712     VP2INTERSECT,
713 
714     // User level interrupts - testui
715     TESTUI,
716 
717     /// X86 strict FP compare instructions.
718     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
719     STRICT_FCMPS,
720 
721     // Vector packed double/float comparison.
722     STRICT_CMPP,
723 
724     /// Vector comparison generating mask bits for fp and
725     /// integer signed and unsigned data types.
726     STRICT_CMPM,
727 
728     // Vector float/double to signed/unsigned integer with truncation.
729     STRICT_CVTTP2SI,
730     STRICT_CVTTP2UI,
731 
732     // Vector FP extend.
733     STRICT_VFPEXT,
734 
735     // Vector FP round.
736     STRICT_VFPROUND,
737 
738     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
739     // Also used by the legacy (V)ROUND intrinsics where we mask out the
740     // scaling part of the immediate.
741     STRICT_VRNDSCALE,
742 
743     // Vector signed/unsigned integer to float/double.
744     STRICT_CVTSI2P,
745     STRICT_CVTUI2P,
746 
747     // Strict FMA nodes.
748     STRICT_FNMADD,
749     STRICT_FMSUB,
750     STRICT_FNMSUB,
751 
752     // Conversions between float and half-float.
753     STRICT_CVTPS2PH,
754     STRICT_CVTPH2PS,
755 
756     // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
757     // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
758 
759     // Compare and swap.
760     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
761     LCMPXCHG8_DAG,
762     LCMPXCHG16_DAG,
763     LCMPXCHG16_SAVE_RBX_DAG,
764 
765     /// LOCK-prefixed arithmetic read-modify-write instructions.
766     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
767     LADD,
768     LSUB,
769     LOR,
770     LXOR,
771     LAND,
772 
773     // Load, scalar_to_vector, and zero extend.
774     VZEXT_LOAD,
775 
776     // extract_vector_elt, store.
777     VEXTRACT_STORE,
778 
779     // scalar broadcast from memory
780     VBROADCAST_LOAD,
781 
782     // Store FP control word into i16 memory.
783     FNSTCW16m,
784 
785     /// This instruction implements FP_TO_SINT with the
786     /// integer destination in memory and a FP reg source.  This corresponds
787     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
788     /// has two inputs (token chain and address) and two outputs (int value
789     /// and token chain). Memory VT specifies the type to store to.
790     FP_TO_INT_IN_MEM,
791 
792     /// This instruction implements SINT_TO_FP with the
793     /// integer source in memory and FP reg result.  This corresponds to the
794     /// X86::FILD*m instructions. It has two inputs (token chain and address)
795     /// and two outputs (FP value and token chain). The integer source type is
796     /// specified by the memory VT.
797     FILD,
798 
799     /// This instruction implements a fp->int store from FP stack
800     /// slots. This corresponds to the fist instruction. It takes a
801     /// chain operand, value to store, address, and glue. The memory VT
802     /// specifies the type to store as.
803     FIST,
804 
805     /// This instruction implements an extending load to FP stack slots.
806     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
807     /// operand, and ptr to load from. The memory VT specifies the type to
808     /// load from.
809     FLD,
810 
811     /// This instruction implements a truncating store from FP stack
812     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
813     /// chain operand, value to store, address, and glue. The memory VT
814     /// specifies the type to store as.
815     FST,
816 
817     /// This instruction grabs the address of the next argument
818     /// from a va_list. (reads and modifies the va_list in memory)
819     VAARG_64,
820 
821     // Vector truncating store with unsigned/signed saturation
822     VTRUNCSTOREUS,
823     VTRUNCSTORES,
824     // Vector truncating masked store with unsigned/signed saturation
825     VMTRUNCSTOREUS,
826     VMTRUNCSTORES,
827 
828     // X86 specific gather and scatter
829     MGATHER,
830     MSCATTER,
831 
832     // Key locker nodes that produce flags.
833     AESENC128KL,
834     AESDEC128KL,
835     AESENC256KL,
836     AESDEC256KL,
837     AESENCWIDE128KL,
838     AESDECWIDE128KL,
839     AESENCWIDE256KL,
840     AESDECWIDE256KL,
841 
842     // WARNING: Do not add anything at the end unless you want the node to
843     // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
844     // opcodes will be treated as target memory ops!
845   };
846   } // end namespace X86ISD
847 
848   /// Define some predicates that are used for node matching.
849   namespace X86 {
850     /// Returns true if Elt is a constant zero or floating point constant +0.0.
851     bool isZeroNode(SDValue Elt);
852 
853     /// Returns true if the given offset can
854     /// fit into the displacement field of the instruction.
855     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
856                                       bool hasSymbolicDisplacement = true);
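    // For example (an illustrative sketch, not a statement of this header's
    // API): under the small code model, folding `GlobalVar + Offset` into an
    // addressing mode is only acceptable while the total displacement still
    // fits the signed 32-bit displacement field of the instruction, so a
    // small offset such as +16 is fine while a multi-gigabyte offset is not.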
857 
858     /// Determines whether the callee is required to pop its
859     /// own arguments. Callee pop is necessary to support tail calls.
860     bool isCalleePop(CallingConv::ID CallingConv,
861                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
862 
863     /// If Op is a constant whose elements are all the same constant or
864     /// undefined, return true and return the constant value in \p SplatVal.
865     /// If we have undef bits that don't cover an entire element, we treat these
866     /// as zero if AllowPartialUndefs is set, else we fail and return false.
867     bool isConstantSplat(SDValue Op, APInt &SplatVal,
868                          bool AllowPartialUndefs = true);
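    // A minimal usage sketch (illustrative only; `Op` is an SDValue assumed
    // to be in scope in some combine):
    //
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isAllOnesValue())
    //     /* treat Op as an all-ones vector constant */;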
869   } // end namespace X86
870 
871   //===--------------------------------------------------------------------===//
872   //  X86 Implementation of the TargetLowering interface
873   class X86TargetLowering final : public TargetLowering {
874   public:
875     explicit X86TargetLowering(const X86TargetMachine &TM,
876                                const X86Subtarget &STI);
877 
878     unsigned getJumpTableEncoding() const override;
879     bool useSoftFloat() const override;
880 
881     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
882                                ArgListTy &Args) const override;
883 
884     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
885       return MVT::i8;
886     }
887 
888     const MCExpr *
889     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
890                               const MachineBasicBlock *MBB, unsigned uid,
891                               MCContext &Ctx) const override;
892 
893     /// Returns relocation base for the given PIC jumptable.
894     SDValue getPICJumpTableRelocBase(SDValue Table,
895                                      SelectionDAG &DAG) const override;
896     const MCExpr *
897     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
898                                  unsigned JTI, MCContext &Ctx) const override;
899 
900     /// Return the desired alignment for ByVal aggregate
901     /// function arguments in the caller parameter area. For X86, aggregates
902     /// that contain SSE vectors are placed at 16-byte boundaries while the
903     /// rest are at 4-byte boundaries.
904     unsigned getByValTypeAlignment(Type *Ty,
905                                    const DataLayout &DL) const override;
906 
907     EVT getOptimalMemOpType(const MemOp &Op,
908                             const AttributeList &FuncAttributes) const override;
909 
910     /// Returns true if it's safe to use load / store of the
911     /// specified type to expand memcpy / memset inline. This is mostly true
912     /// for all types except for some special cases. For example, on X86
913     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
914     /// also does type conversion. Note the specified type doesn't have to be
915     /// legal as the hook is used before type legalization.
916     bool isSafeMemOpType(MVT VT) const override;
917 
918     /// Returns true if the target allows unaligned memory accesses of the
919     /// specified type. Returns whether it is "fast" in the last argument.
920     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
921                                         MachineMemOperand::Flags Flags,
922                                         bool *Fast) const override;
923 
924     /// Provide custom lowering hooks for some operations.
925     ///
926     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
927 
928     /// Replace the results of node with an illegal result
929     /// type with new values built out of custom code.
930     ///
931     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
932                             SelectionDAG &DAG) const override;
933 
934     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
935 
936     /// Return true if the target has native support for
937     /// the specified value type and it is 'desirable' to use the type for the
938     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
939     /// instruction encodings are longer and some i16 instructions are slow.
940     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
941 
942     /// Return true if the target has native support for the
943     /// specified value type and it is 'desirable' to use the type. e.g. On x86
944     /// i16 is legal, but undesirable since i16 instruction encodings are longer
945     /// and some i16 instructions are slow.
946     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
947 
948     /// Return the newly negated expression if the cost is not expensive and
949     /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
950     /// do the negation.
951     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
952                                  bool LegalOperations, bool ForCodeSize,
953                                  NegatibleCost &Cost,
954                                  unsigned Depth) const override;
955 
956     MachineBasicBlock *
957     EmitInstrWithCustomInserter(MachineInstr &MI,
958                                 MachineBasicBlock *MBB) const override;
959 
960     /// This method returns the name of a target specific DAG node.
961     const char *getTargetNodeName(unsigned Opcode) const override;
962 
963     /// Do not merge vector stores after legalization because that may conflict
964     /// with x86-specific store splitting optimizations.
965     bool mergeStoresAfterLegalization(EVT MemVT) const override {
966       return !MemVT.isVector();
967     }
968 
969     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
970                           const SelectionDAG &DAG) const override;
971 
972     bool isCheapToSpeculateCttz() const override;
973 
974     bool isCheapToSpeculateCtlz() const override;
975 
976     bool isCtlzFast() const override;
977 
978     bool hasBitPreservingFPLogic(EVT VT) const override {
979       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
980     }
981 
982     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
983       // If the pair to store is a mixture of float and int values, we will
984       // save two bitwise instructions and one float-to-int instruction and
985       // increase one store instruction. There is potentially a more
986       // significant benefit because it avoids the float->int domain switch
987     // for the input value. So it is more likely a win.
988       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
989           (LTy.isInteger() && HTy.isFloatingPoint()))
990         return true;
991       // If the pair only contains int values, we will save two bitwise
992       // instructions and increase one store instruction (costing one more
993     // store buffer). Since the benefit is less clear-cut, we leave
994     // such pairs out until we get a testcase to prove it is a win.
995       return false;
996     }
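    // Illustrative example of the mixed int/FP case above (a sketch, not from
    // a testcase): for a pair such as {float %f, i32 %i}, merging into one
    // i64 store would need a float->int bitcast plus shift/or to combine the
    // bits, so keeping two plain stores
    //
    //   store float %f, float* %p0
    //   store i32   %i, i32*   %p1
    //
    // avoids the FP->integer domain switch and is considered the win here.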
997 
998     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
999 
1000     bool hasAndNotCompare(SDValue Y) const override;
1001 
1002     bool hasAndNot(SDValue Y) const override;
1003 
1004     bool hasBitTest(SDValue X, SDValue Y) const override;
1005 
1006     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1007         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1008         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1009         SelectionDAG &DAG) const override;
1010 
1011     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1012                                            CombineLevel Level) const override;
1013 
1014     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1015 
1016     bool
1017     shouldTransformSignedTruncationCheck(EVT XVT,
1018                                          unsigned KeptBits) const override {
1019       // For vectors, we don't have a preference.
1020       if (XVT.isVector())
1021         return false;
1022 
1023       auto VTIsOk = [](EVT VT) -> bool {
1024         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1025                VT == MVT::i64;
1026       };
1027 
1028       // We are ok with KeptBitsVT being byte/word/dword, which MOVS supports.
1029       // XVT will be larger than KeptBitsVT.
1030       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1031       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1032     }
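    // Rough sketch of the pattern this hook gates (the actual fold lives in
    // the generic DAG combine code, so the details here are illustrative): a
    // check of the form "does X fit in KeptBits signed bits", i.e.
    //
    //   setcc eq (sext_inreg X, KeptBitsVT), X
    //
    // may be rewritten as an add plus an unsigned range compare, roughly
    //
    //   setcc ult (add X, 1 << (KeptBits - 1)), (1 << KeptBits)
    //
    // which is why byte/word/dword KeptBits sizes are accepted above.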
1033 
1034     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1035 
1036     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1037 
1038     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1039       return VT.isScalarInteger();
1040     }
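    // Illustrative example: with scalar integer setcc results, a logical
    // combination such as
    //
    //   %a = icmp eq i32 %x, 0
    //   %b = icmp eq i32 %y, 0
    //   %r = and i1 %a, %b
    //
    // can keep both results in GPRs and combine them with a plain AND rather
    // than materializing selects or branches.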
1041 
1042     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1043     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1044 
1045     /// Return the value type to use for ISD::SETCC.
1046     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1047                            EVT VT) const override;
1048 
1049     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1050                                       const APInt &DemandedElts,
1051                                       TargetLoweringOpt &TLO) const override;
1052 
1053     /// Determine which of the bits specified in Mask are known to be either
1054     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1055     void computeKnownBitsForTargetNode(const SDValue Op,
1056                                        KnownBits &Known,
1057                                        const APInt &DemandedElts,
1058                                        const SelectionDAG &DAG,
1059                                        unsigned Depth = 0) const override;
1060 
1061     /// Determine the number of bits in the operation that are sign bits.
1062     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1063                                              const APInt &DemandedElts,
1064                                              const SelectionDAG &DAG,
1065                                              unsigned Depth) const override;
1066 
1067     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1068                                                  const APInt &DemandedElts,
1069                                                  APInt &KnownUndef,
1070                                                  APInt &KnownZero,
1071                                                  TargetLoweringOpt &TLO,
1072                                                  unsigned Depth) const override;
1073 
1074     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1075                                                     const APInt &DemandedElts,
1076                                                     unsigned MaskIndex,
1077                                                     TargetLoweringOpt &TLO,
1078                                                     unsigned Depth) const;
1079 
1080     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1081                                            const APInt &DemandedBits,
1082                                            const APInt &DemandedElts,
1083                                            KnownBits &Known,
1084                                            TargetLoweringOpt &TLO,
1085                                            unsigned Depth) const override;
1086 
1087     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1088         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1089         SelectionDAG &DAG, unsigned Depth) const override;
1090 
1091     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1092 
1093     SDValue unwrapAddress(SDValue N) const override;
1094 
1095     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1096 
1097     bool ExpandInlineAsm(CallInst *CI) const override;
1098 
1099     ConstraintType getConstraintType(StringRef Constraint) const override;
1100 
1101     /// Examine constraint string and operand type and determine a weight value.
1102     /// The operand object must already have been set up with the operand type.
1103     ConstraintWeight
1104       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1105                                      const char *constraint) const override;
1106 
1107     const char *LowerXConstraint(EVT ConstraintVT) const override;
1108 
1109     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1110     /// add anything to Ops. If hasMemory is true it means one of the asm
1111     /// constraints of the inline asm instruction being processed is 'm'.
1112     void LowerAsmOperandForConstraint(SDValue Op,
1113                                       std::string &Constraint,
1114                                       std::vector<SDValue> &Ops,
1115                                       SelectionDAG &DAG) const override;
1116 
1117     unsigned
1118     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1119       if (ConstraintCode == "o")
1120         return InlineAsm::Constraint_o;
1121       else if (ConstraintCode == "v")
1122         return InlineAsm::Constraint_v;
1123       else if (ConstraintCode == "X")
1124         return InlineAsm::Constraint_X;
1125       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1126     }
1127 
1128     /// Handle lowering of flag assembly outputs.
1129     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1130                                         const SDLoc &DL,
1131                                         const AsmOperandInfo &Constraint,
1132                                         SelectionDAG &DAG) const override;
1133 
1134     /// Given a physical register constraint
1135     /// (e.g. {edx}), return the register number and the register class for the
1136     /// register.  This should only be used for C_Register constraints.  On
1137     /// error, this returns a register number of 0.
1138     std::pair<unsigned, const TargetRegisterClass *>
1139     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1140                                  StringRef Constraint, MVT VT) const override;
1141 
1142     /// Return true if the addressing mode represented
1143     /// by AM is legal for this target, for a load/store of the specified type.
1144     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1145                                Type *Ty, unsigned AS,
1146                                Instruction *I = nullptr) const override;
1147 
1148     /// Return true if the specified immediate is a legal
1149     /// icmp immediate, that is, the target has icmp instructions which can
1150     /// compare a register against the immediate without having to materialize
1151     /// the immediate into a register.
1152     bool isLegalICmpImmediate(int64_t Imm) const override;
1153 
1154     /// Return true if the specified immediate is a legal
1155     /// add immediate, that is, the target has add instructions which can
1156     /// add a register and the immediate without having to materialize
1157     /// the immediate into a register.
1158     bool isLegalAddImmediate(int64_t Imm) const override;
1159 
1160     bool isLegalStoreImmediate(int64_t Imm) const override;
1161 
1162     /// Return the cost of the scaling factor used in the addressing
1163     /// mode represented by AM for this target, for a load/store
1164     /// of the specified type.
1165     /// If the AM is supported, the return value must be >= 0.
1166     /// If the AM is not supported, it returns a negative value.
1167     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1168                              unsigned AS) const override;
1169 
1170     /// This is used to enable splatted operand transforms for vector shifts
1171     /// and vector funnel shifts.
1172     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1173 
1174     /// Add x86-specific opcodes to the default list.
1175     bool isBinOp(unsigned Opcode) const override;
1176 
1177     /// Returns true if the opcode is a commutative binary operation.
1178     bool isCommutativeBinOp(unsigned Opcode) const override;
1179 
1180     /// Return true if it's free to truncate a value of
1181     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1182     /// register EAX to i16 by referencing its sub-register AX.
1183     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1184     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1185 
1186     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1187 
1188     /// Return true if any actual instruction that defines a
1189     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1190     /// register. This does not necessarily include registers defined in
1191     /// unknown ways, such as incoming arguments, or copies from unknown
1192     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1193     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1194     /// all instructions that define 32-bit values implicitly zero-extend the
1195     /// result out to 64 bits.
1196     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1197     bool isZExtFree(EVT VT1, EVT VT2) const override;
1198     bool isZExtFree(SDValue Val, EVT VT2) const override;
1199 
1200     bool shouldSinkOperands(Instruction *I,
1201                             SmallVectorImpl<Use *> &Ops) const override;
1202     bool shouldConvertPhiType(Type *From, Type *To) const override;
1203 
1204     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1205     /// extend node) is profitable.
1206     bool isVectorLoadExtDesirable(SDValue) const override;
1207 
1208     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1209     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1210     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1211     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1212                                     EVT VT) const override;
1213 
1214     /// Return true if it's profitable to narrow
1215     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1216     /// from i32 to i8 but not from i32 to i16.
1217     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1218 
1219     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1220     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1221     /// true and stores the intrinsic information into the IntrinsicInfo that was
1222     /// passed to the function.
1223     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1224                             MachineFunction &MF,
1225                             unsigned Intrinsic) const override;
1226 
1227     /// Returns true if the target can instruction select the
1228     /// specified FP immediate natively. If false, the legalizer will
1229     /// materialize the FP immediate as a load from a constant pool.
1230     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1231                       bool ForCodeSize) const override;
1232 
1233     /// Targets can use this to indicate that they only support *some*
1234     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1235     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1236     /// be legal.
1237     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1238 
1239     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1240     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1241     /// constant pool entry.
1242     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1243 
1244     /// Returns true if lowering to a jump table is allowed.
1245     bool areJTsAllowed(const Function *Fn) const override;
1246 
1247     /// If true, then instruction selection should
1248     /// seek to shrink the FP constant of the specified type to a smaller type
1249     /// in order to save space and / or reduce runtime.
1250     bool ShouldShrinkFPConstant(EVT VT) const override {
1251       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1252       // expensive than a straight movsd. On the other hand, it's important to
1253       // shrink long double fp constant since fldt is very slow.
1254       return !X86ScalarSSEf64 || VT == MVT::f80;
1255     }
1256 
1257     /// Return true if we believe it is correct and profitable to reduce the
1258     /// load node to a smaller type.
1259     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1260                                EVT NewVT) const override;
1261 
1262     /// Return true if the specified scalar FP type is computed in an SSE
1263     /// register, not on the X87 floating point stack.
1264     bool isScalarFPTypeInSSEReg(EVT VT) const {
1265       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1266              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1267     }
1268 
1269     /// Returns true if it is beneficial to convert a load of a constant
1270     /// to just the constant itself.
1271     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1272                                            Type *Ty) const override;
1273 
1274     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1275 
1276     bool convertSelectOfConstantsToMath(EVT VT) const override;
1277 
1278     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1279                                 SDValue C) const override;
1280 
1281     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1282     /// with this index.
1283     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1284                                  unsigned Index) const override;
1285 
1286     /// Scalar ops always have equal or better analysis/performance/power than
1287     /// the vector equivalent, so this always makes sense if the scalar op is
1288     /// supported.
1289     bool shouldScalarizeBinop(SDValue) const override;
1290 
1291     /// Extract of a scalar FP value from index 0 of a vector is free.
1292     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1293       EVT EltVT = VT.getScalarType();
1294       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1295     }
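    // For example (illustrative): extracting lane 0 of a <4 x float>,
    //
    //   %e = extractelement <4 x float> %v, i32 0
    //
    // simply reuses the low lane of the XMM register holding %v, so no
    // shuffle or move is required; any other lane would need a shuffle.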
1296 
1297     /// Overflow nodes should get combined/lowered to optimal instructions
1298     /// (they should allow eliminating explicit compares by getting flags from
1299     /// math ops).
1300     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1301                               bool MathUsed) const override;
1302 
1303     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1304                                       unsigned AddrSpace) const override {
1305       // If we can replace more than 2 scalar stores, there will be a reduction
1306       // in instructions even after we add a vector constant load.
1307       return NumElem > 2;
1308     }
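    // Illustrative example: four adjacent stores of scalar constants, e.g.
    //
    //   store i32 1, i32* %p0
    //   store i32 2, i32* %p1
    //   store i32 3, i32* %p2
    //   store i32 4, i32* %p3
    //
    // can become one constant-pool vector load plus a single vector store,
    // whereas replacing only two scalar stores would not reduce the
    // instruction count once the constant load is added.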
1309 
1310     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1311                                  const SelectionDAG &DAG,
1312                                  const MachineMemOperand &MMO) const override;
1313 
1314     /// Intel processors have a unified instruction and data cache
1315     const char * getClearCacheBuiltinName() const override {
1316       return nullptr; // nothing to do, move along.
1317     }
1318 
1319     Register getRegisterByName(const char* RegName, LLT VT,
1320                                const MachineFunction &MF) const override;
1321 
1322     /// If a physical register, this returns the register that receives the
1323     /// exception address on entry to an EH pad.
1324     Register
1325     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1326 
1327     /// If a physical register, this returns the register that receives the
1328     /// exception typeid on entry to a landing pad.
1329     Register
1330     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1331 
1332     virtual bool needsFixedCatchObjects() const override;
1333 
1334     /// This method returns a target specific FastISel object,
1335     /// or null if the target does not support "fast" ISel.
1336     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1337                              const TargetLibraryInfo *libInfo) const override;
1338 
1339     /// If the target has a standard location for the stack protector cookie,
1340     /// returns the address of that location. Otherwise, returns nullptr.
1341     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1342 
1343     bool useLoadStackGuardNode() const override;
1344     bool useStackGuardXorFP() const override;
1345     void insertSSPDeclarations(Module &M) const override;
1346     Value *getSDagStackGuard(const Module &M) const override;
1347     Function *getSSPStackGuardCheck(const Module &M) const override;
1348     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1349                                 const SDLoc &DL) const override;
1350 
1351 
1352     /// Return true if the target stores SafeStack pointer at a fixed offset in
1353     /// some non-standard address space, and populates the address space and
1354     /// offset as appropriate.
1355     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1356 
1357     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1358                                           SDValue Chain, SDValue Pointer,
1359                                           MachinePointerInfo PtrInfo,
1360                                           Align Alignment,
1361                                           SelectionDAG &DAG) const;
1362 
1363     /// Customize the preferred legalization strategy for certain types.
1364     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1365 
1366     bool softPromoteHalfType() const override { return true; }
1367 
1368     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1369                                       EVT VT) const override;
1370 
1371     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1372                                            CallingConv::ID CC,
1373                                            EVT VT) const override;
1374 
1375     unsigned getVectorTypeBreakdownForCallingConv(
1376         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1377         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1378 
1379     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1380 
1381     bool supportSwiftError() const override;
1382 
1383     bool hasStackProbeSymbol(MachineFunction &MF) const override;
1384     bool hasInlineStackProbe(MachineFunction &MF) const override;
1385     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1386 
1387     unsigned getStackProbeSize(MachineFunction &MF) const;
1388 
1389     bool hasVectorBlend() const override { return true; }
1390 
1391     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1392 
1393     /// Lower interleaved load(s) into target specific
1394     /// instructions/intrinsics.
1395     bool lowerInterleavedLoad(LoadInst *LI,
1396                               ArrayRef<ShuffleVectorInst *> Shuffles,
1397                               ArrayRef<unsigned> Indices,
1398                               unsigned Factor) const override;
1399 
1400     /// Lower interleaved store(s) into target specific
1401     /// instructions/intrinsics.
1402     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1403                                unsigned Factor) const override;
1404 
1405     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1406                                    SDValue Addr, SelectionDAG &DAG)
1407                                    const override;
1408 
1409   protected:
1410     std::pair<const TargetRegisterClass *, uint8_t>
1411     findRepresentativeClass(const TargetRegisterInfo *TRI,
1412                             MVT VT) const override;
1413 
1414   private:
1415     /// Keep a reference to the X86Subtarget around so that we can
1416     /// make the right decision when generating code for different targets.
1417     const X86Subtarget &Subtarget;
1418 
1419     /// Select between SSE or x87 floating point ops.
1420     /// When SSE is available, use it for f32 operations.
1421     /// When SSE2 is available, use it for f64 operations.
1422     bool X86ScalarSSEf32;
1423     bool X86ScalarSSEf64;
1424 
1425     /// A list of legal FP immediates.
1426     std::vector<APFloat> LegalFPImmediates;
1427 
1428     /// Indicate that this x86 target can instruction
1429     /// select the specified FP immediate natively.
1430     void addLegalFPImmediate(const APFloat& Imm) {
1431       LegalFPImmediates.push_back(Imm);
1432     }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG& DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

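    /// Returns the X86ISD wrapper opcode to use for the address described by
    /// \p GV / \p OpFlags (for instance RIP-relative vs. absolute wrapping);
    /// the exact rule is an implementation detail and only sketched here.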
    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                          RTLIB::Libcall Call) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

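    /// Returns true when an atomic operation on \p MemType is too wide for a
    /// plain lock-prefixed instruction and (per the "Nb" naming convention)
    /// has to be lowered through CMPXCHG8B/CMPXCHG16B instead.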
    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

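    // Full operand layout (operand 0 is the chain from MemIntrinsicSDNode;
    // operand 1 carries the pass-thru or stored value in the two subclasses
    // below):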
    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
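  // Illustrative expectation (an assumed example, mirroring the style of the
  // comment below): for MVT::v8i16 with Lo == true and Unary == false the
  // mask should be <0, 8, 1, 9, 2, 10, 3, 11>, i.e. the punpcklwd pattern.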

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H