//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
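
// The isRegisterLoad / isRegisterStore bits packed into TSFlags above can be
// queried from a MachineInstr on the C++ side. A minimal sketch (the helper
// name is illustrative, not something defined in this file):
//
//   bool isRegisterLoad(const MachineInstr &MI) {
//     return MI.getDesc().TSFlags & (UINT64_C(1) << 63); // TSFlags{63}
//   }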

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"true">;

// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;

class PredicateControl {
  Predicate SubtargetPredicate = InvalidPred;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;
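
// PredicateControl gathers the subtarget, assembler, and other predicates
// into the single Predicates list that TableGen consumes. A pattern opts in
// by inheriting from AMDGPUPat and overriding the relevant field, e.g.
// (illustrative; FeatureX stands in for a real subtarget feature predicate):
//
//   def : AMDGPUPat<(some_node ...), (SOME_INST ...)> {
//     let SubtargetPredicate = FeatureX;
//   }
//
// Patterns that never assign SubtargetPredicate keep the InvalidPred
// default, whose bogus condition string surfaces verbatim in the generated
// matcher and points straight at the offending definition.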

def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">;
def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">;
def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;
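
// The *_oneuse fragments below only match when the matched node has a single
// use. Folding an intermediate value into a larger instruction only pays off
// when no other user needs it; otherwise the value would have to be computed
// twice.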

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;

def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;
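
// hi_f16_elt matches an f16 extracted from the high half of a 32-bit value,
// i.e. a (bitcast (srl x, 16)) chain. The predicate below walks the DAG by
// hand so it can check that the shift amount is the constant 16.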
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;

def COND_ONE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}]
>;

def COND_OGT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
>;

def COND_OGE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
>;

def COND_OLT : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;

def COND_OLE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;

def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - Why does the R600 version prefer unordered for setne?
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

class Aligned16Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAlignment() >= 16;
}]>;

class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;

class StoreFrag<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;
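
// StoreHi16 matches a truncating store of the high 16 bits of a value,
// (op (srl $value, 16), $ptr). Subtargets with d16-hi style store
// instructions can use it (see the *_hi16_* defs below) to store a packed
// high half without a separate shift.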

class PrivateAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;

class ConstantAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class LocalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

class GlobalAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class GlobalLoadAddress : CodePatPred<[{
  auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatLoadAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS ||
         AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;

class FlatStoreAddress : CodePatPred<[{
  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
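
// Note the asymmetry between the two flat classes: flat loads also accept
// the constant address space, since constant memory can be read through a
// flat pointer, but flat stores do not, because constant memory is not
// writable.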

class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                              (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;
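
// "az" = anyext or zext: a sub-dword load zero-extends into the full
// register on this hardware either way, so an anyext load (high bits
// unspecified) can safely be selected to the same instruction as a zext
// load.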

def az_extload : AZExtLoadBase <unindexedload>;

def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;

class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;

class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;

class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;

class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;

def load_private : PrivateLoad <load>;
def az_extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;
def az_extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

def store_private : PrivateStore <store>;
def truncstorei8_private : PrivateStore<truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;

def load_global : GlobalLoad <load>;
def sextloadi8_global : GlobalLoad <sextloadi8>;
def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi16_global : GlobalLoad <sextloadi16>;
def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def atomic_load_global : GlobalLoad<atomic_load>;

def store_global : GlobalStore <store>;
def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;
def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;

def load_local : LocalLoad <load>;
def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;
def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;
def atomic_load_32_local : LocalLoad<atomic_load_32>;
def atomic_load_64_local : LocalLoad<atomic_load_64>;

def store_local : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;

def load_align8_local : Aligned8Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def load_align16_local : Aligned16Bytes <
  (ops node:$ptr), (load_local node:$ptr)
>;

def store_align8_local : Aligned8Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def store_align16_local : Aligned16Bytes <
  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;

def load_flat          : FlatLoad <load>;
def az_extloadi8_flat  : FlatLoad <az_extloadi8>;
def sextloadi8_flat    : FlatLoad <sextloadi8>;
def az_extloadi16_flat : FlatLoad <az_extloadi16>;
def sextloadi16_flat   : FlatLoad <sextloadi16>;
def atomic_load_flat   : FlatLoad<atomic_load>;

def store_flat         : FlatStore <store>;
def truncstorei8_flat  : FlatStore <truncstorei8>;
def truncstorei16_flat : FlatStore <truncstorei16>;
def atomic_store_flat  : FlatStore <atomic_store>;
def truncstorei8_hi16_flat  : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;

def constant_load : ConstantLoad<load>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;
def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;
def az_extloadi16_constant : ConstantLoad <az_extloadi16>;

class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;

class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
  AtomicSDNode *AN = cast<AtomicSDNode>(N);
  return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;

def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;

multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
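
// Each defm below expands to three fragments: the bare name matches any
// global atomic of that kind, _noret additionally requires the returned
// value to be dead (SDValue(N, 0).use_empty()), and _ret requires it to be
// live. This lets selection prefer the cheaper non-returning instruction
// form whenever the old value is unused.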

defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

// Legacy.
def AMDGPUatomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;

def atomic_cmp_swap_global_noret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
  (ops node:$ptr, node:$cmp, node:$value),
  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 2^32 (UINT_MAX + 1) as an f32
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
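
// These are IEEE-754 bit patterns kept as plain integers so they can be fed
// to immediate operands in patterns. For example, CONST.TWO_PI is the f32
// encoding of 2*pi (~6.2831855), and CONST.V2FP16_ONE packs the f16 encoding
// of 1.0 (0x3C00) into both halves of a v2f16 value.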

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
> {
  let SubtargetPredicate = TruePredicate;
}

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

multiclass BFIPatterns <Instruction BFI_INT,
                        Instruction LoadImm32,
                        RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : AMDGPUPat <
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (or (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : AMDGPUPat <
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  // 64-bit version
  def : AMDGPUPat <
    (xor i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0)),
               (i32 (EXTRACT_SUBREG $z, sub0))), sub0,
      (BFI_INT (i32 (EXTRACT_SUBREG $x, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1)),
               (i32 (EXTRACT_SUBREG $z, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  def : AMDGPUPat <
    (f32 (fcopysign f32:$src0, f64:$src1)),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
             (i32 (EXTRACT_SUBREG $src1, sub1)))
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  def : AMDGPUPat <
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}
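
// All of the fcopysign selections above use the same trick: with the mask
// 0x7fffffff, BFI_INT computes
//   (src0 & 0x7fffffff) | (src1 & 0x80000000)
// i.e. the magnitude bits of src0 with the sign bit of src1, which is
// exactly fcopysign. The f64 variants only rewrite the high 32-bit half
// (sub1), where the sign bit lives; the low half is copied unchanged.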

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
multiclass SHA256MaPattern <Instruction BFI_INT, Instruction XOR, RegisterClass RC64> {
  def : AMDGPUPat <
    (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
    (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
  >;

  def : AMDGPUPat <
    (or (and i64:$x, i64:$z), (and i64:$y, (or i64:$x, i64:$z))),
    (REG_SEQUENCE RC64,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub0)),
                    (i32 (EXTRACT_SUBREG $y, sub0))),
               (i32 (EXTRACT_SUBREG $z, sub0)),
               (i32 (EXTRACT_SUBREG $y, sub0))), sub0,
      (BFI_INT (XOR (i32 (EXTRACT_SUBREG $x, sub1)),
                    (i32 (EXTRACT_SUBREG $y, sub1))),
               (i32 (EXTRACT_SUBREG $z, sub1)),
               (i32 (EXTRACT_SUBREG $y, sub1))), sub1)
  >;
}
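
// Checking the rewrite bitwise: BFI_INT(a, b, c) = (b & a) | (c & ~a), here
// with a = x ^ y, b = z, c = y. Where x and y agree, x ^ y is 0 and the
// result is y (= x); where they differ, x ^ y is 1 and the result is z. Both
// cases match Ma = (x & z) | (y & (x | z)).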

// Bitfield extract patterns

def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;
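
// IMMPopCount is an SDNodeXForm: it rewrites the matched immediate instead
// of testing it. Applied to a low-bit mask accepted by
// IMMZeroBasedBitfieldMask, the population count is the field width, e.g.
// the mask 0x7f (seven set bits) becomes the width constant 7 fed to UBFE
// below.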

multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
  def : AMDGPUPat <
    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
  >;

  // x & ((1 << y) - 1)
  def : AMDGPUPat <
    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & ~(-1 << y)
  def : AMDGPUPat <
    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x & (-1 >> (bitwidth - y))
  def : AMDGPUPat <
    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  // x << (bitwidth - y) >> (bitwidth - y)
  def : AMDGPUPat <
    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (UBFE $src, (MOV (i32 0)), $width)
  >;

  def : AMDGPUPat <
    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
    (SBFE $src, (MOV (i32 0)), $width)
  >;
}
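
// These are all idioms for extracting a bitfield: UBFE dst, src, offset,
// width yields (src >> offset) & ((1 << width) - 1), and SBFE does the same
// but sign-extends from bit (width - 1). The first pattern covers an
// explicit shift-and-mask; the rest cover the common zero-offset maskings
// and the shl/srl (or shl/sra, for the signed case) sandwich.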

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
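
// The bit-align instruction shifts the 64-bit concatenation (src0:src1)
// right by src2 and keeps the low 32 bits; feeding the same register into
// both halves turns that funnel shift into a rotate, so
// rotr(x, n) == bitalign(x, x, n).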

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse,
                 ValueType vt = i32> : AMDGPUPat<
  (max (min_oneuse vt:$src0, vt:$src1),
       (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
  (med3Inst $src0, $src1, $src2)
>;
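
// max(min(x, y), min(max(x, y), z)) computes the median of x, y, and z,
// which is what a med3 instruction does in one operation. The 16
// permutations come from commutativity: each of the four min/max nodes can
// have its operands in either order.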

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;