• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Addressing-mode selectors. Each ComplexPattern matches an i64 address via
// the named selector function and yields three operands, which the patterns
// in this file bind as ($vaddr, $offset, $slc).
def FLATAtomic :
  ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset :
  ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

// Variants used with the signed-offset (global) patterns.
def FLATOffsetSigned :
  ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic :
  ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
14
15//===----------------------------------------------------------------------===//
16// FLAT classes
17//===----------------------------------------------------------------------===//
18
// Base class shared by all FLAT-encoded pseudo instructions (flat, global
// and scratch segments). The asm string is left empty here; FLAT_Real pastes
// Mnemonic and AsmOperands back together and reads the has_* / enabled_*
// knobs below when filling in encoding fields.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern = []>
  : InstSI<outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isCodeGenOnly = 1;
  let isPseudo = 1;

  let FLAT = 1;

  let SchedRW = [WriteVMEM];
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selection. With both bits clear the instruction belongs to the
  // plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Having" saddr and "enabling" saddr are tracked separately because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set them to the disabled value for the original flat segment
  // instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;

  // When has_glc is 0, FLAT_Real encodes glcValue instead of a glc operand.
  // NOTE(review): dlcValue is presumably consumed the same way by an
  // encoding class outside this view - confirm.
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;

  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // Global-segment instructions use only EXEC; all others also use FLAT_SCR.
  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // Global and scratch instructions do not set LGKM_CNT.
  let VM_CNT = 1;
  let LGKM_CNT = !if(is_flat_global, 0, !if(is_flat_scratch, 0, 1));

  let IsNonFlatSeg = !if(is_flat_global, 1, !if(is_flat_scratch, 1, 0));
}
71
// Real (encodable) FLAT instruction derived from a FLAT_Pseudo. Operand
// lists and the asm string are taken from the pseudo; this class lays out
// the 64-bit encoding, consulting the pseudo's has_* / enabled_* bits to
// decide which fields carry an operand.
class FLAT_Real<bits<7> op, FLAT_Pseudo ps>
  : InstSI<ps.OutOperandList, ps.InOperandList,
           ps.Mnemonic # ps.AsmOperands, []>,
    Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  // Copy the relevant flags from the pseudo.
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let TSFlags = ps.TSFlags;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let SubtargetPredicate = ps.SubtargetPredicate;

  // Encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Bit layout, most significant field first. Bit 25 is not assigned here.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
  let Inst{55}    = nv; // nv on GFX9+, TFE before.

  // saddr field: the register when saddr is enabled, 0x7f when the
  // instruction has the field but it is disabled, and 0 when the
  // instruction has no saddr at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{24-18} = op;
  let Inst{17}    = slc;

  // Instructions without a glc operand encode the pseudo's fixed glcValue.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);

  // Only valid on GFX9+
  let Inst{15-14} = seg;
  let Inst{13} = lds;
  let Inst{12-0} = offset;
}
127
// Mixin that tags an instruction with a shared SaddrOp key and a flag saying
// whether the record is the saddr form.
class GlobalSaddrTable<bit is_saddr, string Name = ""> {
  string SaddrOp = Name;
  bit IsSaddr = is_saddr;
}
132
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo addressed through a 64-bit VGPR pair.
//   HasTiedOutput - append a $vdst_in input that is tied to $vdst and left
//                   out of the encoding.
//   HasSaddr      - the instruction has an saddr field; the asm string then
//                   prints either the register or "off".
//   EnableSaddr   - an SReg_64 $saddr operand is actually present.
class FLAT_Load_Pseudo<string opName, RegisterClass regClass,
                       bit HasTiedOutput = 0,
                       bit HasSaddr = 0, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      // ins = vaddr [, saddr], offset, glc, slc, dlc [, vdst_in]
      !con(
        !con(
          !con((ins VReg_64:$vaddr),
               !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
          (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
        !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
      " $vdst, $vaddr"
        # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
        # "$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the form with saddr both present and enabled gets the suffix.
  let PseudoInstr =
    opName # !if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");

  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
}
158
// Store pseudo addressed through a 64-bit VGPR pair; it has no $vdst.
//   HasSaddr    - the instruction has an saddr field; the asm string then
//                 prints either the register or "off".
//   EnableSaddr - an SReg_64 $saddr operand is actually present.
class FLAT_Store_Pseudo<string opName, RegisterClass vdataClass,
                        bit HasSaddr = 0, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      // ins = vaddr, vdata [, saddr], offset, glc, slc, dlc
      !con(
        !con((ins VReg_64:$vaddr, vdataClass:$vdata),
             !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
      " $vaddr, $vdata"
        # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
        # "$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the form with saddr both present and enabled gets the suffix.
  let PseudoInstr =
    opName # !if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");
}
176
// Defines the two global-segment forms of a load: the base record with saddr
// disabled ("off") and a _SADDR record with saddr enabled. HasTiedInput is
// forwarded as FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass,
                                   bit HasTiedInput = 0> {
  let is_flat_global = 1 in
  def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
           GlobalSaddrTable<0, opName>;

  let is_flat_global = 1 in
  def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
               GlobalSaddrTable<1, opName>;
}
185
// Defines the two global-segment forms of a store: the base record with
// saddr disabled ("off") and a _SADDR record with saddr enabled.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in
  def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
           GlobalSaddrTable<0, opName>;

  let is_flat_global = 1 in
  def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
               GlobalSaddrTable<1, opName>;
}
194
// Scratch load pseudo. The address comes from exactly one of a 32-bit VGPR
// ($vaddr) or, when EnableSaddr is set, a 32-bit SGPR ($saddr); the unused
// one is printed as "off".
class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass,
                               bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(!if(EnableSaddr,
               (ins SReg_32_XEXEC_HI:$saddr),
               (ins VGPR_32:$vaddr)),
           (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
      " $vdst, "
        # !if(EnableSaddr, "off, $saddr", "$vaddr, off")
        # "$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;

  // The saddr field always exists for scratch; vaddr exists only when saddr
  // is not the address source.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
}
211
// Scratch store pseudo. The address comes from exactly one of a 32-bit VGPR
// ($vaddr) or, when EnableSaddr is set, a 32-bit SGPR ($saddr); the unused
// one is printed as "off".
class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass vdataClass,
                                bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      !con((ins vdataClass:$vdata),
           !con(!if(EnableSaddr,
                    (ins SReg_32_XEXEC_HI:$saddr),
                    (ins VGPR_32:$vaddr)),
                (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc))),
      !if(EnableSaddr, " off, $vdata, $saddr", " $vaddr, $vdata, off")
        # "$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;

  // The saddr field always exists for scratch; vaddr exists only when saddr
  // is not the address source.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
}
228
// Defines both scratch load forms: the base (VGPR-addressed) record and the
// _SADDR (SGPR-addressed) record.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in
  def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;

  let is_flat_scratch = 1 in
  def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
}
235
// Defines both scratch store forms: the base (VGPR-addressed) record and the
// _SADDR (SGPR-addressed) record.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in
  def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;

  let is_flat_scratch = 1 in
  def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
}
242
// Atomic pseudo that does not return the pre-operation value: it both loads
// and stores, has no $vdst, and takes no glc/dlc operands (glcValue and
// dlcValue are fixed to 0).
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []>
  : FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let maybeAtomic = 1;
  let mayStore = 1;
  let mayLoad = 1;

  let has_vdst = 0;

  let has_glc  = 0;
  let glcValue = 0;
  let has_dlc  = 0;
  let dlcValue = 0;
}
255
// Returning flavour of an atomic pseudo: re-enables $vdst and fixes the
// encoded glc bit to 1 (dlc stays 0). The pseudo name gets an "_RTN" suffix.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let PseudoInstr = NAME # "_RTN";
  let hasPostISelHook = 1;

  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
}
265
// Flat-segment atomic: defines the non-returning base record and a returning
// _RTN record. Only _RTN carries a selection pattern, built from `atomic`
// and the FLATAtomic addressing mode; data_vt/data_rc allow the data operand
// to differ from the result (as the cmpswap instantiations do).
multiclass FLAT_Atomic_Pseudo<string opName,
                              RegisterClass vdst_rc,
                              ValueType vt,
                              SDPatternOperator atomic = null_frag,
                              ValueType data_vt = vt,
                              RegisterClass data_rc = vdst_rc,
                              bit isFP = isFloatType<data_vt>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_rc:$vdata,
                  flat_offset:$offset, SLC:$slc),
             " $vaddr, $vdata$offset$slc">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
  }

  // The asm string spells "glc" literally; the bit itself is fixed by
  // FLAT_AtomicRet_Pseudo.
  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs vdst_rc:$vdst),
               (ins VReg_64:$vaddr, data_rc:$vdata,
                    flat_offset:$offset, SLC:$slc),
               " $vdst, $vaddr, $vdata$offset glc$slc",
               [(set vt:$vdst,
                     (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc),
                             data_vt:$vdata))]>,
             GlobalSaddrTable<0, opName # "_rtn">,
             AtomicNoRet<opName, 1> {
    let FPAtomic = isFP;
  }
}
295
// Non-returning global atomics: the base record (saddr printed as "off") and
// the _SADDR record (saddr enabled). Neither record carries a selection
// pattern; the `atomic` and `vt` parameters are not referenced in this body.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_rc:$vdata,
                  flat_offset:$offset, SLC:$slc),
             " $vaddr, $vdata, off$offset$slc">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
    let has_saddr = 1;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo<
                 opName,
                 (outs),
                 (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr,
                      flat_offset:$offset, SLC:$slc),
                 " $vaddr, $vdata, $saddr$offset$slc">,
               GlobalSaddrTable<1, opName>,
               AtomicNoRet<opName # "_saddr", 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME # "_SADDR";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
328
// Returning global atomics: _RTN (saddr printed as "off") and _SADDR_RTN
// (saddr enabled). Only _RTN carries a selection pattern, built from `atomic`
// and the FLATSignedAtomic addressing mode.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs vdst_rc:$vdst),
               (ins VReg_64:$vaddr, data_rc:$vdata,
                    flat_offset:$offset, SLC:$slc),
               " $vdst, $vaddr, $vdata, off$offset glc$slc",
               [(set vt:$vdst,
                     (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset,
                                               i1:$slc),
                             data_vt:$vdata))]>,
             GlobalSaddrTable<0, opName # "_rtn">,
             AtomicNoRet<opName, 1> {
    let FPAtomic = isFP;
    let has_saddr = 1;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo<
                     opName,
                     (outs vdst_rc:$vdst),
                     (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr,
                          flat_offset:$offset, SLC:$slc),
                     " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
                   GlobalSaddrTable<1, opName # "_rtn">,
                   AtomicNoRet<opName # "_saddr", 1> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME # "_SADDR_RTN";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
362
// Convenience wrapper that instantiates both the non-returning and the
// returning global atomic families for one opcode. atomic_no_rtn is passed
// to the NO_RTN family and atomic_rtn to the RTN family.
multiclass FLAT_Global_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic_rtn = null_frag,
    SDPatternOperator atomic_no_rtn = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc>
  : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn,
                                     data_vt, data_rc>,
    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn,
                                  data_vt, data_rc>;

373
374
375//===----------------------------------------------------------------------===//
376// Flat Instructions
377//===----------------------------------------------------------------------===//
378
// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo<"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo<"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo<"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo<"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo<"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo<"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo<"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo<"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;
394
// D16 flat loads and stores. The loads pass HasTiedOutput = 1, so each takes
// a $vdst_in operand tied to $vdst.
let SubtargetPredicate = HasD16LoadStore in {
  def FLAT_LOAD_UBYTE_D16 :
    FLAT_Load_Pseudo<"flat_load_ubyte_d16", VGPR_32, 1>;
  def FLAT_LOAD_UBYTE_D16_HI :
    FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
  def FLAT_LOAD_SBYTE_D16 :
    FLAT_Load_Pseudo<"flat_load_sbyte_d16", VGPR_32, 1>;
  def FLAT_LOAD_SBYTE_D16_HI :
    FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
  def FLAT_LOAD_SHORT_D16 :
    FLAT_Load_Pseudo<"flat_load_short_d16", VGPR_32, 1>;
  def FLAT_LOAD_SHORT_D16_HI :
    FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;

  def FLAT_STORE_BYTE_D16_HI :
    FLAT_Store_Pseudo<"flat_store_byte_d16_hi", VGPR_32>;
  def FLAT_STORE_SHORT_D16_HI :
    FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;
} // End SubtargetPredicate = HasD16LoadStore
406
// Flat-segment integer atomics. Each defm yields a non-returning record and
// a returning _RTN record. The cmpswap forms take a data operand twice as
// wide as the result.

// 32-bit.
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo<
  "flat_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
  v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
  v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo<
  "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat_32>;

defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat_64>;

defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo<
  "flat_atomic_add", VGPR_32, i32, atomic_load_add_flat_32>;

defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo<
  "flat_atomic_sub", VGPR_32, i32, atomic_load_sub_flat_32>;

defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo<
  "flat_atomic_smin", VGPR_32, i32, atomic_load_min_flat_32>;

defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo<
  "flat_atomic_umin", VGPR_32, i32, atomic_load_umin_flat_32>;

defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo<
  "flat_atomic_smax", VGPR_32, i32, atomic_load_max_flat_32>;

defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo<
  "flat_atomic_umax", VGPR_32, i32, atomic_load_umax_flat_32>;

defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo<
  "flat_atomic_and", VGPR_32, i32, atomic_load_and_flat_32>;

defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo<
  "flat_atomic_or", VGPR_32, i32, atomic_load_or_flat_32>;

defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo<
  "flat_atomic_xor", VGPR_32, i32, atomic_load_xor_flat_32>;

defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo<
  "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat_32>;

defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo<
  "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat_32>;

// 64-bit.
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_add_x2", VReg_64, i64, atomic_load_add_flat_64>;

defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_sub_x2", VReg_64, i64, atomic_load_sub_flat_64>;

defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_smin_x2", VReg_64, i64, atomic_load_min_flat_64>;

defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_umin_x2", VReg_64, i64, atomic_load_umin_flat_64>;

defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_smax_x2", VReg_64, i64, atomic_load_max_flat_64>;

defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_umax_x2", VReg_64, i64, atomic_load_umax_flat_64>;

defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_and_x2", VReg_64, i64, atomic_load_and_flat_64>;

defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_or_x2", VReg_64, i64, atomic_load_or_flat_64>;

defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_xor_x2", VReg_64, i64, atomic_load_xor_flat_64>;

defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat_64>;

defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat_64>;
486
// GFX7-, GFX10-only flat instructions. These floating-point atomics are
// defined without a selection pattern (the operator is null_frag, either
// explicitly or by default).
let SubtargetPredicate = isGFX7GFX10 in {

  defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo<
    "flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;

  defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo<
    "flat_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;

  defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo<
    "flat_atomic_fmin", VGPR_32, f32>;

  defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo<
    "flat_atomic_fmax", VGPR_32, f32>;

  defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo<
    "flat_atomic_fmin_x2", VReg_64, f64>;

  defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo<
    "flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10
509
// Global-segment instructions. Every defm below produces a base record and
// an _SADDR record; the atomics additionally produce _RTN and _SADDR_RTN.
let SubtargetPredicate = HasFlatGlobalInsts in {

  // Loads.
  defm GLOBAL_LOAD_UBYTE :
    FLAT_Global_Load_Pseudo<"global_load_ubyte", VGPR_32>;
  defm GLOBAL_LOAD_SBYTE :
    FLAT_Global_Load_Pseudo<"global_load_sbyte", VGPR_32>;
  defm GLOBAL_LOAD_USHORT :
    FLAT_Global_Load_Pseudo<"global_load_ushort", VGPR_32>;
  defm GLOBAL_LOAD_SSHORT :
    FLAT_Global_Load_Pseudo<"global_load_sshort", VGPR_32>;
  defm GLOBAL_LOAD_DWORD :
    FLAT_Global_Load_Pseudo<"global_load_dword", VGPR_32>;
  defm GLOBAL_LOAD_DWORDX2 :
    FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
  defm GLOBAL_LOAD_DWORDX3 :
    FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
  defm GLOBAL_LOAD_DWORDX4 :
    FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;

  // D16 loads; the trailing 1 requests the tied $vdst_in operand.
  defm GLOBAL_LOAD_UBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16 :
    FLAT_Global_Load_Pseudo<"global_load_short_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;

  // Stores.
  defm GLOBAL_STORE_BYTE :
    FLAT_Global_Store_Pseudo<"global_store_byte", VGPR_32>;
  defm GLOBAL_STORE_SHORT :
    FLAT_Global_Store_Pseudo<"global_store_short", VGPR_32>;
  defm GLOBAL_STORE_DWORD :
    FLAT_Global_Store_Pseudo<"global_store_dword", VGPR_32>;
  defm GLOBAL_STORE_DWORDX2 :
    FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
  defm GLOBAL_STORE_DWORDX3 :
    FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
  defm GLOBAL_STORE_DWORDX4 :
    FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

  // D16 stores.
  defm GLOBAL_STORE_BYTE_D16_HI :
    FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi", VGPR_32>;
  defm GLOBAL_STORE_SHORT_D16_HI :
    FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;

  // Integer atomics. Only the returning operator is supplied, so the
  // non-returning operator keeps its null_frag default.
  let is_flat_global = 1 in {

    defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo<
      "global_atomic_cmpswap", VGPR_32, i32,
      AMDGPUatomic_cmp_swap_global_32, null_frag, v2i32, VReg_64>;

    defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_cmpswap_x2", VReg_64, i64,
      AMDGPUatomic_cmp_swap_global_64, null_frag, v2i64, VReg_128>;

    defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo<
      "global_atomic_swap", VGPR_32, i32, atomic_swap_global_32>;

    defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64>;

    defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo<
      "global_atomic_add", VGPR_32, i32, atomic_load_add_global_32>;

    defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo<
      "global_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32>;

    defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smin", VGPR_32, i32, atomic_load_min_global_32>;

    defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32>;

    defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smax", VGPR_32, i32, atomic_load_max_global_32>;

    defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32>;

    defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo<
      "global_atomic_and", VGPR_32, i32, atomic_load_and_global_32>;

    defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo<
      "global_atomic_or", VGPR_32, i32, atomic_load_or_global_32>;

    defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo<
      "global_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32>;

    defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo<
      "global_atomic_inc", VGPR_32, i32, atomic_inc_global_32>;

    defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo<
      "global_atomic_dec", VGPR_32, i32, atomic_dec_global_32>;

    defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64>;

    defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64>;

    defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64>;

    defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64>;

    defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64>;

    defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64>;

    defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64>;

    defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64>;

    defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64>;

    defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64>;

    defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>;

  } // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts
621
622
// Scratch-segment instructions. Every defm below produces a base
// (VGPR-addressed) record and an _SADDR (SGPR-addressed) record.
let SubtargetPredicate = HasFlatScratchInsts in {

  // Loads.
  defm SCRATCH_LOAD_UBYTE :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte", VGPR_32>;
  defm SCRATCH_LOAD_USHORT :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ushort", VGPR_32>;
  defm SCRATCH_LOAD_SSHORT :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sshort", VGPR_32>;
  defm SCRATCH_LOAD_DWORD :
    FLAT_Scratch_Load_Pseudo<"scratch_load_dword", VGPR_32>;
  defm SCRATCH_LOAD_DWORDX2 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx2", VReg_64>;
  defm SCRATCH_LOAD_DWORDX3 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx3", VReg_96>;
  defm SCRATCH_LOAD_DWORDX4 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx4", VReg_128>;

  // D16 loads.
  defm SCRATCH_LOAD_UBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16", VGPR_32>;
  defm SCRATCH_LOAD_UBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16_hi", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16_hi", VGPR_32>;
  defm SCRATCH_LOAD_SHORT_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16", VGPR_32>;
  defm SCRATCH_LOAD_SHORT_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16_hi", VGPR_32>;

  // Stores.
  defm SCRATCH_STORE_BYTE :
    FLAT_Scratch_Store_Pseudo<"scratch_store_byte", VGPR_32>;
  defm SCRATCH_STORE_SHORT :
    FLAT_Scratch_Store_Pseudo<"scratch_store_short", VGPR_32>;
  defm SCRATCH_STORE_DWORD :
    FLAT_Scratch_Store_Pseudo<"scratch_store_dword", VGPR_32>;
  defm SCRATCH_STORE_DWORDX2 :
    FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx2", VReg_64>;
  defm SCRATCH_STORE_DWORDX3 :
    FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx3", VReg_96>;
  defm SCRATCH_STORE_DWORDX4 :
    FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx4", VReg_128>;

  // D16 stores.
  defm SCRATCH_STORE_BYTE_D16_HI :
    FLAT_Scratch_Store_Pseudo<"scratch_store_byte_d16_hi", VGPR_32>;
  defm SCRATCH_STORE_SHORT_D16_HI :
    FLAT_Scratch_Store_Pseudo<"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts
651
// GFX10+ global floating-point atomics. No selection operators are supplied,
// so both keep their null_frag defaults.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
666
// Global floating-point add atomics; only the non-returning family is
// instantiated.
let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {

  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN<
    "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret>;

  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN<
    "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret>;

} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1
677
678//===----------------------------------------------------------------------===//
679// Flat Patterns
680//===----------------------------------------------------------------------===//
681
// Selects a load whose address matches FLATOffset (base address plus
// immediate offset) to `inst`.
//
// Output operands are bound by position. The load pseudos take
// (vaddr, offset, glc, slc, dlc), so the matched $slc goes in the fourth
// slot, with glc and dlc forced to 0.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;
687
// Selects a D16 load addressed via FLATOffset; $in is the incoming value of
// the destination register and is passed as the trailing tied operand.
//
// Output operands are bound by position. The tied-output load pseudos take
// (vaddr, offset, glc, slc, dlc, vdst_in), so the matched $slc goes in the
// fourth slot, with glc and dlc forced to 0.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, $slc, 0, $in)
>;
692
// Selects a D16 load addressed via FLATOffsetSigned; $in is the incoming
// value of the destination register and is passed as the trailing tied
// operand.
//
// Output operands are bound by position. The tied-output load pseudos take
// (vaddr, offset, glc, slc, dlc, vdst_in), so the matched $slc goes in the
// fourth slot, with glc and dlc forced to 0.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, $slc, 0, $in)
>;
697
// Selects an atomic load whose address matches FLATAtomic to `inst`.
//
// Output operands are bound by position. The load pseudos take
// (vaddr, offset, glc, slc, dlc), so the matched $slc goes in the fourth
// slot, with glc and dlc forced to 0.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;
702
// Selects a load whose address matches FLATOffsetSigned to `inst`.
//
// Output operands are bound by position. The load pseudos take
// (vaddr, offset, glc, slc, dlc), so the matched $slc goes in the fourth
// slot, with glc and dlc forced to 0.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;
707
// Selects a store (value first, then address) whose address matches
// FLATOffset to `inst`; `rc` is the register class given to the stored data.
//
// Output operands are bound by position. The store pseudos take
// (vaddr, vdata, offset, glc, slc, dlc), so the matched $slc goes in the
// fifth slot, with glc and dlc forced to 0.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, $slc, 0)
>;
712
// Store pattern whose address is matched with FLATOffsetSigned
// (SelectFlatOffset<true>). Data comes first in `node` and is passed to
// `inst` in register class `rc` (VGPR_32 unless overridden).
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt, RegisterClass rc = VGPR_32>
  : GCNPat <
      (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
717
// Atomic store pattern using FLATAtomic (SelectFlatAtomic).
// Unlike the plain store patterns, the address is the FIRST operand of
// `node` and the data the second: atomic stores follow the operand order of
// atomic binary operations.
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt, RegisterClass rc = VGPR_32>
  : GCNPat <
      (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
724
// Atomic store pattern using FLATSignedAtomic (SelectFlatAtomicSigned).
// The address is the FIRST operand of `node` and the data the second: atomic
// stores follow the operand order of atomic binary operations.
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, RegisterClass rc = VGPR_32>
  : GCNPat <
      (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
731
// Returning atomic read-modify-write pattern using FLATAtomic
// (SelectFlatAtomic). The result has type `vt`; the data operand has type
// `data_vt`, which defaults to `vt` and may be given separately when the two
// differ.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt>
  : GCNPat <
      (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc),
                data_vt:$data)),
      (inst $vaddr, $data, $offset, $slc)
    >;
737
// Atomic read-modify-write pattern with no result value: `node` is matched
// without a result type. The address is matched with FLATAtomic
// (SelectFlatAtomic) and the data operand has type `vt`.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt>
  : GCNPat <
      (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, $data, $offset, $slc)
    >;
742
// Returning atomic read-modify-write pattern using FLATSignedAtomic
// (SelectFlatAtomicSigned). The result has type `vt`; the data operand has
// type `data_vt`, which defaults to `vt`.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt, ValueType data_vt = vt>
  : GCNPat <
      (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc),
                data_vt:$data)),
      (inst $vaddr, $data, $offset, $slc)
    >;
748
// Selection patterns for the FLAT_* instructions. Everything in this scope is
// predicated on HasFlatAddressSpace unless an inner `let` says otherwise.
let OtherPredicates = [HasFlatAddressSpace] in {

// Extending loads, 16-bit loads and the 96-bit load.
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  extloadi16_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  load_flat,        i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat,        v3i32>;

// Atomic loads.
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating stores from a 32-bit value.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// Full-width loads and stores, one pattern per value type of each width.
foreach vt = Reg32Types.types in {
def : FlatLoadPat  <FLAT_LOAD_DWORD,  load_flat,  vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
def : FlatLoadPat  <FLAT_LOAD_DWORDX2,  load_flat,  vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat  <FLAT_LOAD_DWORDX4,  load_flat,  vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
}

// Atomic stores.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;

// 32-bit returning atomics.
// NOTE(review): these FLAT_ATOMIC_* patterns are keyed on the *_global_*
// nodes; confirm that those fragments are meant to cover flat addresses too.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,     atomic_load_add_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,     atomic_load_sub_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,     atomic_inc_global_32,       i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,     atomic_dec_global_32,       i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,     atomic_load_and_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN,    atomic_load_max_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN,    atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN,    atomic_load_min_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN,    atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,      atomic_load_or_global_32,   i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global_32,      i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,     atomic_load_xor_global_32,  i32>;

// 64-bit returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,     atomic_load_add_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,     atomic_load_sub_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global_64,       i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global_64,       i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,     atomic_load_and_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN,    atomic_load_max_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN,    atomic_load_umax_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN,    atomic_load_min_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN,    atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,      atomic_load_or_global_64,   i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global_64,      i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,     atomic_load_xor_global_64,  i64>;

// Stores of a 16-bit value.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat,        i16>;

// D16 loads and high-half stores.
// NOTE(review): this inner `let` assigns OtherPredicates again, so inside this
// scope the list is [D16PreservesUnusedBits] only, not an extension of the
// enclosing [HasFlatAddressSpace].
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2f16>;
}

} // End OtherPredicates = [HasFlatAddressSpace]
840
// Selection patterns for the GLOBAL_* instructions. AddedComplexity = 10
// raises the complexity of every pattern in this scope.
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {

// Extending loads and 16-bit loads.
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global,        i16>;

// Full-width loads and stores, one pattern per value type of each width.
foreach vt = Reg32Types.types in {
def : FlatLoadSignedPat  <GLOBAL_LOAD_DWORD,  load_global,  vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatLoadSignedPat  <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
}

def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadSignedPat  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
}

// Atomic loads.
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Narrow stores and the 96-bit store.
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE,    truncstorei8_global,  i32,   VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE,    truncstorei8_global,  i16,   VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT,   truncstorei16_global, i32,   VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT,   store_global,         i16,   VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global,         v3i32, VReg_96>;

// D16 loads and high-half stores.
// NOTE(review): this inner `let` assigns OtherPredicates again, so inside this
// scope the list is [D16PreservesUnusedBits] only; AddedComplexity = 10 from
// the enclosing scope is not touched.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI,  truncstorei8_hi16_global,  i32>;

def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2f16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2f16>;

def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2f16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2i16>;
def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2f16>;
}

// Atomic stores.
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD,   atomic_store_global_32, i32>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;

// 32-bit returning atomics.
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN,     atomic_load_add_global_32,  i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN,     atomic_load_sub_global_32,  i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN,     atomic_inc_global_32,       i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN,     atomic_dec_global_32,       i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN,     atomic_load_and_global_32,  i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN,    atomic_load_max_global_32,  i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN,    atomic_load_umax_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN,    atomic_load_min_global_32,  i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN,    atomic_load_umin_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN,      atomic_load_or_global_32,   i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN,    atomic_swap_global_32,      i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN,     atomic_load_xor_global_32,  i32>;

// 64-bit returning atomics.
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN,     atomic_load_add_global_64,  i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN,     atomic_load_sub_global_64,  i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN,     atomic_inc_global_64,       i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN,     atomic_dec_global_64,       i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN,     atomic_load_and_global_64,  i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN,    atomic_load_max_global_64,  i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN,    atomic_load_umax_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN,    atomic_load_min_global_64,  i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN,    atomic_load_umin_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN,      atomic_load_or_global_64,   i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN,    atomic_swap_global_64,      i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN,     atomic_load_xor_global_64,  i64>;

// No-return floating-point atomic adds.
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global_noret,    f32>;
def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;

} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
934
935
936//===----------------------------------------------------------------------===//
937// Target
938//===----------------------------------------------------------------------===//
939
940//===----------------------------------------------------------------------===//
941// CI
942//===----------------------------------------------------------------------===//
943
// Real (encoded) instruction for pseudo `ps` with opcode `op` in the
// SIEncodingFamily.SI family; assembled only under isGFX7Only and decoded in
// the "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";
}
950
// CI encodings: loads.
def FLAT_LOAD_UBYTE_ci    : FLAT_Real_ci <0x08, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci    : FLAT_Real_ci <0x09, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci   : FLAT_Real_ci <0x0a, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci   : FLAT_Real_ci <0x0b, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci    : FLAT_Real_ci <0x0c, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci  : FLAT_Real_ci <0x0d, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci  : FLAT_Real_ci <0x0e, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci  : FLAT_Real_ci <0x0f, FLAT_LOAD_DWORDX3>;

// CI encodings: stores.
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
966
// Emits the two CI real instructions of an atomic, both with opcode `op`:
// `_ci` for the pseudo named by ps.PseudoInstr and `_RTN_ci` for the pseudo
// with the same name plus "_RTN".
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci :
    FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci :
    FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
971
// CI encodings: 32-bit atomics.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;

// CI encodings: 64-bit (X2) atomics.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
998
// Floating-point flat atomics that get a CI encoding but no VI encoding in
// this file.
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1006
1007
1008//===----------------------------------------------------------------------===//
1009// VI
1010//===----------------------------------------------------------------------===//
1011
// Real (encoded) instruction for pseudo `ps` with opcode `op` in the
// SIEncodingFamily.VI family; assembled under isGFX8GFX9 and decoded in the
// "GFX8" namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace   = "GFX8";
}
1018
// Emits the VI real instructions for both addressing forms of the pseudo
// named NAME: `_vi` for NAME itself and `_SADDR_vi` for NAME + "_SADDR".
// Both share opcode `op`.
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")>;
}
1023
// VI encodings: loads.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// VI encodings: stores.
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// VI encodings: D16 loads.
def FLAT_LOAD_UBYTE_D16_vi     : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi  : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi     : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi  : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi     : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi  : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1048
// Emits the two VI real instructions of a flat atomic, both with opcode `op`:
// `_vi` for the pseudo named by ps.PseudoInstr and `_RTN_vi` for the pseudo
// with the same name plus "_RTN".
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
1053
// VI real instructions for a global atomic. Inherits `_vi` and `_SADDR_vi`
// from FLAT_Real_AllAddr_vi and adds the two returning forms, `_RTN_vi` and
// `_SADDR_RTN_vi`, all with opcode `op`.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op>
  : FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi :
    FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME # "_RTN")>;
  def _SADDR_RTN_vi :
    FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME # "_SADDR_RTN")>;
}
1059
1060
// VI encodings: 32-bit flat atomics.
defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;

// VI encodings: 64-bit (X2) flat atomics.
defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1087
// VI encodings: global loads.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;

// VI encodings: global D16 loads.
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;

// VI encodings: global stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
1112
1113
// VI encodings: 32-bit global atomics.
defm GLOBAL_ATOMIC_SWAP          : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP       : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD           : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB           : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN          : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN          : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX          : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX          : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND           : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR            : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR           : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC           : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC           : FLAT_Global_Real_Atomics_vi <0x4c>;

// VI encodings: 64-bit (X2) global atomics.
defm GLOBAL_ATOMIC_SWAP_X2       : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2    : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2        : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2        : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2       : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2       : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2       : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2       : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2        : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2         : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2        : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2        : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2        : FLAT_Global_Real_Atomics_vi <0x6c>;
1140
// VI encodings: scratch loads and stores. The opcodes are the same values
// used for the corresponding FLAT_* and GLOBAL_* VI definitions.
defm SCRATCH_LOAD_UBYTE :         FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE :         FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT :        FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT :        FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD :         FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2 :       FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3 :       FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4 :       FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE :         FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI :  FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16 :     FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI :  FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16 :     FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI :  FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16 :     FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI :  FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT :        FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD :        FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2 :      FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3 :      FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 :      FLAT_Real_AllAddr_vi <0x1f>;
1163
1164
1165//===----------------------------------------------------------------------===//
1166// GFX10.
1167//===----------------------------------------------------------------------===//
1168
// Real (encoded) instruction for pseudo `ps` with opcode `op` in the
// SIEncodingFamily.GFX10 family; assembled under isGFX10Plus and decoded in
// the "GFX10" namespace. Sets several encoding bits on top of FLAT_Real.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real<op, ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace   = "GFX10";

  // Bits 11-0 hold the low 12 bits of the offset.
  let Inst{11-0}  = offset{11-0};
  // Bit 12 is the dlc operand when the pseudo has one, else the pseudo's
  // fixed dlcValue.
  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
  // Bits 54-48 carry saddr only when the pseudo both has and enables it;
  // in every other case they are the constant 0x7d.
  // NOTE(review): 0x7d is presumably the "no saddr" register encoding on
  // this target; confirm against the ISA documentation.
  let Inst{54-48} = !if(ps.has_saddr,
                        !if(ps.enabled_saddr, saddr, 0x7d),
                        0x7d);
  let Inst{55}    = 0;
}
1179
1180
// Emits `_gfx10`: the GFX10 real instruction for the pseudo named NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}
1185
// Emits `_RTN_gfx10`: the GFX10 real instruction for the pseudo named
// NAME + "_RTN".
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_RTN")>;
}
1190
// Emits `_SADDR_gfx10`: the GFX10 real instruction for the pseudo named
// NAME + "_SADDR".
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")>;
}
1195
// Emits `_SADDR_RTN_gfx10`: the GFX10 real instruction for the pseudo named
// NAME + "_SADDR_RTN".
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME # "_SADDR_RTN")>;
}
1200
1201
// Both addressing forms of a non-atomic instruction: `_gfx10` plus
// `_SADDR_gfx10`, sharing opcode `op`.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>, FLAT_Real_SADDR_gfx10<op>;
1205
// Both forms of a flat atomic: `_gfx10` plus `_RTN_gfx10`, sharing opcode
// `op`.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>, FLAT_Real_RTN_gfx10<op>;
1209
// All four forms of a global atomic: `_gfx10`, `_SADDR_gfx10`, `_RTN_gfx10`
// and `_SADDR_RTN_gfx10`, sharing opcode `op`.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op>
  : FLAT_Real_AllAddr_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;
1214
1215
// ENC_FLAT.
// Loads and stores.
defm FLAT_LOAD_UBYTE :         FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE :         FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT :        FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT :        FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD :         FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2 :       FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4 :       FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3 :       FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE :         FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI :  FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT :        FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD :        FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2 :      FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4 :      FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3 :      FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16 :     FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI :  FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16 :     FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI :  FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16 :     FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI :  FLAT_Real_Base_gfx10<0x025>;
// 32-bit atomics.
defm FLAT_ATOMIC_SWAP :        FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP :     FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD :         FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB :         FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN :        FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN :        FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX :        FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX :        FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND :         FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR :          FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR :         FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC :         FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC :         FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP :    FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN :        FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX :        FLAT_Real_Atomics_gfx10<0x040>;
// 64-bit (X2) atomics.
defm FLAT_ATOMIC_SWAP_X2 :     FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2 :  FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2 :      FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2 :      FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2 :     FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2 :     FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2 :     FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2 :     FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2 :      FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2 :       FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2 :      FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2 :      FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2 :      FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2 :     FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2 :     FLAT_Real_Atomics_gfx10<0x060>;
1271
1272
1273// ENC_FLAT_GLBL.
// GLOBAL_* loads and stores are instantiated through FLAT_Real_AllAddr_gfx10.
// NOTE(review): presumably "AllAddr" covers both the plain and the saddr
// addressing variants of each instruction -- confirm against the multiclass
// definition.
// Loads take arguments 0x008-0x00f; DWORDX4 (0x00e) is listed before
// DWORDX3 (0x00f) because the list follows ascending argument order.
1274defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
1275defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
1276defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
1277defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
1278defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
1279defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
1280defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
1281defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
// Stores take arguments 0x018-0x01f. The store arguments visible for the
// FLAT_* list in this file (0x01d-0x01f) match the GLOBAL_* ones of the same
// name suffix.
1282defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
1283defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
1284defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
1285defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
1286defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
1287defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
1288defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
1289defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
// D16 loads take arguments 0x020-0x025, the same values the FLAT_*_D16
// entries use.
1290defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
1291defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
1292defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
1293defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
1294defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
1295defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
// GLOBAL_ATOMIC_* entries use their own multiclass,
// FLAT_Real_GlblAtomics_gfx10, but take exactly the same arguments as the
// FLAT_ATOMIC_* entries of the same name suffix (0x030-0x040, with 0x034
// not assigned in this list).
1296defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
1297defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
1298defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
1299defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
1300defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
1301defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
1302defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
1303defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
1304defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
1305defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1306defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1307defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1308defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1309defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1310defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1311defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
// _X2 variants: same sequence with every argument offset by 0x20
// (0x050-0x060); 0x054 is not assigned in this list.
1312defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
1313defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
1314defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
1315defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
1316defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
1317defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
1318defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
1319defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
1320defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
1321defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1322defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1323defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1324defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1325defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1326defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1327defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
1328
1329
1330// ENC_FLAT_SCRATCH.
// SCRATCH_* loads and stores use the same multiclass
// (FLAT_Real_AllAddr_gfx10) and the same arguments as the GLOBAL_* loads
// and stores of the same name suffix. No SCRATCH atomics are defined in
// this list.
// NOTE(review): the shared argument values imply the segments are told
// apart by something other than this argument -- confirm in the multiclass /
// encoding class definitions.
1331defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
1332defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
1333defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
1334defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
1335defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
1336defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
1337defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
1338defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
// Stores: 0x018-0x01f.
1339defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
1340defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
1341defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
1342defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1343defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
1344defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
1345defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
1346defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
// D16 loads: 0x020-0x025.
1347defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
1348defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
1349defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
1350defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
1351defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
1352defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
1353
// Both definitions inside this `let` get
// SubtargetPredicate = HasAtomicFaddInsts.
// Unlike the *_gfx10 instantiations earlier in the file, these two go
// through the _vi multiclass (FLAT_Real_AllAddr_vi), with arguments 0x04d
// and 0x04e.
// NOTE(review): presumably these instructions only exist in the VI-family
// encoding -- confirm before adding a gfx10 counterpart.
1354let SubtargetPredicate = HasAtomicFaddInsts in {
1355
1356defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
1357defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
1358
1359} // End SubtargetPredicate = HasAtomicFaddInsts
1360