//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Address ComplexPatterns. Each one produces three operands; the patterns in
// this file bind them as (vaddr, offset, slc).
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT-family pseudo instruction (flat, global, scratch).
// The has_* / enabled_* / *Value bits below are read by FLAT_Real when it
// builds the encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Has saddr" and "saddr enabled" are tracked separately: saddr is only
  // valid for scratch and global instructions. Before gfx9 these bits were
  // reserved, so for the original flat segment instructions we also do not
  // necessarily want to write the "disabled" value into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;

  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a Buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);

  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}

// 64-bit encoded instruction built from a FLAT_Pseudo. Operand lists, asm
// string and selected flags are copied from the pseudo `ps`; `op` is the
// 7-bit opcode field.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Flags inherited from the pseudo.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For flat accesses the highest bit is ignored and the
  // field is treated as a 12-bit unsigned value.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17} = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  // Bits 54-48 carry saddr: the register when enabled, 0x7f when the
  // instruction has an saddr field that is disabled, and 0 when the pseudo
  // has no saddr at all (the bits were reserved before gfx9, see FLAT_Pseudo).
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55} = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}

// Mixin recording whether a def is the saddr form (IsSaddr) and the name
// (SaddrOp) that groups the saddr and non-saddr forms together.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo: $vdst <- [$vaddr (, $saddr)] with offset/glc/slc/dlc operands.
// HasTiedOutput appends a $vdst_in input tied to $vdst. HasSaddr/EnableSaddr
// control the saddr field and whether ", $saddr" or ", off" is printed.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
                        bit HasTiedOutput = 0,
                        bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !con(
      !con((ins VReg_64:$vaddr),
           !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName # !if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo: [$vaddr (, $saddr)] <- $vdata with offset/glc/slc/dlc
// operands. There is no destination register (has_vdst = 0).
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
                         bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !con((ins VReg_64:$vaddr, vdataClass:$vdata),
         !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName # !if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}

// Emits the plain and the _SADDR form of a global load, both marked
// is_flat_global and registered in GlobalSaddrTable under opName.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
             GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Emits the plain and the _SADDR form of a global store, both marked
// is_flat_global and registered in GlobalSaddrTable under opName.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
             GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Scratch load pseudo. Exactly one address operand is present: a 32-bit
// $saddr when EnableSaddr is set, otherwise a 32-bit $vaddr; the absent one
// is printed as "off".
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
                                bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
      (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Scratch store pseudo; same single-address-operand scheme as the scratch
// load, with $vdata as the value operand and no destination register.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
      (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
      (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName # !if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Emits the vaddr form and the _SADDR form of a scratch load.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}

// Emits the vaddr form and the _SADDR form of a scratch store.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}

// Atomic pseudo that does not return the old value: no vdst, no glc/dlc
// operands, and the encoded glc bit is fixed to glcValue = 0.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 0;
  let glcValue = 0;
  let has_dlc = 0;
  let dlcValue = 0;
  let has_vdst = 0;
  let maybeAtomic = 1;
}

// Returning atomic pseudo: restores vdst and fixes the encoded glc bit to 1.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic: emits the no-return def and the _RTN def. Only the
// _RTN def carries a selection pattern (built from `atomic`).
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName # "_rtn">,
    AtomicNoRet <opName, 1>{
    let FPAtomic = isFP;
  }
}

// Global atomic, no-return forms: the "off" (no saddr) def and the _SADDR
// def. Neither def is given a selection pattern here.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName # "_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME # "_SADDR";
    let FPAtomic = isFP;
  }
}

// Global atomic, returning forms: _RTN (with a FLATSignedAtomic selection
// pattern) and _SADDR_RTN (no pattern).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, off$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName # "_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    GlobalSaddrTable<1, opName # "_rtn">,
    AtomicNoRet <opName # "_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME # "_SADDR_RTN";
    let FPAtomic = isFP;
  }
}

// Convenience multiclass combining the no-return and returning global
// atomic forms; atomic_no_rtn and atomic_rtn are forwarded respectively.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic_rtn = null_frag,
  SDPatternOperator atomic_no_rtn = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> :
    FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;


//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 loads are declared with a tied $vdst_in (HasTiedOutput = 1).
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                             VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
                             v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                          VGPR_32, i32, atomic_swap_flat_32>;

defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                             VReg_64, i64, atomic_swap_flat_64>;

defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
                         VGPR_32, i32, atomic_load_add_flat_32>;

defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                         VGPR_32, i32, atomic_load_sub_flat_32>;

defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                          VGPR_32, i32, atomic_load_min_flat_32>;

defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                          VGPR_32, i32, atomic_load_umin_flat_32>;

defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                          VGPR_32, i32, atomic_load_max_flat_32>;

defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                          VGPR_32, i32, atomic_load_umax_flat_32>;

defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
                         VGPR_32, i32, atomic_load_and_flat_32>;

defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
                        VGPR_32, i32, atomic_load_or_flat_32>;

defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                         VGPR_32, i32, atomic_load_xor_flat_32>;

defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                         VGPR_32, i32, atomic_inc_flat_32>;

defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                         VGPR_32, i32, atomic_dec_flat_32>;

defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                            VReg_64, i64, atomic_load_add_flat_64>;

defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                            VReg_64, i64, atomic_load_sub_flat_64>;

defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                             VReg_64, i64, atomic_load_min_flat_64>;

defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                             VReg_64, i64, atomic_load_umin_flat_64>;

defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                             VReg_64, i64, atomic_load_max_flat_64>;

defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                             VReg_64, i64, atomic_load_umax_flat_64>;

defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                            VReg_64, i64, atomic_load_and_flat_64>;

defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                           VReg_64, i64, atomic_load_or_flat_64>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                            VReg_64, i64, atomic_load_xor_flat_64>;

defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                            VReg_64, i64, atomic_inc_flat_64>;

defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                            VReg_64, i64, atomic_dec_flat_64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                              VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                 VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                          VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                          VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                             VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                             VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 global loads are declared with a tied input (third argument = 1).
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
                                  null_frag,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                            VGPR_32, i32, atomic_swap_global_32>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                               VReg_64, i64, atomic_swap_global_64>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_load_add_global_32>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_load_sub_global_32>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_load_min_global_32>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_load_umin_global_32>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_load_max_global_32>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_load_umax_global_32>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_load_and_global_32>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_load_or_global_32>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_load_xor_global_32>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global_32>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global_32>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_load_add_global_64>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_load_sub_global_64>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_load_min_global_64>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_load_umin_global_64>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_load_max_global_64>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_load_umax_global_64>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_load_and_global_64>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_load_or_global_64>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_load_xor_global_64>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global_64>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global_64>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts


let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// NOTE(review): unlike the flat/global D16 loads, these scratch D16 loads are
// declared without a tied input operand - confirm whether that is intended.
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {

defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;

} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Load through the FLATOffset address pattern. The matched (vaddr, offset,
// slc) triple is forwarded to `inst`; the two operands between offset and
// slc are passed as 0.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// D16 load through FLATOffset; `node` takes an extra $in value which is
// passed as the last operand of `inst`.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, 0, $slc, $in)
>;

// Same as FlatLoadPat_D16, but matching the address with FLATOffsetSigned.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, 0, $slc, $in)
>;

// Load whose address is matched with FLATAtomic.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// Load whose address is matched with FLATOffsetSigned.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// Store (data first, then address) through FLATOffset; `rc` is the register
// class applied to $data in the output.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

// Store (data first, then address) through FLATOffsetSigned.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // An atomic store uses the atomic binop operand order, so the address
  // comes first and the data second.
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // An atomic store uses the atomic binop operand order, so the address
  // comes first and the data second.
  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

// Returning atomic through FLATAtomic; data_vt may differ from the result
// type vt (the cmpswap patterns below pass a vector data type).
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

// Atomic through FLATAtomic whose source node has no typed result.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, $slc)
>;

// Returning atomic through FLATSignedAtomic.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                           ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

let OtherPredicates = [HasFlatAddressSpace] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

foreach vt = Reg32Types.types in {
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
}

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;

// The patterns below select FLAT_ATOMIC_*_RTN instructions for the
// *_global_* atomic operators.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;

let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}

} // End OtherPredicates = [HasFlatAddressSpace]

let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {

def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;

foreach vt = Reg32Types.types in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
}

def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
868def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>; 869} 870 871def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 872def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 873 874def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>; 875def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>; 876def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>; 877def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>; 878def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>; 879 880let OtherPredicates = [D16PreservesUnusedBits] in { 881def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 882def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 883 884def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 885def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 886def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 887def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 888def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 889def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 890 891def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 892def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 893def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 894def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 895def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 896def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 897} 898 899def : 
FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>; 900def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>; 901 902def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; 903def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; 904def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; 905def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; 906def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; 907def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; 908def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; 909def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; 910def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; 911def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; 912def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; 913def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; 914def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 915 916def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 917def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 918def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 919def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 920def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 921def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 922def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; 923def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, 
atomic_load_min_global_64, i64>; 924def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 925def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 926def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; 927def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; 928def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 929 930def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>; 931def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>; 932 933} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 934 935 936//===----------------------------------------------------------------------===// 937// Target 938//===----------------------------------------------------------------------===// 939 940//===----------------------------------------------------------------------===// 941// CI 942//===----------------------------------------------------------------------===// 943 944class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 945 FLAT_Real <op, ps>, 946 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 947 let AssemblerPredicate = isGFX7Only; 948 let DecoderNamespace="GFX7"; 949} 950 951def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 952def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 953def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 954def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 955def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 956def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 957def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 958def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 959 960def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 961def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, 
FLAT_STORE_SHORT>; 962def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 963def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 964def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 965def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 966 967multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 968 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 969 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 970} 971 972defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 973defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 974defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 975defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 976defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 977defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 978defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 979defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 980defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 981defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 982defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 983defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 984defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 985defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 986defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 987defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 988defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 989defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 990defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 991defm FLAT_ATOMIC_SMAX_X2 : 
FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 992defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 993defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 994defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 995defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 996defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 997defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 998 999// CI Only flat instructions 1000defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1001defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1002defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1003defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1004defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1005defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1006 1007 1008//===----------------------------------------------------------------------===// 1009// VI 1010//===----------------------------------------------------------------------===// 1011 1012class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> : 1013 FLAT_Real <op, ps>, 1014 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1015 let AssemblerPredicate = isGFX8GFX9; 1016 let DecoderNamespace = "GFX8"; 1017} 1018 1019multiclass FLAT_Real_AllAddr_vi<bits<7> op> { 1020 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>; 1021 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1022} 1023 1024def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1025def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1026def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1027def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1028def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1029def 
FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1030def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1031def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1032 1033def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1034def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1035def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1036def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1037def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1038def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1039def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1040def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1041 1042def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1043def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1044def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1045def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1046def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1047def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1048 1049multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> { 1050 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1051 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1052} 1053 1054multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> : 1055 FLAT_Real_AllAddr_vi<op> { 1056 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1057 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1058} 1059 1060 1061defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1062defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1063defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 
// VI real encodings for the FLAT atomics, one `defm` per pseudo.
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// GLOBAL loads; each `defm` yields the plain and the _SADDR record.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

// GLOBAL D16 loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

// GLOBAL stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


// GLOBAL atomics; each `defm` yields the plain, _SADDR, _RTN and _SADDR_RTN
// records.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// SCRATCH loads and stores; each `defm` yields the plain and the _SADDR
// record.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real instruction for pseudo `ps` with opcode `op` in the GFX10 encoding
// family. Assembled under isGFX10Plus and decoded in namespace "GFX10".
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace = "GFX10";

  // Bits 11-0 carry the low 12 bits of the offset field.
  let Inst{11-0} = offset{11-0};
  // Bit 12 is the dlc operand when the pseudo has one, otherwise the pseudo's
  // fixed dlcValue.
  let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
  // Bits 54-48 carry saddr only when the pseudo both has and enables it; in
  // every other case they are set to 0x7d.
  // NOTE(review): 0x7d is presumably the "no saddr" encoding - confirm
  // against the ISA documentation.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  let Inst{55} = 0;
}


// Each of the four multiclasses below defines one real record from the pseudo
// named NAME plus a fixed suffix.

// <NAME>_gfx10 from pseudo NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

// <NAME>_RTN_gfx10 from pseudo NAME # "_RTN".
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

// <NAME>_SADDR_gfx10 from pseudo NAME # "_SADDR".
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// <NAME>_SADDR_RTN_gfx10 from pseudo NAME # "_SADDR_RTN".
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}


// Plain + _SADDR.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>;

// Plain + _RTN.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>;

// Plain + _SADDR + _RTN + _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
  FLAT_Real_AllAddr_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;


// ENC_FLAT.
// GFX10 real encodings for the FLAT_* pseudos. Loads and stores go through
// FLAT_Real_Base_gfx10, atomics through FLAT_Real_Atomics_gfx10.

// Loads.
defm FLAT_LOAD_UBYTE   : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE   : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT  : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT  : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD   : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;

// Stores.
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;

// D16 loads.
defm FLAT_LOAD_UBYTE_D16    : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16    : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16    : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;

// 32-bit atomics.
defm FLAT_ATOMIC_SWAP     : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP  : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD      : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB      : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN     : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN     : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX     : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX     : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND      : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR       : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR      : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC      : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC      : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN     : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX     : FLAT_Real_Atomics_gfx10<0x040>;

// 64-bit (_X2) atomics.
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
// GFX10 real encodings for the GLOBAL_* pseudos. Loads and stores go through
// FLAT_Real_AllAddr_gfx10, atomics through FLAT_Real_GlblAtomics_gfx10.

// Loads.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;

// Stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;

// D16 loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;

// 32-bit atomics.
defm GLOBAL_ATOMIC_SWAP     : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP  : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD      : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB      : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_SMIN     : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN     : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX     : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX     : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND      : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR       : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR      : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC      : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC      : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN     : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX     : FLAT_Real_GlblAtomics_gfx10<0x040>;

// 64-bit (_X2) atomics.
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;


// ENC_FLAT_SCRATCH.
// GFX10 real encodings for the SCRATCH_* pseudos, all through
// FLAT_Real_AllAddr_gfx10.

// Loads.
defm SCRATCH_LOAD_UBYTE   : FLAT_Real_AllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Real_AllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT  : FLAT_Real_AllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Real_AllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD   : FLAT_Real_AllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;

// Stores.
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;

// D16 loads.
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Real_AllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;

// Floating-point global atomic adds, gated on HasAtomicFaddInsts.
// NOTE(review): these are instantiated through the VI multiclass
// (FLAT_Real_AllAddr_vi) even though they sit after the GFX10 tables -
// confirm the placement is intentional.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts