1//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// This file was originally auto-generated from a GPU register header file and 10// all the instruction definitions were originally commented out. Instructions 11// that are not yet supported remain commented out. 12//===----------------------------------------------------------------------===// 13 14class InterpSlots { 15int P0 = 2; 16int P10 = 0; 17int P20 = 1; 18} 19def INTERP : InterpSlots; 20 21def InterpSlot : Operand<i32> { 22 let PrintMethod = "printInterpSlot"; 23} 24 25def SendMsgImm : Operand<i32> { 26 let PrintMethod = "printSendMsg"; 27} 28 29def isGCN : Predicate<"Subtarget->getGeneration() " 30 ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, 31 AssemblerPredicate<"FeatureGCN">; 32def isSI : Predicate<"Subtarget->getGeneration() " 33 "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; 34 35def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; 36 37def SWaitMatchClass : AsmOperandClass { 38 let Name = "SWaitCnt"; 39 let RenderMethod = "addImmOperands"; 40 let ParserMethod = "parseSWaitCntOps"; 41} 42 43def WAIT_FLAG : InstFlag<"printWaitFlag"> { 44 let ParserMatchClass = SWaitMatchClass; 45} 46 47let SubtargetPredicate = isGCN in { 48 49//===----------------------------------------------------------------------===// 50// EXP Instructions 51//===----------------------------------------------------------------------===// 52 53defm EXP : EXP_m; 54 55//===----------------------------------------------------------------------===// 56// SMRD Instructions 57//===----------------------------------------------------------------------===// 58 59let mayLoad = 1 in { 60 61// We are using the SGPR_32 and not the SReg_32 register class for 
32-bit 62// SMRD instructions, because the SGPR_32 register class does not include M0 63// and writing to M0 from an SMRD instruction will hang the GPU. 64defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>; 65defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>; 66defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>; 67defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>; 68defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>; 69 70defm S_BUFFER_LOAD_DWORD : SMRD_Helper < 71 0x08, "s_buffer_load_dword", SReg_128, SGPR_32 72>; 73 74defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < 75 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64 76>; 77 78defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < 79 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128 80>; 81 82defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < 83 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256 84>; 85 86defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < 87 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512 88>; 89 90} // mayLoad = 1 91 92//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; 93//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>; 94 95//===----------------------------------------------------------------------===// 96// SOP1 Instructions 97//===----------------------------------------------------------------------===// 98 99let isMoveImm = 1 in { 100 let isReMaterializable = 1 in { 101 defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>; 102 defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>; 103 } // let isRematerializeable = 1 104 105 let Uses = [SCC] in { 106 defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>; 107 defm S_CMOV_B64 : SOP1_64 <sop1<0x06, 0x03>, "s_cmov_b64", []>; 108 } // End Uses = [SCC] 109} // End isMoveImm = 1 110 111let Defs = [SCC] in { 112 defm S_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_not_b32", 113 [(set i32:$dst, (not i32:$src0))] 114 
>; 115 116 defm S_NOT_B64 : SOP1_64 <sop1<0x08, 0x05>, "s_not_b64", 117 [(set i64:$dst, (not i64:$src0))] 118 >; 119 defm S_WQM_B32 : SOP1_32 <sop1<0x09, 0x06>, "s_wqm_b32", []>; 120 defm S_WQM_B64 : SOP1_64 <sop1<0x0a, 0x07>, "s_wqm_b64", []>; 121} // End Defs = [SCC] 122 123 124defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32", 125 [(set i32:$dst, (AMDGPUbrev i32:$src0))] 126>; 127defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>; 128 129let Defs = [SCC] in { 130 defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>; 131 defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>; 132 defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32", 133 [(set i32:$dst, (ctpop i32:$src0))] 134 >; 135 defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>; 136} // End Defs = [SCC] 137 138defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>; 139defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>; 140defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32", 141 [(set i32:$dst, (cttz_zero_undef i32:$src0))] 142>; 143defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>; 144 145defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32", 146 [(set i32:$dst, (ctlz_zero_undef i32:$src0))] 147>; 148 149defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>; 150defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", 151 [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))] 152>; 153defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>; 154defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8", 155 [(set i32:$dst, (sext_inreg i32:$src0, i8))] 156>; 157defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16", 158 [(set i32:$dst, (sext_inreg i32:$src0, i16))] 159>; 160 161defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>; 162defm S_BITSET0_B64 : 
SOP1_64 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>; 163defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>; 164defm S_BITSET1_B64 : SOP1_64 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>; 165defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>; 166defm S_SETPC_B64 : SOP1_64 <sop1<0x20, 0x1d>, "s_setpc_b64", []>; 167defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>; 168defm S_RFE_B64 : SOP1_64 <sop1<0x22, 0x1f>, "s_rfe_b64", []>; 169 170let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in { 171 172defm S_AND_SAVEEXEC_B64 : SOP1_64 <sop1<0x24, 0x20>, "s_and_saveexec_b64", []>; 173defm S_OR_SAVEEXEC_B64 : SOP1_64 <sop1<0x25, 0x21>, "s_or_saveexec_b64", []>; 174defm S_XOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x26, 0x22>, "s_xor_saveexec_b64", []>; 175defm S_ANDN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x27, 0x23>, "s_andn2_saveexec_b64", []>; 176defm S_ORN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x28, 0x24>, "s_orn2_saveexec_b64", []>; 177defm S_NAND_SAVEEXEC_B64 : SOP1_64 <sop1<0x29, 0x25>, "s_nand_saveexec_b64", []>; 178defm S_NOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2a, 0x26>, "s_nor_saveexec_b64", []>; 179defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>; 180 181} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] 182 183defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>; 184defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>; 185defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>; 186defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>; 187defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>; 188defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>; 189defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>; 190defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>; 191let Defs = [SCC] in { 192 defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>; 193} // End Defs = [SCC] 
194defm S_MOV_FED_B32 : SOP1_32 <sop1<0x35, 0x31>, "s_mov_fed_b32", []>; 195 196//===----------------------------------------------------------------------===// 197// SOP2 Instructions 198//===----------------------------------------------------------------------===// 199 200let Defs = [SCC] in { // Carry out goes to SCC 201let isCommutable = 1 in { 202defm S_ADD_U32 : SOP2_32 <sop2<0x00>, "s_add_u32", []>; 203defm S_ADD_I32 : SOP2_32 <sop2<0x02>, "s_add_i32", 204 [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))] 205>; 206} // End isCommutable = 1 207 208defm S_SUB_U32 : SOP2_32 <sop2<0x01>, "s_sub_u32", []>; 209defm S_SUB_I32 : SOP2_32 <sop2<0x03>, "s_sub_i32", 210 [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))] 211>; 212 213let Uses = [SCC] in { // Carry in comes from SCC 214let isCommutable = 1 in { 215defm S_ADDC_U32 : SOP2_32 <sop2<0x04>, "s_addc_u32", 216 [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; 217} // End isCommutable = 1 218 219defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32", 220 [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; 221} // End Uses = [SCC] 222 223defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32", 224 [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] 225>; 226defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32", 227 [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] 228>; 229defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32", 230 [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] 231>; 232defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", 233 [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] 234>; 235} // End Defs = [SCC] 236 237 238let Uses = [SCC] in { 239 defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>; 240 defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>; 241} // End Uses = [SCC] 242 243let Defs = [SCC] in { 244defm S_AND_B32 : SOP2_32 <sop2<0x0e, 0x0c>, "s_and_b32", 245 [(set i32:$dst, (and i32:$src0, i32:$src1))] 246>; 247 248defm S_AND_B64 : SOP2_64 
<sop2<0x0f, 0x0d>, "s_and_b64", 249 [(set i64:$dst, (and i64:$src0, i64:$src1))] 250>; 251 252defm S_OR_B32 : SOP2_32 <sop2<0x10, 0x0e>, "s_or_b32", 253 [(set i32:$dst, (or i32:$src0, i32:$src1))] 254>; 255 256defm S_OR_B64 : SOP2_64 <sop2<0x11, 0x0f>, "s_or_b64", 257 [(set i64:$dst, (or i64:$src0, i64:$src1))] 258>; 259 260defm S_XOR_B32 : SOP2_32 <sop2<0x12, 0x10>, "s_xor_b32", 261 [(set i32:$dst, (xor i32:$src0, i32:$src1))] 262>; 263 264defm S_XOR_B64 : SOP2_64 <sop2<0x13, 0x11>, "s_xor_b64", 265 [(set i64:$dst, (xor i64:$src0, i64:$src1))] 266>; 267defm S_ANDN2_B32 : SOP2_32 <sop2<0x14, 0x12>, "s_andn2_b32", []>; 268defm S_ANDN2_B64 : SOP2_64 <sop2<0x15, 0x13>, "s_andn2_b64", []>; 269defm S_ORN2_B32 : SOP2_32 <sop2<0x16, 0x14>, "s_orn2_b32", []>; 270defm S_ORN2_B64 : SOP2_64 <sop2<0x17, 0x15>, "s_orn2_b64", []>; 271defm S_NAND_B32 : SOP2_32 <sop2<0x18, 0x16>, "s_nand_b32", []>; 272defm S_NAND_B64 : SOP2_64 <sop2<0x19, 0x17>, "s_nand_b64", []>; 273defm S_NOR_B32 : SOP2_32 <sop2<0x1a, 0x18>, "s_nor_b32", []>; 274defm S_NOR_B64 : SOP2_64 <sop2<0x1b, 0x19>, "s_nor_b64", []>; 275defm S_XNOR_B32 : SOP2_32 <sop2<0x1c, 0x1a>, "s_xnor_b32", []>; 276defm S_XNOR_B64 : SOP2_64 <sop2<0x1d, 0x1b>, "s_xnor_b64", []>; 277} // End Defs = [SCC] 278 279// Use added complexity so these patterns are preferred to the VALU patterns. 
280let AddedComplexity = 1 in { 281let Defs = [SCC] in { 282 283defm S_LSHL_B32 : SOP2_32 <sop2<0x1e, 0x1c>, "s_lshl_b32", 284 [(set i32:$dst, (shl i32:$src0, i32:$src1))] 285>; 286defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64", 287 [(set i64:$dst, (shl i64:$src0, i32:$src1))] 288>; 289defm S_LSHR_B32 : SOP2_32 <sop2<0x20, 0x1e>, "s_lshr_b32", 290 [(set i32:$dst, (srl i32:$src0, i32:$src1))] 291>; 292defm S_LSHR_B64 : SOP2_64_32 <sop2<0x21, 0x1f>, "s_lshr_b64", 293 [(set i64:$dst, (srl i64:$src0, i32:$src1))] 294>; 295defm S_ASHR_I32 : SOP2_32 <sop2<0x22, 0x20>, "s_ashr_i32", 296 [(set i32:$dst, (sra i32:$src0, i32:$src1))] 297>; 298defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64", 299 [(set i64:$dst, (sra i64:$src0, i32:$src1))] 300>; 301} // End Defs = [SCC] 302 303defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", 304 [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>; 305defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>; 306defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32", 307 [(set i32:$dst, (mul i32:$src0, i32:$src1))] 308>; 309 310} // End AddedComplexity = 1 311 312let Defs = [SCC] in { 313defm S_BFE_U32 : SOP2_32 <sop2<0x27, 0x25>, "s_bfe_u32", []>; 314defm S_BFE_I32 : SOP2_32 <sop2<0x28, 0x26>, "s_bfe_i32", []>; 315defm S_BFE_U64 : SOP2_64 <sop2<0x29, 0x27>, "s_bfe_u64", []>; 316defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>; 317} // End Defs = [SCC] 318 319let sdst = 0 in { 320defm S_CBRANCH_G_FORK : SOP2_m < 321 sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs), 322 (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", [] 323>; 324} 325 326let Defs = [SCC] in { 327defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>; 328} // End Defs = [SCC] 329 330//===----------------------------------------------------------------------===// 331// SOPC Instructions 332//===----------------------------------------------------------------------===// 333 334def S_CMP_EQ_I32 : 
SOPC_32 <0x00000000, "s_cmp_eq_i32">; 335def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">; 336def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">; 337def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">; 338def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">; 339def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">; 340def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">; 341def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">; 342def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">; 343def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">; 344def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">; 345def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">; 346////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>; 347////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>; 348////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>; 349////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>; 350//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>; 351 352//===----------------------------------------------------------------------===// 353// SOPK Instructions 354//===----------------------------------------------------------------------===// 355 356let isReMaterializable = 1 in { 357defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>; 358} // End isReMaterializable = 1 359let Uses = [SCC] in { 360 defm S_CMOVK_I32 : SOPK_32 <sopk<0x02, 0x01>, "s_cmovk_i32", []>; 361} 362 363let isCompare = 1 in { 364 365/* 366This instruction is disabled for now until we can figure out how to teach 367the instruction selector to correctly use the S_CMP* vs V_CMP* 368instructions. 
369 370When this instruction is enabled the code generator sometimes produces this 371invalid sequence: 372 373SCC = S_CMPK_EQ_I32 SGPR0, imm 374VCC = COPY SCC 375VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 376 377defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", 378 [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] 379>; 380*/ 381 382defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>; 383defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>; 384defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>; 385defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>; 386defm S_CMPK_LT_I32 : SOPK_SCC <sopk<0x07, 0x06>, "s_cmpk_lt_i32", []>; 387defm S_CMPK_LE_I32 : SOPK_SCC <sopk<0x08, 0x07>, "s_cmpk_le_i32", []>; 388defm S_CMPK_EQ_U32 : SOPK_SCC <sopk<0x09, 0x08>, "s_cmpk_eq_u32", []>; 389defm S_CMPK_LG_U32 : SOPK_SCC <sopk<0x0a, 0x09>, "s_cmpk_lg_u32", []>; 390defm S_CMPK_GT_U32 : SOPK_SCC <sopk<0x0b, 0x0a>, "s_cmpk_gt_u32", []>; 391defm S_CMPK_GE_U32 : SOPK_SCC <sopk<0x0c, 0x0b>, "s_cmpk_ge_u32", []>; 392defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>; 393defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>; 394} // End isCompare = 1 395 396let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", 397 Constraints = "$sdst = $src0" in { 398 defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>; 399 defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>; 400} 401 402defm S_CBRANCH_I_FORK : SOPK_m < 403 sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs), 404 (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16" 405>; 406defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>; 407defm S_SETREG_B32 : SOPK_m < 408 sopk<0x13, 0x12>, "s_setreg_b32", (outs), 409 (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16" 410>; 411// FIXME: Not on SI? 
412//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>; 413defm S_SETREG_IMM32_B32 : SOPK_IMM32 < 414 sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs), 415 (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16" 416>; 417 418//===----------------------------------------------------------------------===// 419// SOPP Instructions 420//===----------------------------------------------------------------------===// 421 422def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">; 423 424let isTerminator = 1 in { 425 426def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm", 427 [(IL_retflag)]> { 428 let simm16 = 0; 429 let isBarrier = 1; 430 let hasCtrlDep = 1; 431} 432 433let isBranch = 1 in { 434def S_BRANCH : SOPP < 435 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16", 436 [(br bb:$simm16)]> { 437 let isBarrier = 1; 438} 439 440let DisableEncoding = "$scc" in { 441def S_CBRANCH_SCC0 : SOPP < 442 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc), 443 "s_cbranch_scc0 $simm16" 444>; 445def S_CBRANCH_SCC1 : SOPP < 446 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc), 447 "s_cbranch_scc1 $simm16" 448>; 449} // End DisableEncoding = "$scc" 450 451def S_CBRANCH_VCCZ : SOPP < 452 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc), 453 "s_cbranch_vccz $simm16" 454>; 455def S_CBRANCH_VCCNZ : SOPP < 456 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc), 457 "s_cbranch_vccnz $simm16" 458>; 459 460let DisableEncoding = "$exec" in { 461def S_CBRANCH_EXECZ : SOPP < 462 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec), 463 "s_cbranch_execz $simm16" 464>; 465def S_CBRANCH_EXECNZ : SOPP < 466 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec), 467 "s_cbranch_execnz $simm16" 468>; 469} // End DisableEncoding = "$exec" 470 471 472} // End isBranch = 1 473} // End isTerminator = 1 474 475let hasSideEffects = 1 in { 476def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", 477 [(int_AMDGPU_barrier_local)] 478> { 479 let simm16 
= 0; 480 let isBarrier = 1; 481 let hasCtrlDep = 1; 482 let mayLoad = 1; 483 let mayStore = 1; 484} 485 486def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">; 487def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; 488def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">; 489def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$sim16), "s_setprio $sim16">; 490 491let Uses = [EXEC] in { 492 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16", 493 [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] 494 > { 495 let DisableEncoding = "$m0"; 496 } 497} // End Uses = [EXEC] 498 499def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">; 500def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; 501def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { 502 let simm16 = 0; 503} 504def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">; 505def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">; 506def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> { 507 let simm16 = 0; 508} 509} // End hasSideEffects 510 511//===----------------------------------------------------------------------===// 512// VOPC Instructions 513//===----------------------------------------------------------------------===// 514 515let isCompare = 1, isCommutable = 1 in { 516 517defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">; 518defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">; 519defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>; 520defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">; 521defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>; 522defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>; 523defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 
0x46>, "v_cmp_ge_f32", COND_OGE>; 524defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>; 525defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>; 526defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">; 527defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>; 528defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">; 529defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>; 530defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>; 531defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>; 532defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">; 533 534 535defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">; 536defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">; 537defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">; 538defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">; 539defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">; 540defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">; 541defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">; 542defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17, 0x57>, "v_cmpx_o_f32">; 543defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18, 0x58>, "v_cmpx_u_f32">; 544defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19, 0x59>, "v_cmpx_nge_f32">; 545defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a, 0x5a>, "v_cmpx_nlg_f32">; 546defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b, 0x5b>, "v_cmpx_ngt_f32">; 547defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c, 0x5c>, "v_cmpx_nle_f32">; 548defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">; 549defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">; 550defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">; 551 552 553defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 
0x60>, "v_cmp_f_f64">; 554defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">; 555defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>; 556defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">; 557defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>; 558defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>; 559defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>; 560defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>; 561defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>; 562defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">; 563defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>; 564defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">; 565defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>; 566defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>; 567defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>; 568defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">; 569 570 571defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">; 572defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">; 573defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">; 574defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">; 575defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">; 576defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">; 577defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">; 578defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">; 579defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">; 580defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, 
"v_cmpx_nge_f64", "v_cmpx_nle_f64">; 581defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">; 582defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">; 583defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">; 584defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">; 585defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">; 586defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">; 587 588 589let SubtargetPredicate = isSICI in { 590 591defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">; 592defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">; 593defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">; 594defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">; 595defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">; 596defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">; 597defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">; 598defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">; 599defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">; 600defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">; 601defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">; 602defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">; 603defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">; 604defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">; 605defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">; 606defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">; 607 608 609defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">; 610defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">; 611defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">; 612defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", 
"v_cmpsx_ge_f32">; 613defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">; 614defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">; 615defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">; 616defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">; 617defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">; 618defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">; 619defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">; 620defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">; 621defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">; 622defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">; 623defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">; 624defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">; 625 626 627defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">; 628defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">; 629defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">; 630defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">; 631defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">; 632defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">; 633defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">; 634defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">; 635defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">; 636defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">; 637defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">; 638defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">; 639defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">; 640defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">; 641defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">; 642defm V_CMPS_TRU_F64 : VOPC_F64 
<vopc<0x6f>, "v_cmps_tru_f64">; 643 644 645defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">; 646defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">; 647defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">; 648defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">; 649defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">; 650defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">; 651defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">; 652defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">; 653defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">; 654defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">; 655defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">; 656defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">; 657defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">; 658defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">; 659defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">; 660defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">; 661 662} // End SubtargetPredicate = isSICI 663 664defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">; 665defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">; 666defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>; 667defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">; 668defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>; 669defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>; 670defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>; 671defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">; 672 673 674defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">; 675defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 
0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">; 676defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">; 677defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">; 678defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">; 679defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">; 680defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">; 681defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">; 682 683 684defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">; 685defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">; 686defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>; 687defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">; 688defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>; 689defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>; 690defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>; 691defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">; 692 693 694defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">; 695defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">; 696defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">; 697defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">; 698defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">; 699defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">; 700defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">; 701defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">; 702 703 704defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">; 705defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">; 706defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>; 707defm V_CMP_LE_U32 : 
VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;


defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
// Fix: the reverse-operand mnemonic for an "le" compare is the "ge" form
// (operands swapped).  This entry previously repeated "v_cmpx_le_u32",
// a copy-paste duplicate; cf. V_CMPX_LE_I32/I64 and V_CMPX_LE_U64 below.
defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;


defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;

defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>,
"v_cmpx_gt_u64">; 738defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">; 739defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">; 740defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">; 741 742} // End isCompare = 1, isCommutable = 1 743 744defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">; 745defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">; 746defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">; 747defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">; 748 749//===----------------------------------------------------------------------===// 750// DS Instructions 751//===----------------------------------------------------------------------===// 752 753defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>; 754defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>; 755defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>; 756defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>; 757defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>; 758defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>; 759defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>; 760defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>; 761defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>; 762defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>; 763defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>; 764defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>; 765defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>; 766let mayLoad = 0 in { 767defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>; 768defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>; 769defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>; 770} 771defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>; 772defm 
DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>; 773defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>; 774defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>; 775 776defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">; 777defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">; 778defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">; 779defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">; 780defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">; 781let mayLoad = 0 in { 782defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>; 783defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>; 784} 785defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">; 786defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">; 787defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">; 788defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">; 789defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">; 790defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">; 791defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">; 792defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">; 793defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">; 794defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">; 795defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">; 796defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">; 797defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; 798defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>; 799defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET < 800 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32 801>; 802defm DS_WRXCHG2ST64_RTN_B32 : 
DS_1A2D_RET < 803 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32 804>; 805defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">; 806defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; 807defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; 808defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; 809let SubtargetPredicate = isCI in { 810defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; 811} // End isCI 812defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>; 813let mayStore = 0 in { 814defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>; 815defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>; 816defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>; 817defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>; 818defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>; 819defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>; 820defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>; 821} 822defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">; 823defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">; 824defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">; 825defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>; 826defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>; 827defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>; 828defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>; 829defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>; 830defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>; 831defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>; 832defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>; 833defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>; 834defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>; 835defm DS_OR_B64 : 
DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>; 836defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>; 837defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>; 838let mayLoad = 0 in { 839defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>; 840defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4E, "ds_write2_b64", VReg_64>; 841defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>; 842} 843defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>; 844defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>; 845defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>; 846defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>; 847 848defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">; 849defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">; 850defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">; 851defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">; 852defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">; 853defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">; 854defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">; 855defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">; 856defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">; 857defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">; 858defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">; 859defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">; 860defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">; 861defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">; 862defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, 
"ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f,
  "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;

let mayStore = 0 in {
defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
}

defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
// Fix: mnemonic was misspelled "ds_and_src_b32" (missing the "2"); every
// other SRC2 op in this table, and the b64 variant below, spells it out.
defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;

defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;

defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
defm
DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">; 898defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">; 899defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">; 900defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">; 901defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">; 902defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">; 903defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">; 904defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">; 905 906defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">; 907defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">; 908 909//let SubtargetPredicate = isCI in { 910// DS_CONDXCHG32_RTN_B64 911// DS_CONDXCHG32_RTN_B128 912//} // End isCI 913 914//===----------------------------------------------------------------------===// 915// MUBUF Instructions 916//===----------------------------------------------------------------------===// 917 918defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper < 919 mubuf<0x00>, "buffer_load_format_x", VGPR_32 920>; 921defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper < 922 mubuf<0x01>, "buffer_load_format_xy", VReg_64 923>; 924defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper < 925 mubuf<0x02>, "buffer_load_format_xyz", VReg_96 926>; 927defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper < 928 mubuf<0x03>, "buffer_load_format_xyzw", VReg_128 929>; 930defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper < 931 mubuf<0x04>, "buffer_store_format_x", VGPR_32 932>; 933defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper < 934 mubuf<0x05>, "buffer_store_format_xy", VReg_64 935>; 936defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper < 937 mubuf<0x06>, "buffer_store_format_xyz", VReg_96 938>; 939defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper < 940 mubuf<0x07>, "buffer_store_format_xyzw", VReg_128 941>; 942defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper < 943 mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global 944>; 945defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper < 946 mubuf<0x09, 0x11>, 
"buffer_load_sbyte", VGPR_32, i32, sextloadi8_global 947>; 948defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper < 949 mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global 950>; 951defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < 952 mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global 953>; 954defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < 955 mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load 956>; 957defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < 958 mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load 959>; 960defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < 961 mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load 962>; 963 964defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < 965 mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global 966>; 967 968defm BUFFER_STORE_SHORT : MUBUF_Store_Helper < 969 mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global 970>; 971 972defm BUFFER_STORE_DWORD : MUBUF_Store_Helper < 973 mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store 974>; 975 976defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < 977 mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store 978>; 979 980defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < 981 mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store 982>; 983 984defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < 985 mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global 986>; 987//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>; 988defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < 989 mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global 990>; 991defm BUFFER_ATOMIC_SUB : MUBUF_Atomic < 992 mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global 993>; 994//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI 995defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic < 996 
mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global 997>; 998defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic < 999 mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global 1000>; 1001defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic < 1002 mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global 1003>; 1004defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic < 1005 mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global 1006>; 1007defm BUFFER_ATOMIC_AND : MUBUF_Atomic < 1008 mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global 1009>; 1010defm BUFFER_ATOMIC_OR : MUBUF_Atomic < 1011 mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global 1012>; 1013defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < 1014 mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global 1015>; 1016//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>; 1017//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>; 1018//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI 1019//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI 1020//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI 1021//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>; 1022//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>; 1023//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>; 1024//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>; 1025//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI 1026//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>; 1027//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>; 1028//def 
BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>; 1029//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>; 1030//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>; 1031//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>; 1032//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>; 1033//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>; 1034//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>; 1035//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI 1036//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI 1037//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI 1038//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI 1039//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI 1040//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>; 1041 1042//===----------------------------------------------------------------------===// 1043// MTBUF Instructions 1044//===----------------------------------------------------------------------===// 1045 1046//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>; 1047//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>; 1048//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>; 1049defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>; 1050defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>; 1051defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, 
"tbuffer_store_format_xy", VReg_64>; 1052defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>; 1053defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>; 1054 1055//===----------------------------------------------------------------------===// 1056// MIMG Instructions 1057//===----------------------------------------------------------------------===// 1058 1059defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">; 1060defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">; 1061//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>; 1062//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>; 1063//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>; 1064//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>; 1065//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>; 1066//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>; 1067//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>; 1068//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>; 1069defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">; 1070//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>; 1071//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>; 1072//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>; 1073//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>; 1074//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; 1075//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>; 1076//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>; 1077//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>; 1078//def IMAGE_ATOMIC_UMAX : 
MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>; 1079//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>; 1080//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>; 1081//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>; 1082//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>; 1083//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>; 1084//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>; 1085//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; 1086//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; 1087defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">; 1088defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">; 1089defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">; 1090defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">; 1091defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">; 1092defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">; 1093defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">; 1094defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">; 1095defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">; 1096defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">; 1097defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">; 1098defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">; 1099defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">; 1100defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">; 1101defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">; 1102defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">; 1103defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">; 
1104defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">; 1105defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">; 1106defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">; 1107defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">; 1108defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">; 1109defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">; 1110defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">; 1111defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">; 1112defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">; 1113defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">; 1114defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">; 1115defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">; 1116defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">; 1117defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">; 1118defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">; 1119defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">; 1120defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">; 1121defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">; 1122defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">; 1123defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">; 1124defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">; 1125defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">; 1126defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">; 1127defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">; 1128defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">; 1129defm 
IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">; 1130defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">; 1131defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">; 1132defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">; 1133defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">; 1134defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">; 1135defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">; 1136defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">; 1137defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">; 1138defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">; 1139defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">; 1140defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">; 1141defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">; 1142defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">; 1143defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">; 1144defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">; 1145defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">; 1146defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">; 1147defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">; 1148defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">; 1149defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">; 1150defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">; 1151defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">; 1152//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>; 1153//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; 

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasFlatAddressSpace] in {
def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE : FLAT_Store_Helper <
  0x00000018, "flat_store_byte", VGPR_32
>;

def FLAT_STORE_SHORT : FLAT_Store_Helper <
  0x0000001a, "flat_store_short", VGPR_32
>;

def FLAT_STORE_DWORD : FLAT_Store_Helper <
  0x0000001c, "flat_store_dword", VGPR_32
>;

def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
  0x0000001d, "flat_store_dwordx2", VReg_64
>;

def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
  0x0000001e, "flat_store_dwordx4", VReg_128
>;

// Fix: this def previously reused opcode 0x1e, colliding with
// FLAT_STORE_DWORDX4 above.  The CI ISA assigns FLAT_STORE_DWORDX3 the
// adjacent opcode 0x1f — NOTE(review): confirm against the Sea Islands
// ISA FLAT opcode table.
def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
  0x0000001f, "flat_store_dwordx3", VReg_96
>;

//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "flat_atomic_swap", []>;
//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "flat_atomic_cmpswap", []>;
//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "flat_atomic_add", []>;
//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "flat_atomic_sub", []>;
//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "flat_atomic_rsub", []>;
//def FLAT_ATOMIC_SMIN : FLAT_
<0x00000035, "flat_atomic_smin", []>; 1199//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "flat_atomic_umin", []>; 1200//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "flat_atomic_smax", []>; 1201//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "flat_atomic_umax", []>; 1202//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "flat_atomic_and", []>; 1203//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "flat_atomic_or", []>; 1204//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "flat_atomic_xor", []>; 1205//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "flat_atomic_inc", []>; 1206//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "flat_atomic_dec", []>; 1207//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "flat_atomic_fcmpswap", []>; 1208//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "flat_atomic_fmin", []>; 1209//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "flat_atomic_fmax", []>; 1210//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "flat_atomic_swap_x2", []>; 1211//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "flat_atomic_cmpswap_x2", []>; 1212//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "flat_atomic_add_x2", []>; 1213//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "flat_atomic_sub_x2", []>; 1214//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "flat_atomic_rsub_x2", []>; 1215//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "flat_atomic_smin_x2", []>; 1216//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "flat_atomic_umin_x2", []>; 1217//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "flat_atomic_smax_x2", []>; 1218//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "flat_atomic_umax_x2", []>; 1219//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "flat_atomic_and_x2", []>; 1220//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "flat_atomic_or_x2", []>; 1221//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "flat_atomic_xor_x2", []>; 1222//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "flat_atomic_inc_x2", []>; 1223//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "flat_atomic_dec_x2", []>; 1224//def FLAT_ATOMIC_FCMPSWAP_X2 : 
FLAT_X2 <0x0000005e, "flat_atomic_fcmpswap_x2", []>; 1225//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "flat_atomic_fmin_x2", []>; 1226//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "flat_atomic_fmax_x2", []>; 1227 1228} // End HasFlatAddressSpace predicate 1229//===----------------------------------------------------------------------===// 1230// VOP1 Instructions 1231//===----------------------------------------------------------------------===// 1232 1233let vdst = 0, src0 = 0 in { 1234defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">; 1235} 1236 1237let isMoveImm = 1 in { 1238defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>; 1239} // End isMoveImm = 1 1240 1241let Uses = [EXEC] in { 1242 1243// FIXME: Specify SchedRW for READFIRSTLANE_B32 1244 1245def V_READFIRSTLANE_B32 : VOP1 < 1246 0x00000002, 1247 (outs SReg_32:$vdst), 1248 (ins VGPR_32:$src0), 1249 "v_readfirstlane_b32 $vdst, $src0", 1250 [] 1251>; 1252 1253} 1254 1255let SchedRW = [WriteQuarterRate32] in { 1256 1257defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64", 1258 VOP_I32_F64, fp_to_sint 1259>; 1260defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32", 1261 VOP_F64_I32, sint_to_fp 1262>; 1263defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32", 1264 VOP_F32_I32, sint_to_fp 1265>; 1266defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32", 1267 VOP_F32_I32, uint_to_fp 1268>; 1269defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32", 1270 VOP_I32_F32, fp_to_uint 1271>; 1272defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32", 1273 VOP_I32_F32, fp_to_sint 1274>; 1275defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>; 1276defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32", 1277 VOP_I32_F32, fp_to_f16 1278>; 1279defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16", 1280 VOP_F32_I32, f16_to_fp 1281>; 1282defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32", 1283 VOP_I32_F32, cvt_rpi_i32_f32>; 
1284defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32", 1285 VOP_I32_F32, cvt_flr_i32_f32>; 1286defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>; 1287defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64", 1288 VOP_F32_F64, fround 1289>; 1290defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32", 1291 VOP_F64_F32, fextend 1292>; 1293defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0", 1294 VOP_F32_I32, AMDGPUcvt_f32_ubyte0 1295>; 1296defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1", 1297 VOP_F32_I32, AMDGPUcvt_f32_ubyte1 1298>; 1299defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2", 1300 VOP_F32_I32, AMDGPUcvt_f32_ubyte2 1301>; 1302defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3", 1303 VOP_F32_I32, AMDGPUcvt_f32_ubyte3 1304>; 1305defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64", 1306 VOP_I32_F64, fp_to_uint 1307>; 1308defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32", 1309 VOP_F64_I32, uint_to_fp 1310>; 1311 1312} // let SchedRW = [WriteQuarterRate32] 1313 1314defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32", 1315 VOP_F32_F32, AMDGPUfract 1316>; 1317defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32", 1318 VOP_F32_F32, ftrunc 1319>; 1320defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32", 1321 VOP_F32_F32, fceil 1322>; 1323defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32", 1324 VOP_F32_F32, frint 1325>; 1326defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32", 1327 VOP_F32_F32, ffloor 1328>; 1329defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32", 1330 VOP_F32_F32, fexp2 1331>; 1332 1333let SchedRW = [WriteQuarterRate32] in { 1334 1335defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32", 1336 VOP_F32_F32, flog2 1337>; 1338defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32", 1339 VOP_F32_F32, AMDGPUrcp 1340>; 1341defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, 
"v_rcp_iflag_f32", 1342 VOP_F32_F32 1343>; 1344defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32", 1345 VOP_F32_F32, AMDGPUrsq 1346>; 1347 1348} //let SchedRW = [WriteQuarterRate32] 1349 1350let SchedRW = [WriteDouble] in { 1351 1352defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64", 1353 VOP_F64_F64, AMDGPUrcp 1354>; 1355defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64", 1356 VOP_F64_F64, AMDGPUrsq 1357>; 1358 1359} // let SchedRW = [WriteDouble]; 1360 1361defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32", 1362 VOP_F32_F32, fsqrt 1363>; 1364 1365let SchedRW = [WriteDouble] in { 1366 1367defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64", 1368 VOP_F64_F64, fsqrt 1369>; 1370 1371} // let SchedRW = [WriteDouble] 1372 1373defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32", 1374 VOP_F32_F32, AMDGPUsin 1375>; 1376defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32", 1377 VOP_F32_F32, AMDGPUcos 1378>; 1379defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>; 1380defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>; 1381defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>; 1382defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>; 1383defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>; 1384defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64", 1385 VOP_I32_F64 1386>; 1387defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64", 1388 VOP_F64_F64 1389>; 1390defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>; 1391defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32", 1392 VOP_I32_F32 1393>; 1394defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32", 1395 VOP_F32_F32 1396>; 1397let vdst = 0, src0 = 0 in { 1398defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [], 1399 "v_clrexcp" 1400>; 1401} 1402defm V_MOVRELD_B32 
: VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>; 1403defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>; 1404defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>; 1405 1406// These instruction only exist on SI and CI 1407let SubtargetPredicate = isSICI in { 1408 1409let SchedRW = [WriteQuarterRate32] in { 1410 1411defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>; 1412defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>; 1413defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>; 1414defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32", 1415 VOP_F32_F32, AMDGPUrsq_clamped 1416>; 1417defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32", 1418 VOP_F32_F32, AMDGPUrsq_legacy 1419>; 1420 1421} // End let SchedRW = [WriteQuarterRate32] 1422 1423let SchedRW = [WriteDouble] in { 1424 1425defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>; 1426defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64", 1427 VOP_F64_F64, AMDGPUrsq_clamped 1428>; 1429 1430} // End SchedRW = [WriteDouble] 1431 1432} // End SubtargetPredicate = isSICI 1433 1434//===----------------------------------------------------------------------===// 1435// VINTRP Instructions 1436//===----------------------------------------------------------------------===// 1437 1438// FIXME: Specify SchedRW for VINTRP insturctions. 
// Parameter interpolation: P1 computes p10 + i * p0 using the per-primitive
// attribute data addressed via M0; P2 completes the result with the j
// coordinate.  NOTE(review): exact M0 layout is set up by the caller —
// not visible in this file, confirm against the interpolation lowering.
defm V_INTERP_P1_F32 : VINTRP_m <
  0x00000000, "v_interp_p1_f32",
  (outs VGPR_32:$dst),
  (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
  "$m0">;

// Second interpolation step; $src0 (the P1 result) is read and overwritten
// in place, hence the "$src0 = $dst" constraint.
defm V_INTERP_P2_F32 : VINTRP_m <
  0x00000001, "v_interp_p2_f32",
  (outs VGPR_32:$dst),
  (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
  "$src0,$m0",
  "$src0 = $dst">;

// Moves one of the raw interpolation parameters (P0/P10/P20, see the
// InterpSlot operand) into a VGPR without interpolating.
defm V_INTERP_MOV_F32 : VINTRP_m <
  0x00000002, "v_interp_mov_f32",
  (outs VGPR_32:$dst),
  (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
  "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
  "$m0">;

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

// Emits both encodings of v_cndmask: the 32-bit VOP2 form (_e32, implicit
// VCC selector) and the 64-bit VOP3 form (_e64, explicit SGPR-pair selector).
multiclass V_CNDMASK <vop2 op, string name> {
  defm _e32 : VOP2_m <
    op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
    name, name>;

  defm _e64  : VOP3_m <
    op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
    name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
}

defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;

let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
  VOP_F32_F32_F32, fadd
>;

defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
// No selection pattern (null_frag); the trailing name ties this to v_sub_f32
// as its operand-reversed counterpart.
defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
  VOP_F32_F32_F32, null_frag, "v_sub_f32"
>;
} // End isCommutable = 1

let isCommutable = 1 in {

defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
  VOP_F32_F32_F32, int_AMDGPU_mul
>;

defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
  VOP_F32_F32_F32, fmul
>;
1498defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24", 1499 VOP_I32_I32_I32, AMDGPUmul_i24 1500>; 1501 1502defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24", 1503 VOP_I32_I32_I32 1504>; 1505 1506defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24", 1507 VOP_I32_I32_I32, AMDGPUmul_u24 1508>; 1509 1510defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24", 1511 VOP_I32_I32_I32 1512>; 1513 1514defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32, 1515 fminnum>; 1516defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32, 1517 fmaxnum>; 1518defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>; 1519defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>; 1520defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>; 1521defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>; 1522 1523defm V_LSHRREV_B32 : VOP2Inst < 1524 vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag, 1525 "v_lshr_b32" 1526>; 1527 1528defm V_ASHRREV_I32 : VOP2Inst < 1529 vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag, 1530 "v_ashr_i32" 1531>; 1532 1533defm V_LSHLREV_B32 : VOP2Inst < 1534 vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag, 1535 "v_lshl_b32" 1536>; 1537 1538defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>; 1539defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>; 1540defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>; 1541 1542defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>; 1543} // End isCommutable = 1 1544 1545defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; 1546 1547let isCommutable = 1 in { 1548defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">; 1549} // End isCommutable = 1 1550 1551let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC 1552// No 
patterns so that the scalar instructions are always selected. 1553// The scalar versions will be replaced with vector when needed later. 1554 1555// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, 1556// but the VI instructions behave the same as the SI versions. 1557defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32", 1558 VOP_I32_I32_I32, add 1559>; 1560defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>; 1561 1562defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32", 1563 VOP_I32_I32_I32, null_frag, "v_sub_i32" 1564>; 1565 1566let Uses = [VCC] in { // Carry-in comes from VCC 1567defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32", 1568 VOP_I32_I32_I32_VCC 1569>; 1570defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32", 1571 VOP_I32_I32_I32_VCC 1572>; 1573defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32", 1574 VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32" 1575>; 1576 1577} // End Uses = [VCC] 1578} // End isCommutable = 1, Defs = [VCC] 1579 1580defm V_READLANE_B32 : VOP2SI_3VI_m < 1581 vop3 <0x001, 0x289>, 1582 "v_readlane_b32", 1583 (outs SReg_32:$vdst), 1584 (ins VGPR_32:$src0, SCSrc_32:$src1), 1585 "v_readlane_b32 $vdst, $src0, $src1" 1586>; 1587 1588defm V_WRITELANE_B32 : VOP2SI_3VI_m < 1589 vop3 <0x002, 0x28a>, 1590 "v_writelane_b32", 1591 (outs VGPR_32:$vdst), 1592 (ins SReg_32:$src0, SCSrc_32:$src1), 1593 "v_writelane_b32 $vdst, $src0, $src1" 1594>; 1595 1596// These instructions only exist on SI and CI 1597let SubtargetPredicate = isSICI in { 1598 1599defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32", 1600 VOP_F32_F32_F32, AMDGPUfmin_legacy 1601>; 1602defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32", 1603 VOP_F32_F32_F32, AMDGPUfmax_legacy 1604>; 1605 1606let isCommutable = 1 in { 1607defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>; 1608defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>; 
1609defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>; 1610} // End isCommutable = 1 1611} // End let SubtargetPredicate = SICI 1612 1613let isCommutable = 1 in { 1614defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32", 1615 VOP_F32_F32_F32 1616>; 1617} // End isCommutable = 1 1618 1619defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", 1620 VOP_I32_I32_I32 1621>; 1622defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32", 1623 VOP_I32_I32_I32 1624>; 1625defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32", 1626 VOP_I32_I32_I32 1627>; 1628defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32", 1629 VOP_I32_I32_I32 1630>; 1631defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", 1632 VOP_F32_F32_I32, AMDGPUldexp 1633>; 1634 1635defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32", 1636 VOP_I32_F32_I32>; // TODO: set "Uses = dst" 1637 1638defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32", 1639 VOP_I32_F32_F32 1640>; 1641defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32", 1642 VOP_I32_F32_F32 1643>; 1644defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32", 1645 VOP_I32_F32_F32, int_SI_packf16 1646>; 1647defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32", 1648 VOP_I32_I32_I32 1649>; 1650defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32", 1651 VOP_I32_I32_I32 1652>; 1653 1654//===----------------------------------------------------------------------===// 1655// VOP3 Instructions 1656//===----------------------------------------------------------------------===// 1657 1658let isCommutable = 1 in { 1659defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140, 0x1c0>, "v_mad_legacy_f32", 1660 VOP_F32_F32_F32_F32 1661>; 1662 1663defm V_MAD_F32 : 
VOP3Inst <vop3<0x141, 0x1c1>, "v_mad_f32", 1664 VOP_F32_F32_F32_F32, fmad 1665>; 1666 1667defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142, 0x1c2>, "v_mad_i32_i24", 1668 VOP_I32_I32_I32_I32, AMDGPUmad_i24 1669>; 1670defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143, 0x1c3>, "v_mad_u32_u24", 1671 VOP_I32_I32_I32_I32, AMDGPUmad_u24 1672>; 1673} // End isCommutable = 1 1674 1675defm V_CUBEID_F32 : VOP3Inst <vop3<0x144, 0x1c4>, "v_cubeid_f32", 1676 VOP_F32_F32_F32_F32 1677>; 1678defm V_CUBESC_F32 : VOP3Inst <vop3<0x145, 0x1c5>, "v_cubesc_f32", 1679 VOP_F32_F32_F32_F32 1680>; 1681defm V_CUBETC_F32 : VOP3Inst <vop3<0x146, 0x1c6>, "v_cubetc_f32", 1682 VOP_F32_F32_F32_F32 1683>; 1684defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32", 1685 VOP_F32_F32_F32_F32 1686>; 1687 1688defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32", 1689 VOP_I32_I32_I32_I32, AMDGPUbfe_u32 1690>; 1691defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32", 1692 VOP_I32_I32_I32_I32, AMDGPUbfe_i32 1693>; 1694 1695defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32", 1696 VOP_I32_I32_I32_I32, AMDGPUbfi 1697>; 1698 1699let isCommutable = 1 in { 1700defm V_FMA_F32 : VOP3Inst <vop3<0x14b, 0x1cb>, "v_fma_f32", 1701 VOP_F32_F32_F32_F32, fma 1702>; 1703defm V_FMA_F64 : VOP3Inst <vop3<0x14c, 0x1cc>, "v_fma_f64", 1704 VOP_F64_F64_F64_F64, fma 1705>; 1706} // End isCommutable = 1 1707 1708//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>; 1709defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32", 1710 VOP_I32_I32_I32_I32 1711>; 1712defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32", 1713 VOP_I32_I32_I32_I32 1714>; 1715 1716defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32", 1717 VOP_F32_F32_F32_F32, AMDGPUfmin3>; 1718 1719defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32", 1720 VOP_I32_I32_I32_I32, AMDGPUsmin3 1721>; 1722defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32", 1723 VOP_I32_I32_I32_I32, AMDGPUumin3 1724>; 1725defm 
V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32", 1726 VOP_F32_F32_F32_F32, AMDGPUfmax3 1727>; 1728defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32", 1729 VOP_I32_I32_I32_I32, AMDGPUsmax3 1730>; 1731defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32", 1732 VOP_I32_I32_I32_I32, AMDGPUumax3 1733>; 1734defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32", 1735 VOP_F32_F32_F32_F32 1736>; 1737defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32", 1738 VOP_I32_I32_I32_I32 1739>; 1740defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32", 1741 VOP_I32_I32_I32_I32 1742>; 1743 1744//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>; 1745//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>; 1746//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>; 1747defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32", 1748 VOP_I32_I32_I32_I32 1749>; 1750////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>; 1751defm V_DIV_FIXUP_F32 : VOP3Inst < 1752 vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup 1753>; 1754 1755let SchedRW = [WriteDouble] in { 1756 1757defm V_DIV_FIXUP_F64 : VOP3Inst < 1758 vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup 1759>; 1760 1761} // let SchedRW = [WriteDouble] 1762 1763let SchedRW = [WriteDouble] in { 1764let isCommutable = 1 in { 1765 1766defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64", 1767 VOP_F64_F64_F64, fadd 1768>; 1769defm V_MUL_F64 : VOP3Inst <vop3<0x165, 0x281>, "v_mul_f64", 1770 VOP_F64_F64_F64, fmul 1771>; 1772 1773defm V_MIN_F64 : VOP3Inst <vop3<0x166, 0x282>, "v_min_f64", 1774 VOP_F64_F64_F64, fminnum 1775>; 1776defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64", 1777 VOP_F64_F64_F64, fmaxnum 1778>; 1779 1780} // isCommutable = 1 1781 1782defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64", 1783 VOP_F64_F64_I32, AMDGPUldexp 1784>; 1785 1786} // let SchedRW = [WriteDouble] 1787 
1788let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { 1789 1790defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32", 1791 VOP_I32_I32_I32 1792>; 1793defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32", 1794 VOP_I32_I32_I32 1795>; 1796 1797defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b, 0x285>, "v_mul_lo_i32", 1798 VOP_I32_I32_I32 1799>; 1800defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32", 1801 VOP_I32_I32_I32 1802>; 1803 1804} // isCommutable = 1, SchedRW = [WriteQuarterRate32] 1805 1806let SchedRW = [WriteFloatFMA, WriteSALU] in { 1807defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>; 1808} 1809 1810let SchedRW = [WriteDouble, WriteSALU] in { 1811// Double precision division pre-scale. 1812defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>; 1813} // let SchedRW = [WriteDouble] 1814 1815let isCommutable = 1, Uses = [VCC] in { 1816 1817// v_div_fmas_f32: 1818// result = src0 * src1 + src2 1819// if (vcc) 1820// result *= 2^32 1821// 1822defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32", 1823 VOP_F32_F32_F32_F32, AMDGPUdiv_fmas 1824>; 1825 1826let SchedRW = [WriteDouble] in { 1827// v_div_fmas_f64: 1828// result = src0 * src1 + src2 1829// if (vcc) 1830// result *= 2^64 1831// 1832defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64", 1833 VOP_F64_F64_F64_F64, AMDGPUdiv_fmas 1834>; 1835 1836} // End SchedRW = [WriteDouble] 1837} // End isCommutable = 1 1838 1839//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; 1840//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; 1841//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; 1842 1843let SchedRW = [WriteDouble] in { 1844defm V_TRIG_PREOP_F64 : VOP3Inst < 1845 vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop 1846>; 1847 1848} // let SchedRW = [WriteDouble] 1849 1850// These instructions only exist on SI and CI 1851let SubtargetPredicate = 
isSICI in { 1852 1853defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>; 1854defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>; 1855defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>; 1856 1857defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", 1858 VOP_F32_F32_F32_F32>; 1859 1860} // End SubtargetPredicate = isSICI 1861 1862let SubtargetPredicate = isVI in { 1863 1864defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64", 1865 VOP_I64_I32_I64 1866>; 1867defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64", 1868 VOP_I64_I32_I64 1869>; 1870defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64", 1871 VOP_I64_I32_I64 1872>; 1873 1874} // End SubtargetPredicate = isVI 1875 1876//===----------------------------------------------------------------------===// 1877// Pseudo Instructions 1878//===----------------------------------------------------------------------===// 1879let isCodeGenOnly = 1, isPseudo = 1 in { 1880 1881// For use in patterns 1882def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst), 1883 (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", [] 1884>; 1885 1886let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { 1887// 64-bit vector move instruction. This is mainly used by the SIFoldOperands 1888// pass to enable folding of inline immediates. 1889def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>; 1890} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0 1891 1892let hasSideEffects = 1 in { 1893def SGPR_USE : InstSI <(outs),(ins), "", []>; 1894} 1895 1896// SI pseudo instructions. These are used by the CFG structurizer pass 1897// and should be lowered to ISA instructions prior to codegen. 
// Control-flow pseudos are conservatively marked as touching memory and
// having side effects so nothing is moved across them; they all read and
// rewrite the EXEC mask.
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
    Uses = [EXEC], Defs = [EXEC] in {

let isBranch = 1, isTerminator = 1 in {

// Begin an if-block: $dst receives the saved exec mask to be restored by
// SI_END_CF; branches to $target when no lanes take the "then" side.
def SI_IF: InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$vcc, brtarget:$target),
  "",
  [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>;

// Flip to the "else" side of a structured if; the saved mask is updated
// in place (see the constraint below).
def SI_ELSE : InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$src, brtarget:$target),
  "",
  [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
> {
  let Constraints = "$src = $dst";
}

// Loop back-edge: re-enters the loop body at $target using the mask in
// $saved.
def SI_LOOP : InstSI <
  (outs),
  (ins SReg_64:$saved, brtarget:$target),
  "si_loop $saved, $target",
  [(int_SI_loop i64:$saved, bb:$target)]
>;

} // end isBranch = 1, isTerminator = 1

// Accumulate lanes leaving a loop.
// FIX: the mnemonic previously read "si_else $dst, $src" (copy-paste from
// SI_ELSE above), so asm/debug dumps mislabeled this pseudo.  Every other
// pseudo here prints under its own name; this one now does too.
def SI_BREAK : InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$src),
  "si_break $dst, $src",
  [(set i64:$dst, (int_SI_break i64:$src))]
>;

def SI_IF_BREAK : InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$vcc, SReg_64:$src),
  "si_if_break $dst, $vcc, $src",
  [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;

def SI_ELSE_BREAK : InstSI <
  (outs SReg_64:$dst),
  (ins SReg_64:$src0, SReg_64:$src1),
  "si_else_break $dst, $src0, $src1",
  [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;

// Restore the exec mask saved at the matching SI_IF/SI_ELSE, closing the
// structured control-flow region.
def SI_END_CF : InstSI <
  (outs),
  (ins SReg_64:$saved),
  "si_end_cf $saved",
  [(int_SI_end_cf i64:$saved)]
>;

// Disable lanes for which $src compares as a kill; used by pixel discard.
def SI_KILL : InstSI <
  (outs),
  (ins VSrc_32:$src),
  "si_kill $src",
  [(int_AMDGPU_kill f32:$src)]
>;

} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
  // Uses = [EXEC], Defs = [EXEC]

let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {

//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;

let UseNamedOperandTable = 1 in {

def SI_RegisterLoad : InstSI <
  (outs VGPR_32:$dst, SReg_64:$temp),
  (ins FRAMEri32:$addr,
i32imm:$chan), 1976 "", [] 1977> { 1978 let isRegisterLoad = 1; 1979 let mayLoad = 1; 1980} 1981 1982class SIRegStore<dag outs> : InstSI < 1983 outs, 1984 (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan), 1985 "", [] 1986> { 1987 let isRegisterStore = 1; 1988 let mayStore = 1; 1989} 1990 1991let usesCustomInserter = 1 in { 1992def SI_RegisterStorePseudo : SIRegStore<(outs)>; 1993} // End usesCustomInserter = 1 1994def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; 1995 1996 1997} // End UseNamedOperandTable = 1 1998 1999def SI_INDIRECT_SRC : InstSI < 2000 (outs VGPR_32:$dst, SReg_64:$temp), 2001 (ins unknown:$src, VSrc_32:$idx, i32imm:$off), 2002 "si_indirect_src $dst, $temp, $src, $idx, $off", 2003 [] 2004>; 2005 2006class SI_INDIRECT_DST<RegisterClass rc> : InstSI < 2007 (outs rc:$dst, SReg_64:$temp), 2008 (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VGPR_32:$val), 2009 "si_indirect_dst $dst, $temp, $src, $idx, $off, $val", 2010 [] 2011> { 2012 let Constraints = "$src = $dst"; 2013} 2014 2015def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>; 2016def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; 2017def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; 2018def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; 2019def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; 2020 2021} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] 2022 2023multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { 2024 2025 let UseNamedOperandTable = 1 in { 2026 def _SAVE : InstSI < 2027 (outs), 2028 (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, 2029 SReg_32:$scratch_offset), 2030 "", [] 2031 >; 2032 2033 def _RESTORE : InstSI < 2034 (outs sgpr_class:$dst), 2035 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), 2036 "", [] 2037 >; 2038 } // End UseNamedOperandTable = 1 2039} 2040 2041defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>; 2042defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>; 2043defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>; 2044defm SI_SPILL_S256 
: SI_SPILL_SGPR <SReg_256>; 2045defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>; 2046 2047multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { 2048 let UseNamedOperandTable = 1 in { 2049 def _SAVE : InstSI < 2050 (outs), 2051 (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, 2052 SReg_32:$scratch_offset), 2053 "", [] 2054 >; 2055 2056 def _RESTORE : InstSI < 2057 (outs vgpr_class:$dst), 2058 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), 2059 "", [] 2060 >; 2061 } // End UseNamedOperandTable = 1 2062} 2063 2064defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>; 2065defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>; 2066defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>; 2067defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>; 2068defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>; 2069defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>; 2070 2071let Defs = [SCC] in { 2072 2073def SI_CONSTDATA_PTR : InstSI < 2074 (outs SReg_64:$dst), 2075 (ins), 2076 "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] 2077>; 2078 2079} // End Defs = [SCC] 2080 2081} // end IsCodeGenOnly, isPseudo 2082 2083} // end SubtargetPredicate = isGCN 2084 2085let Predicates = [isGCN] in { 2086 2087def : Pat< 2088 (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2), 2089 (V_CNDMASK_B32_e64 $src2, $src1, 2090 (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0, 2091 DSTCLAMP.NONE, DSTOMOD.NONE)) 2092>; 2093 2094def : Pat < 2095 (int_AMDGPU_kilp), 2096 (SI_KILL 0xbf800000) 2097>; 2098 2099/* int_SI_vs_load_input */ 2100def : Pat< 2101 (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), 2102 (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) 2103>; 2104 2105/* int_SI_export */ 2106def : Pat < 2107 (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, 2108 f32:$src0, f32:$src1, f32:$src2, f32:$src3), 2109 (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, 2110 $src0, $src1, $src2, $src3) 2111>; 2112 
2113//===----------------------------------------------------------------------===// 2114// SMRD Patterns 2115//===----------------------------------------------------------------------===// 2116 2117multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { 2118 2119 // 1. SI-CI: Offset as 8bit DWORD immediate 2120 def : Pat < 2121 (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), 2122 (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) 2123 >; 2124 2125 // 2. Offset loaded in an 32bit SGPR 2126 def : Pat < 2127 (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), 2128 (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) 2129 >; 2130 2131 // 3. No offset at all 2132 def : Pat < 2133 (constant_load i64:$sbase), 2134 (vt (Instr_IMM $sbase, 0)) 2135 >; 2136} 2137 2138multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { 2139 2140 // 1. VI: Offset as 20bit immediate in bytes 2141 def : Pat < 2142 (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))), 2143 (vt (Instr_IMM $sbase, (as_i32imm $offset))) 2144 >; 2145 2146 // 2. Offset loaded in an 32bit SGPR 2147 def : Pat < 2148 (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), 2149 (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) 2150 >; 2151 2152 // 3. 
No offset at all 2153 def : Pat < 2154 (constant_load i64:$sbase), 2155 (vt (Instr_IMM $sbase, 0)) 2156 >; 2157} 2158 2159let Predicates = [isSICI] in { 2160defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; 2161defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; 2162defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; 2163defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; 2164defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; 2165defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; 2166defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>; 2167} // End Predicates = [isSICI] 2168 2169let Predicates = [isVI] in { 2170defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; 2171defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; 2172defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; 2173defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; 2174defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; 2175defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; 2176defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>; 2177} // End Predicates = [isVI] 2178 2179let Predicates = [isSICI] in { 2180 2181// 1. Offset as 8bit DWORD immediate 2182def : Pat < 2183 (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), 2184 (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) 2185>; 2186 2187} // End Predicates = [isSICI] 2188 2189// 2. 
// Offset loaded in a 32-bit SGPR.
def : Pat <
  (SIload_constant v4i32:$sbase, imm:$offset),
  (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;

//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//

// 64-bit popcount: count bits of the full 64-bit value into the low half
// and zero the high half of the result pair.
def : Pat <
  (i64 (ctpop i64:$src)),
    (i64 (REG_SEQUENCE SReg_64,
     (S_BCNT1_I32_B64 $src), sub0,
     (S_MOV_B32 0), sub1))
>;

//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//

// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
  (i32 (addc i32:$src0, i32:$src1)),
  (S_ADD_U32 $src0, $src1)
>;

//===----------------------------------------------------------------------===//
// SOPP Patterns
//===----------------------------------------------------------------------===//

def : Pat <
  (int_AMDGPU_barrier_global),
  (S_BARRIER)
>;

//===----------------------------------------------------------------------===//
// VOP1 Patterns
//===----------------------------------------------------------------------===//

let Predicates = [UnsafeFPMath] in {

//def : RcpPat<V_RCP_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F32_e32, f32>;

def : RsqPat<V_RSQ_F32_e32, f32>;
def : RsqPat<V_RSQ_F64_e32, f64>;
}

//===----------------------------------------------------------------------===//
// VOP2 Patterns
//===----------------------------------------------------------------------===//

def : Pat <
  (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
  (V_BCNT_U32_B32_e64 $popcnt, $val)
>;

def : Pat <
  (i32 (select i1:$src0, i32:$src1, i32:$src2)),
  (V_CNDMASK_B32_e64 $src2, $src1, $src0)
>;

/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/

// Image + sampler
class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
  (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
        i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
  (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
          (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
          $addr, $rsrc, $sampler)
>;

// Instantiate a SampleRawPattern for each address-vector width of the opcode.
multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
  def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
  def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
  def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
  def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
  def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
}

// Image only
class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
  (name vt:$addr, v8i32:$rsrc, i32:$dmask, i32:$unorm,
        i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
  (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
          (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
          $addr, $rsrc)
>;

multiclass ImagePatterns<SDPatternOperator name, string opcode> {
  def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
  def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
  def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
}

// Basic sample
defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;

// Sample with comparison
defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;

// Sample with offsets
defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;

// Sample with comparison and offsets
defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;

// Gather opcodes
// Only the variants which make sense are defined.
def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;

def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;

def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;

def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;

def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;

def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;

/* SIsample for simple 1D texture lookup */
def : Pat <
  (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
  (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
  (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

// Rect textures use unnormalized coordinates (unorm = 1).
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
  (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

// Array textures set the da (declare array) bit.
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
  (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleShadowPattern<SDNode name, MIMG opcode,
                          ValueType vt> : Pat <
  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
  (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

class SampleShadowArrayPattern<SDNode name, MIMG opcode,
                               ValueType vt> : Pat <
  (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
  (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;

/* SIsample* for texture lookups consuming more address parameters */
multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
                          MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
                          MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
  def : SamplePattern <SIsample, sample, addr_type>;
  def : SampleRectPattern <SIsample, sample, addr_type>;
  def : SampleArrayPattern <SIsample, sample, addr_type>;
  def : SampleShadowPattern <SIsample, sample_c, addr_type>;
  def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;

  def : SamplePattern <SIsamplel, sample_l, addr_type>;
  def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
  def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
  def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;

  def : SamplePattern <SIsampleb, sample_b, addr_type>;
  def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
  def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
  def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;

  def : SamplePattern <SIsampled, sample_d, addr_type>;
  def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
  def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
  def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
}

defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
                      IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
                      IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
                      IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
                      v2i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
                      IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
                      IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
                      IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
                      v4i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
                      IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
                      IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
                      IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
                      v8i32>;
defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
                      IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
                      IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
                      IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
                      v16i32>;

/* int_SI_imageload for texture fetches consuming varying address parameters */
class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
  (name addr_type:$addr, v32i8:$rsrc, imm),
  (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;

class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
  (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
  (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;

class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
  (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
  (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
>;

class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
  (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
  (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;

multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
  def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
  def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
}

multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
  def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
  def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
}

defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;

defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;

/* Image resource information */
def : Pat <
  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;

def : Pat <
  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;

def : Pat <
  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
  (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;

/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting  **********/
/********** ============================================ **********/

foreach Index = 0-2 in {
  def Extract_Element_v2i32_#Index : Extract_Element <
    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v2i32_#Index : Insert_Element <
    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
  >;

  def Extract_Element_v2f32_#Index : Extract_Element <
    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v2f32_#Index : Insert_Element <
    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
}

foreach Index = 0-3 in {
  def Extract_Element_v4i32_#Index : Extract_Element <
    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v4i32_#Index : Insert_Element <
    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
  >;

  def Extract_Element_v4f32_#Index : Extract_Element <
    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v4f32_#Index : Insert_Element <
    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
}

foreach Index = 0-7 in {
  def Extract_Element_v8i32_#Index : Extract_Element <
    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v8i32_#Index : Insert_Element <
    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
  >;

  def Extract_Element_v8f32_#Index : Extract_Element <
    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v8f32_#Index : Insert_Element <
    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
}

foreach Index = 0-15 in {
  def Extract_Element_v16i32_#Index : Extract_Element <
    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v16i32_#Index : Insert_Element <
    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
  >;

  def Extract_Element_v16f32_#Index : Extract_Element <
    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
  def Insert_Element_v16f32_#Index : Insert_Element <
    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
  >;
}

def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VGPR_32>;

def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <f32, i32, VGPR_32>;

def : BitConvert <i64, f64, VReg_64>;

def : BitConvert <f64, i64, VReg_64>;

def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v2f32, i64, VReg_64>;
def : BitConvert <i64, v2f32, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;

def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8i32, v32i8, SReg_256>;
def : BitConvert <v32i8, v8i32, SReg_256>;
def : BitConvert <v8i32, v32i8, VReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;
def : BitConvert <v32i8, v8i32, VReg_256>;

def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;

/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/

def : Pat <
  (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
               (f32 FP_ZERO), (f32 FP_ONE)),
  (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
>;

/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/

// Prevent expanding both fneg and fabs.

// FIXME: Should use S_OR_B32
def : Pat <
  (fneg (fabs f32:$src)),
  (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>;

// FIXME: Should use S_OR_B32
def : Pat <
  (fneg (fabs f64:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    sub0,
    (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
                  (V_MOV_B32_e32 0x80000000)), // Set sign bit.
    sub1)
>;

def : Pat <
  (fabs f32:$src),
  (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) // Clear sign bit.
>;

def : Pat <
  (fneg f32:$src),
  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) // Toggle sign bit.
>;

def : Pat <
  (fabs f64:$src),
  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    sub0,
    (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
                   (V_MOV_B32_e32 0x7fffffff)), // Clear sign bit.
    sub1)
>;

def : Pat <
  (fneg f64:$src),
  (REG_SEQUENCE VReg_64,
    (i32 (EXTRACT_SUBREG f64:$src, sub0)),
    sub0,
    (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
                   (V_MOV_B32_e32 0x80000000)), // Toggle sign bit.
    sub1)
>;

/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/

def : Pat <
  (SGPRImm<(i32 imm)>:$imm),
  (S_MOV_B32 imm:$imm)
>;

def : Pat <
  (SGPRImm<(f32 fpimm)>:$imm),
  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;

def : Pat <
  (i32 imm:$imm),
  (V_MOV_B32_e32 imm:$imm)
>;

def : Pat <
  (f32 fpimm:$imm),
  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;

def : Pat <
  (i64 InlineImm<i64>:$imm),
  (S_MOV_B64 InlineImm<i64>:$imm)
>;

// XXX - Should this use a s_cmp to set SCC?

// Set to sign-extended 64-bit value (true = -1, false = 0)
def : Pat <
  (i1 imm:$imm),
  (S_MOV_B64 (i64 (as_i64imm $imm)))
>;

def : Pat <
  (f64 InlineFPImm<f64>:$imm),
  (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
>;

/********** ====================== **********/
/********** Interpolation Patterns **********/
/********** ====================== **********/

// The value of $params is constant throughout the entire kernel.
// We need to use S_MOV_B32 $params, because CSE ignores copies, so
// without it we end up with a lot of redundant moves.

def : Pat <
  (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;

def : Pat <
  (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
                                    imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
                   (EXTRACT_SUBREG $ij, sub1),
                   imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
>;

/********** ================== **********/
/********** Intrinsic Patterns **********/
/********** ================== **********/

/* llvm.AMDGPU.pow */
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;

def : Pat <
  (int_AMDGPU_div f32:$src0, f32:$src1),
  (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;

def : Pat <
  (int_AMDGPU_cube v4f32:$src),
  (REG_SEQUENCE VReg_128,
    (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
                  0 /* clamp */, 0 /* omod */), sub0,
    (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
                  0 /* clamp */, 0 /* omod */), sub1,
    (V_CUBEMA_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
                  0 /* clamp */, 0 /* omod */), sub2,
    (V_CUBEID_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
                  0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
                  0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
                  0 /* clamp */, 0 /* omod */), sub3)
>;

def : Pat <
  (i32 (sext i1:$src0)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;

class Ext32Pat <SDNode ext> : Pat <
  (i32 (ext i1:$src0)),
  (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
>;

def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;

// Offset in a 32-bit VGPR
def : Pat <
  (SIload_constant v4i32:$sbase, i32:$voff),
  (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;

// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
  (AMDGPUurecip i32:$src0),
  (V_CVT_U32_F32_e32
    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;

def : Pat <
  (int_SI_tid),
  (V_MBCNT_HI_U32_B32_e64 0xffffffff,
                          (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
>;

//===----------------------------------------------------------------------===//
// VOP3 Patterns
//===----------------------------------------------------------------------===//

def : IMad24Pat<V_MAD_I32_I24>;
def : UMad24Pat<V_MAD_U32_U24>;

def : Pat <
  (mulhu i32:$src0, i32:$src1),
  (V_MUL_HI_U32 $src0, $src1)
>;

def : Pat <
  (mulhs i32:$src0, i32:$src1),
  (V_MUL_HI_I32 $src0, $src1)
>;

defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;

/********** ======================= **********/
/**********   Load/Store Patterns   **********/
/********** ======================= **********/

class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
  (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
  (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;

def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
def : DSReadPat <DS_READ_B32, i32, local_load>;

let AddedComplexity = 100 in {

def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;

} // End AddedComplexity = 100

def : Pat <
  (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
                                          i8:$offset1))),
  (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1))
>;

class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
  (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
  (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;

def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
def : DSWritePat <DS_WRITE_B32, i32, local_store>;

let AddedComplexity = 100 in {

def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
} // End AddedComplexity = 100

def : Pat <
  (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
                                                  i8:$offset1)),
  (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
                 (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
                 (i1 0), (S_MOV_B32 -1))
>;

class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
  (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;

// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
// We need to use something for the data0, so we set a register to
// -1. For the non-rtn variants, the manual says it does
// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
// will always do the increment so I'm assuming it's the same.
//
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
class DSAtomicIncRetPat<DS inst, ValueType vt,
                        Instruction LoadImm, PatFrag frag> : Pat <
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
  (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;


class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
  (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;


// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
                        S_MOV_B32, atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
                        S_MOV_B32, atomic_load_sub_local>;

def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;

def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;

// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
                        S_MOV_B64, atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
                        S_MOV_B64, atomic_load_sub_local>;

def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;

def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;


//===----------------------------------------------------------------------===//
// MUBUF Patterns
//===----------------------------------------------------------------------===//

multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
                              PatFrag constant_ld> {
  def : Pat <
     (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
  >;
}

let Predicates = [isSICI] in {
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
} // End Predicates = [isSICI]

class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
  (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
                        i32:$soffset, u16imm:$offset))),
  (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;

def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;

// BUFFER_LOAD_DWORD*, addr64=0
// One pattern per addressing mode, selected by the offen/idxen flags in the
// intrinsic call.
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
                             MUBUF bothen> {

  def : Pat <
    (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
                                  imm:$offset, 0, 0, imm:$glc, imm:$slc,
                                  imm:$tfe)),
    (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
            (as_i1imm $slc), (as_i1imm $tfe))
  >;

  def : Pat <
    (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
                                  imm:$offset, 1, 0, imm:$glc, imm:$slc,
                                  imm:$tfe)),
    (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
           (as_i1imm $tfe))
  >;

  def : Pat <
    (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
                                  imm:$offset, 0, 1, imm:$glc, imm:$slc,
                                  imm:$tfe)),
    (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
           (as_i1imm $slc), (as_i1imm $tfe))
  >;

  def : Pat <
    (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
                                  imm:$offset, 1, 1, imm:$glc, imm:$slc,
                                  imm:$tfe)),
    (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
            (as_i1imm $tfe))
  >;
}

defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
                         BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
                         BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
                         BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;

class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
  (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
                               u16imm:$offset)),
  (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;

def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;

/*
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
  (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
  (Instr $value, $srsrc, $vaddr, $offset)
>;

let Predicates = [isSICI] in {
def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
def : MUBUFStore_Pattern
<BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>; 3054} // End Predicates = [isSICI] 3055 3056*/ 3057 3058//===----------------------------------------------------------------------===// 3059// MTBUF Patterns 3060//===----------------------------------------------------------------------===// 3061 3062// TBUFFER_STORE_FORMAT_*, addr64=0 3063class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat< 3064 (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, 3065 i32:$soffset, imm:$inst_offset, imm:$dfmt, 3066 imm:$nfmt, imm:$offen, imm:$idxen, 3067 imm:$glc, imm:$slc, imm:$tfe), 3068 (opcode 3069 $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), 3070 (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, 3071 (as_i1imm $slc), (as_i1imm $tfe), $soffset) 3072>; 3073 3074def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; 3075def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; 3076def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; 3077def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; 3078 3079let SubtargetPredicate = isCI in { 3080 3081defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8", 3082 VOP_I32_I32_I32 3083>; 3084defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8", 3085 VOP_I32_I32_I32 3086>; 3087defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8", 3088 VOP_I32_I32_I32 3089>; 3090 3091let isCommutable = 1 in { 3092defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32", 3093 VOP_I64_I32_I32_I64 3094>; 3095 3096// XXX - Does this set VCC? 
3097defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32", 3098 VOP_I64_I32_I32_I64 3099>; 3100} // End isCommutable = 1 3101 3102// Remaining instructions: 3103// FLAT_* 3104// S_CBRANCH_CDBGUSER 3105// S_CBRANCH_CDBGSYS 3106// S_CBRANCH_CDBGSYS_OR_USER 3107// S_CBRANCH_CDBGSYS_AND_USER 3108// S_DCACHE_INV_VOL 3109// DS_NOP 3110// DS_GWS_SEMA_RELEASE_ALL 3111// DS_WRAP_RTN_B32 3112// DS_CNDXCHG32_RTN_B64 3113// DS_WRITE_B96 3114// DS_WRITE_B128 3115// DS_CONDXCHG32_RTN_B128 3116// DS_READ_B96 3117// DS_READ_B128 3118// BUFFER_LOAD_DWORDX3 3119// BUFFER_STORE_DWORDX3 3120 3121} // End isCI 3122 3123//===----------------------------------------------------------------------===// 3124// Flat Patterns 3125//===----------------------------------------------------------------------===// 3126 3127class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt, 3128 PatFrag flat_ld> : 3129 Pat <(vt (flat_ld i64:$ptr)), 3130 (Instr_ADDR64 $ptr) 3131>; 3132 3133def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>; 3134def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>; 3135def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>; 3136def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>; 3137def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>; 3138def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>; 3139def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>; 3140def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>; 3141def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>; 3142 3143class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> : 3144 Pat <(st vt:$value, i64:$ptr), 3145 (Instr $value, $ptr) 3146 >; 3147 3148def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>; 3149def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>; 3150def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>; 3151def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>; 
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;

/********** ====================== **********/
/**********   Indirect addressing  **********/
/********** ====================== **********/

// Dynamic vector element access via the SI_INDIRECT pseudos. Each element
// type gets four patterns: extract/insert, each with and without a constant
// offset folded out of an (add idx, imm).
multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {

  // 1. Extract with offset
  def : Pat<
    (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
    (SI_INDIRECT_SRC $vec, $idx, imm:$off)
  >;

  // 2. Extract without offset
  def : Pat<
    (eltvt (vector_extract vt:$vec, i32:$idx)),
    (SI_INDIRECT_SRC $vec, $idx, 0)
  >;

  // 3. Insert with offset
  def : Pat<
    (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
    (IndDst $vec, $idx, imm:$off, $val)
  >;

  // 4. Insert without offset
  def : Pat<
    (vector_insert vt:$vec, eltvt:$val, i32:$idx),
    (IndDst $vec, $idx, 0, $val)
  >;
}

defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;

defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;

//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//

// sext_inreg is lowered to a bitfield extract. The S_BFE immediate packs
// (width << 16) | offset, as the per-pattern comments below spell out.
def : Pat<(i32 (sext_inreg i32:$src, i1)),
  (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16

// Handle sext_inreg in i64
def : Pat <
  (i64 (sext_inreg i64:$src, i1)),
  (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
>;

def : Pat <
  (i64 (sext_inreg i64:$src, i8)),
  (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
>;

def : Pat <
  (i64 (sext_inreg i64:$src, i16)),
  (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
>;

def : Pat <
  (i64 (sext_inreg i64:$src, i32)),
  (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
>;

// zext/anyext i32 -> i64: source in the low half, zero in the high half.
class ZExt_i64_i32_Pat <SDNode ext> : Pat <
  (i64 (ext i32:$src)),
  (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
>;

// zext/anyext i1 -> i64: select 0/1 into the low half, zero the high half.
class ZExt_i64_i1_Pat <SDNode ext> : Pat <
  (i64 (ext i1:$src)),
  (REG_SEQUENCE VReg_64,
    (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
    (S_MOV_B32 0), sub1)
>;


def : ZExt_i64_i32_Pat<zext>;
def : ZExt_i64_i32_Pat<anyext>;
def : ZExt_i64_i1_Pat<zext>;
def : ZExt_i64_i1_Pat<anyext>;

// sext i32 -> i64: replicate the sign bit into the high half with an
// arithmetic shift right by 31.
def : Pat <
  (i64 (sext i32:$src)),
  (REG_SEQUENCE SReg_64, $src, sub0,
    (S_ASHR_I32 $src, 31), sub1)
>;

// sext i1 -> i64: both halves become 0 or -1 depending on the condition.
def : Pat <
  (i64 (sext i1:$src)),
  (REG_SEQUENCE VReg_64,
    (V_CNDMASK_B32_e64 0, -1, $src), sub0,
    (V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;

// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
// comparisons still write to a pair of SGPRs, so treat these as
// 64-bit comparisons. When legalizing SGPR copies, instructions
// resulting in the copies from SCC to these instructions will be
// moved to the VALU.
3261def : Pat < 3262 (i1 (and i1:$src0, i1:$src1)), 3263 (S_AND_B64 $src0, $src1) 3264>; 3265 3266def : Pat < 3267 (i1 (or i1:$src0, i1:$src1)), 3268 (S_OR_B64 $src0, $src1) 3269>; 3270 3271def : Pat < 3272 (i1 (xor i1:$src0, i1:$src1)), 3273 (S_XOR_B64 $src0, $src1) 3274>; 3275 3276def : Pat < 3277 (f32 (sint_to_fp i1:$src)), 3278 (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src) 3279>; 3280 3281def : Pat < 3282 (f32 (uint_to_fp i1:$src)), 3283 (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src) 3284>; 3285 3286def : Pat < 3287 (f64 (sint_to_fp i1:$src)), 3288 (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) 3289>; 3290 3291def : Pat < 3292 (f64 (uint_to_fp i1:$src)), 3293 (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)) 3294>; 3295 3296//===----------------------------------------------------------------------===// 3297// Miscellaneous Patterns 3298//===----------------------------------------------------------------------===// 3299 3300def : Pat < 3301 (i32 (trunc i64:$a)), 3302 (EXTRACT_SUBREG $a, sub0) 3303>; 3304 3305def : Pat < 3306 (i1 (trunc i32:$a)), 3307 (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) 3308>; 3309 3310def : Pat < 3311 (i1 (trunc i64:$a)), 3312 (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), 3313 (EXTRACT_SUBREG $a, sub0)), 1) 3314>; 3315 3316def : Pat < 3317 (i32 (bswap i32:$a)), 3318 (V_BFI_B32 (S_MOV_B32 0x00ff00ff), 3319 (V_ALIGNBIT_B32 $a, $a, 24), 3320 (V_ALIGNBIT_B32 $a, $a, 8)) 3321>; 3322 3323def : Pat < 3324 (f32 (select i1:$src2, f32:$src1, f32:$src0)), 3325 (V_CNDMASK_B32_e64 $src0, $src1, $src2) 3326>; 3327 3328multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> { 3329 def : Pat < 3330 (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), 3331 (BFM $a, $b) 3332 >; 3333 3334 def : Pat < 3335 (vt (add (vt (shl 1, vt:$a)), -1)), 3336 (BFM $a, (MOV 0)) 3337 >; 3338} 3339 3340defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>; 3341// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>; 3342 
def : BFEPattern <V_BFE_U32, S_MOV_B32>;

//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isSI] in {

// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
// way to implement it is using V_FRACT_F64.
// The workaround for the V_FRACT bug is:
//    fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)

// Convert (x + (-floor(x))) to fract(x)
// 0x3fefffffffffffff is the largest f64 below 1.0 (the 0.99999... clamp);
// the V_CMP_CLASS mask 3 tests for NaN, in which case x passes through.
def : Pat <
  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
  (V_CNDMASK_B64_PSEUDO
      $x,
      (V_MIN_F64
          SRCMODS.NONE,
          (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
          SRCMODS.NONE,
          (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
          DSTCLAMP.NONE, DSTOMOD.NONE),
      (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
>;

// Convert floor(x) to (x - fract(x)), reusing the NaN-safe fract expansion
// above as the negated addend of a V_ADD_F64.
def : Pat <
  (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
  (V_ADD_F64
      $mods,
      $x,
      SRCMODS.NEG,
      (V_CNDMASK_B64_PSEUDO
          $x,
          (V_MIN_F64
              SRCMODS.NONE,
              (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
              SRCMODS.NONE,
              (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
              DSTCLAMP.NONE, DSTOMOD.NONE),
          (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
      DSTCLAMP.NONE, DSTOMOD.NONE)
>;

} // End Predicates = [isSI]

// On CI the hardware V_FRACT is usable directly.
let Predicates = [isCI] in {

// Convert (x - floor(x)) to fract(x)
def : Pat <
  (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
             (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
  (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>;

// Convert (x + (-floor(x))) to fract(x)
def : Pat <
  (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
             (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
  (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>;

} // End Predicates = [isCI]

//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//

def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;

} // End isGCN predicate