//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
// Remaining instructions:
// S_CBRANCH_CDBGUSER
// S_CBRANCH_CDBGSYS
// S_CBRANCH_CDBGSYS_OR_USER
// S_CBRANCH_CDBGSYS_AND_USER
// DS_NOP
// DS_GWS_SEMA_RELEASE_ALL
// DS_WRAP_RTN_B32
// DS_CONDXCHG32_RTN_B64
// DS_WRITE_B96
// DS_WRITE_B128
// DS_CONDXCHG32_RTN_B128
// DS_READ_B96
// DS_READ_B128
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

// Every definition from here to the "End SubtargetPredicate = isCIVI" marker
// is guarded by the isCIVI subtarget predicate.
let SubtargetPredicate = isCIVI in {

// f64 rounding instructions, scheduled as WriteDoubleAdd.
let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <
  vop1<0x17>, "v_trunc_f64", VOP_F64_F64, ftrunc>;
defm V_CEIL_F64 : VOP1Inst <
  vop1<0x18>, "v_ceil_f64", VOP_F64_F64, fceil>;
defm V_FLOOR_F64 : VOP1Inst <
  vop1<0x1A>, "v_floor_f64", VOP_F64_F64, ffloor>;
defm V_RNDNE_F64 : VOP1Inst <
  vop1<0x19>, "v_rndne_f64", VOP_F64_F64, frint>;
} // End SchedRW = [WriteDoubleAdd]

// Legacy f32 log/exp, scheduled as WriteQuarterRate32. No pattern operator is
// passed, so only the profile VOP_F32_F32 is supplied.
let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <
  vop1<0x45, 0x4c>, "v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 : VOP1Inst <
  vop1<0x46, 0x4b>, "v_exp_legacy_f32", VOP_F32_F32>;
} // End SchedRW = [WriteQuarterRate32]

//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//

// NOTE(review): confirm the 0x173 / 0x172 opcode assignment of the QSAD/MQSAD
// pair below against the CI ISA manual.
defm V_QSAD_PK_U16_U8 : VOP3Inst <
  vop3<0x173>, "v_qsad_pk_u16_u8", VOP_I32_I32_I32>;
defm V_MQSAD_U16_U8 : VOP3Inst <
  vop3<0x172>, "v_mqsad_u16_u8", VOP_I32_I32_I32>;
defm V_MQSAD_U32_U8 : VOP3Inst <
  vop3<0x175>, "v_mqsad_u32_u8", VOP_I32_I32_I32>;

// 32x32 + 64 multiply-add, unsigned and signed; both marked commutable.
let isCommutable = 1 in {
defm V_MAD_U64_U32 : VOP3Inst <
  vop3<0x176>, "v_mad_u64_u32", VOP_I64_I32_I32_I64>;

// XXX - Does this set VCC?
defm V_MAD_I64_I32 : VOP3Inst <
  vop3<0x177>, "v_mad_i64_i32", VOP_I64_I32_I32_I64>;
} // End isCommutable = 1


//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <
  0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;

// Not yet defined:
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128

//===----------------------------------------------------------------------===//
// SMRD Instructions
//===----------------------------------------------------------------------===//

defm S_DCACHE_INV_VOL : SMRD_Inval <
  smrd<0x1d, 0x22>, "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;

//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//

let DisableSIDecoder = 1 in {
defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <
  mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol>;
} // End DisableSIDecoder = 1

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

// flat<a, b> carries two opcode values; presumably the CI encoding followed by
// the VI encoding -- TODO confirm against the flat class definition.

// Loads.
defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
  flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32>;
defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
  flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32>;
defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
  flat<0xa, 0x12>, "flat_load_ushort", VGPR_32>;
defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
  flat<0xb, 0x13>, "flat_load_sshort", VGPR_32>;
defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
  flat<0xc, 0x14>, "flat_load_dword", VGPR_32>;
defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
  flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64>;
defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
  flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128>;
defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
  flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96>;

// Stores.
defm FLAT_STORE_BYTE : FLAT_Store_Helper <
  flat<0x18>, "flat_store_byte", VGPR_32>;
defm FLAT_STORE_SHORT : FLAT_Store_Helper <
  flat<0x1a>, "flat_store_short", VGPR_32>;
defm FLAT_STORE_DWORD : FLAT_Store_Helper <
  flat<0x1c>, "flat_store_dword", VGPR_32>;
defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
  flat<0x1d>, "flat_store_dwordx2", VReg_64>;
defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
  flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128>;
defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
  flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96>;

// 32-bit integer atomics. The cmpswap form additionally passes a two-element
// data type and a wider register class for its data operand.
defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
  flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
  flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
  atomic_cmp_swap_flat, v2i32, VReg_64>;
defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
  flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat>;
defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
  flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
  flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
  flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
  flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
  flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat>;
defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
  flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat>;
defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
  flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat>;
defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
  flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat>;
defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
  flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
  flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat>;

// 64-bit (_X2) integer atomics.
defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
  flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
  flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
  atomic_cmp_swap_flat, v2i64, VReg_128>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
  flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
  flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
  flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
  flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
  flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
  flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat>;
defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
  flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat>;
defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
  flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
  flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat>;
defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
  flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
  flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat>;

} // End SubtargetPredicate = isCIVI

// CI Only flat instructions
//
// These are given a single opcode value, are disabled for the VI assembler
// (VIAssemblerPredicate = DisableInst) and are excluded from the VI decoder.
// The cmpswap forms pass null_frag as the pattern operator; the min/max forms
// pass no pattern operator at all.

let SubtargetPredicate = isCI,
    VIAssemblerPredicate = DisableInst,
    DisableVIDecoder = 1 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
  flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
  null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
  flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
  flat<0x40>, "flat_atomic_fmax", VGPR_32, f32>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
  flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
  null_frag, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
  flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
  flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

let Predicates = [isCIVI] in {

// Patterns for global loads with no offset.
// Matches `node` applied to a 64-bit address and producing `vt`; emits `inst`
// with the address followed by three zero immediates.
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(vt (node i64:$addr)),
         (inst $addr, 0, 0, 0)>;

// Same shape as FlatLoadPat, but the first immediate is 1 instead of 0
// (presumably the glc bit -- TODO confirm against the FLAT operand list).
class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(vt (node i64:$addr)),
         (inst $addr, 1, 0, 0)>;

def : FlatLoadPat <FLAT_LOAD_UBYTE,   flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD,   flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD,   atomic_flat_load, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;


// Store pattern: the node takes (data, address); the instruction takes
// (address, data) followed by three zero immediates.
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(node vt:$data, i64:$addr),
         (inst $addr, $data, 0, 0, 0)>;

// atomic store follows atomic binop convention so the address comes
// first. The first immediate passed to the instruction is 1 instead of 0.
class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(node i64:$addr, vt:$data),
         (inst $addr, $data, 1, 0, 0)>;

def : FlatStorePat <FLAT_STORE_BYTE,    flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT,   flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD,   flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;

def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_flat_store, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;

// Returning atomic read-modify-write pattern. `vt` is the result type and
// `data_vt` the type of the data operand; data_vt defaults to vt and is only
// overridden by the cmpswap instantiations below.
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt>
  : Pat <(vt (node i64:$addr, data_vt:$data)),
         (inst $addr, $data, 0, 0)>;

// 32-bit returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,     atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,     atomic_sub_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,     atomic_inc_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,     atomic_dec_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,     atomic_and_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN,    atomic_max_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN,    atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN,    atomic_min_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN,    atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,      atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global,
                     i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,     atomic_xor_global, i32>;

// 64-bit (_X2) returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,     atomic_add_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,     atomic_sub_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,     atomic_and_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN,    atomic_max_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN,    atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN,    atomic_min_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN,    atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,      atomic_or_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global,
                     i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,     atomic_xor_global, i64>;

} // End Predicates = [isCIVI]