1# 2# Copyright (c) 2018 Valve Corporation 3# 4# Permission is hereby granted, free of charge, to any person obtaining a 5# copy of this software and associated documentation files (the "Software"), 6# to deal in the Software without restriction, including without limitation 7# the rights to use, copy, modify, merge, publish, distribute, sublicense, 8# and/or sell copies of the Software, and to permit persons to whom the 9# Software is furnished to do so, subject to the following conditions: 10# 11# The above copyright notice and this permission notice (including the next 12# paragraph) shall be included in all copies or substantial portions of the 13# Software. 14# 15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21# IN THE SOFTWARE. 
#

# Class that represents all the information we have about the opcode
# NOTE: this must be kept in sync with aco_op_info

import sys
from enum import Enum, IntEnum, auto


class InstrClass(Enum):
    """Instruction class tag attached to every registered opcode.

    Each member's value is a short string identifier.
    """
    Valu32 = "valu32"
    ValuConvert32 = "valu_convert32"
    Valu64 = "valu64"
    ValuQuarterRate32 = "valu_quarter_rate32"
    ValuFma = "valu_fma"
    ValuTranscendental32 = "valu_transcendental32"
    ValuDouble = "valu_double"
    ValuDoubleAdd = "valu_double_add"
    ValuDoubleConvert = "valu_double_convert"
    ValuDoubleTranscendental = "valu_double_transcendental"
    WMMA = "wmma"
    Salu = "salu"
    SMem = "smem"
    Barrier = "barrier"
    Branch = "branch"
    Sendmsg = "sendmsg"
    DS = "ds"
    Export = "exp"
    VMem = "vmem"
    Waitcnt = "waitcnt"
    Other = "other"


# Representation of the instruction's microcode encoding format
# Note: Some Vector ALU Formats can be combined, such that:
# - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
# - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
# - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
#
# (*) The same is applicable for VOP1 and VOPC instructions.
class Format(IntEnum):
    """Instruction encoding format.

    Members up to VOPD are numbered sequentially starting at 0; the
    members from VOP1 onwards each occupy a distinct bit (1 << 7 up to
    1 << 14) so that they can be OR-ed together as described above.
    """
    # Pseudo Instruction Formats
    PSEUDO = 0
    PSEUDO_BRANCH = auto()
    PSEUDO_BARRIER = auto()
    PSEUDO_REDUCTION = auto()
    # Scalar ALU & Control Formats
    SOP1 = auto()
    SOP2 = auto()
    SOPK = auto()
    SOPP = auto()
    SOPC = auto()
    # Scalar Memory Format
    SMEM = auto()
    # LDS/GDS Format
    DS = auto()
    LDSDIR = auto()
    # Vector Memory Buffer Formats
    MTBUF = auto()
    MUBUF = auto()
    # Vector Memory Image Format
    MIMG = auto()
    # Export Format
    EXP = auto()
    # Flat Formats
    FLAT = auto()
    GLOBAL = auto()
    SCRATCH = auto()
    # Vector Parameter Interpolation Formats
    VINTRP = auto()
    # Vector ALU Formats
    VINTERP_INREG = auto()
    VOPD = auto()
    VOP1 = 1 << 7
    VOP2 = 1 << 8
    VOPC = 1 << 9
    VOP3 = 1 << 10
    VOP3P = 1 << 11
    SDWA = 1 << 12
    DPP16 = 1 << 13
    DPP8 = 1 << 14

    def get_builder_fields(self):
        """Return the extra builder fields of this format.

        Each entry is a tuple ``(type, name, default)`` or
        ``(type, name, default, dest)``.  ``default`` is None when the
        field has no default value; ``dest``, when present, replaces
        ``name`` in get_builder_field_dests().  Formats without extra
        fields yield an empty list.
        """
        if self == Format.SOPK:
            return [('uint16_t', 'imm', None)]
        elif self == Format.SOPP:
            return [('uint32_t', 'block', '-1'),
                    ('uint32_t', 'imm', '0')]
        elif self == Format.SMEM:
            return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'nv', 'false')]
        elif self == Format.DS:
            return [('uint16_t', 'offset0', '0'),
                    ('uint8_t', 'offset1', '0'),
                    ('bool', 'gds', 'false')]
        elif self == Format.LDSDIR:
            return [('uint8_t', 'attr', 0),
                    ('uint8_t', 'attr_chan', 0),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('uint8_t', 'wait_vdst', 15)]
        elif self == Format.MTBUF:
            return [('unsigned', 'dfmt', None),
                    ('unsigned', 'nfmt', None),
                    ('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false')]
        elif self == Format.MUBUF:
            return [('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'swizzled', 'false'),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'addr64', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lds', 'false')]
        elif self == Format.MIMG:
            # A second, unreachable `return` (a copy of the VINTRP field
            # list) used to follow this one; it was dead code and is gone.
            return [('unsigned', 'dmask', '0xF'),
                    ('bool', 'da', 'false'),
                    ('bool', 'unrm', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lwe', 'false'),
                    ('bool', 'r128', 'false'),
                    ('bool', 'a16', 'false'),
                    ('bool', 'd16', 'false')]
        elif self == Format.EXP:
            return [('unsigned', 'enabled_mask', None),
                    ('unsigned', 'dest', None),
                    ('bool', 'compr', 'false', 'compressed'),
                    ('bool', 'done', 'false'),
                    ('bool', 'vm', 'false', 'valid_mask')]
        elif self == Format.PSEUDO_BRANCH:
            return [('uint32_t', 'target0', '0', 'target[0]'),
                    ('uint32_t', 'target1', '0', 'target[1]')]
        elif self == Format.PSEUDO_REDUCTION:
            return [('ReduceOp', 'op', None, 'reduce_op'),
                    ('unsigned', 'cluster_size', '0')]
        elif self == Format.PSEUDO_BARRIER:
            return [('memory_sync_info', 'sync', None),
                    ('sync_scope', 'exec_scope', 'scope_invocation')]
        elif self == Format.VINTRP:
            return [('unsigned', 'attribute', None),
                    ('unsigned', 'component', None)]
        elif self == Format.DPP16:
            return [('uint16_t', 'dpp_ctrl', None),
                    ('uint8_t', 'row_mask', '0xF'),
                    ('uint8_t', 'bank_mask', '0xF'),
                    ('bool', 'bound_ctrl', 'true'),
                    ('bool', 'fetch_inactive', 'true')]
        elif self == Format.DPP8:
            return [('uint32_t', 'lane_sel', 0),
                    ('bool', 'fetch_inactive', 'true')]
        elif self == Format.VOP3P:
            return [('uint8_t', 'opsel_lo', None),
                    ('uint8_t', 'opsel_hi', None)]
        elif self == Format.VOPD:
            return [('aco_opcode', 'opy', None)]
        elif self == Format.VINTERP_INREG:
            return [('unsigned', 'wait_exp', 7),
                    ('uint8_t', 'opsel', 0)]
        elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
            return [('int16_t', 'offset', 0),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'lds', 'false'),
                    ('bool', 'nv', 'false')]
        else:
            return []

    def get_builder_field_names(self):
        """Return the ``name`` element of every builder field, in order."""
        return [f[1] for f in self.get_builder_fields()]

    def get_builder_field_dests(self):
        """Return, per builder field, its ``dest`` if given, else its name."""
        return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]

    def get_builder_field_decls(self):
        """Return one ``'type name'`` or ``'type name=default'`` string per field.

        The ``=default`` part is emitted only when the default is not None,
        so falsy defaults such as the integer 0 are still written out.
        """
        return [('%s %s=%s' % (f[0], f[1], f[2]) if f[2] is not None else '%s %s' % (f[0], f[1]))
                for f in self.get_builder_fields()]

    def get_builder_initialization(self, num_operands):
        """Return the extra initialization statements for this format.

        Only SDWA, DPP16 and DPP8 produce text; every other format yields
        an empty string.  For SDWA at most the first two operands get a
        ``sel`` assignment.
        """
        res = ''
        if self == Format.SDWA:
            for i in range(min(num_operands, 2)):
                res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
            res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
        elif self in [Format.DPP16, Format.DPP8]:
            res += 'instr->fetch_inactive &= program->gfx_level >= GFX10;\n'
        return res


class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info

    Besides storing the constructor arguments, the constructor derives
    ``operand_size`` from the type suffix of the instruction name.
    ``input_mod``, ``output_mod`` and ``is_atomic`` are stored as the
    strings "1" / "0" rather than as booleans.
    """
    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, opcode_gfx11, format, input_mod, output_mod, is_atomic, cls, definitions, operands):
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(opcode_gfx11, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)
        assert isinstance(definitions, int)
        assert isinstance(operands, int)

        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.opcode_gfx11 = opcode_gfx11
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format
        self.cls = cls
        self.definitions = definitions
        self.operands = operands

        # The data type is taken from the last '_'-separated component of
        # the name, ignoring an '_e64' marker.
        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]

        op_dtype_sizes = {'{}{}'.format(prefix, size) : size for prefix in 'biuf' for size in [64, 32, 24, 16]}
        # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
        op_dtype_sizes['b16'] = 32
        op_dtype_sizes['i16'] = 32
        op_dtype_sizes['u16'] = 32

        # If we can't tell the operand size, default to 32.
        self.operand_size = op_dtype_sizes.get(op_dtype, 32)

        # exceptions for operands:
        # ('qsad_' must be tested before 'sad_', which it contains)
        if 'qsad_' in name:
            self.operand_size = 0
        elif 'sad_' in name:
            self.operand_size = 32
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif self.operand_size == 24:
            self.operand_size = 32
        elif op_dtype == 'u8' or op_dtype == 'i8':
            self.operand_size = 32
        elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                      'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32


# Matches PhysReg
VCC = 106
M0 = 124
EXEC_LO = 126
EXEC = 127 # Some instructions only write lo, so use exec_hi encoding here
SCC = 253


def src(op1 = 0, op2 = 0, op3 = 0, op4 = 0):
    """Pack up to four operand descriptors into one int, one per byte (op1 lowest)."""
    return op1 | (op2 << 8) | (op3 << 16) | (op4 << 24)


def dst(def1 = 0, def2 = 0, def3 = 0, def4 = 0):
    """Pack up to four definition descriptors into one int, one per byte (def1 lowest)."""
    return def1 | (def2 << 8) | (def3 << 16) | (def4 << 24)


# global dictionary of opcodes
opcodes = {}


def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, opcode_gfx11 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False, definitions = 0, operands = 0):
    """Create an Opcode and register it in the global ``opcodes`` dict.

    Registering the same name twice trips the assertion.
    """
    assert name not in opcodes
    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, opcode_gfx11, format, input_mod, output_mod, is_atomic, cls, definitions, operands)


def default_class(opcodes, cls):
    """Yield every tuple of ``opcodes``, appending ``cls`` to those that do
    not already end in an InstrClass."""
    for op in opcodes:
        if isinstance(op[-1], InstrClass):
            yield op
        else:
            yield op + (cls,)


opcode("exp", 0, 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_return")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupExclusiveMin()
opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)

opcode("p_branch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)

opcode("p_barrier", format=Format.PSEUDO_BARRIER)

# Primitive Ordered Pixel Shading pseudo-instructions.

# For querying whether the current wave can enter the ordered section on GFX9-10.3, doing
# s_add_i32(pops_exiting_wave_id, op0), but in a way that it's different from a usual SALU
# instruction so that it's easier to maintain the volatility of pops_exiting_wave_id and to handle
# the polling specially in scheduling.
# Definitions:
# - Result SGPR;
# - Clobbered SCC.
# Operands:
# - s1 value to add, usually -(current_wave_ID + 1) (or ~current_wave_ID) to remap the exiting wave
#   ID from wrapping [0, 0x3FF] to monotonic [0, 0xFFFFFFFF].
opcode("p_pops_gfx9_add_exiting_wave_id")

# Indicates that the wait for the completion of the ordered section in overlapped waves has been
# finished on GFX9-10.3. Not lowered to any hardware instructions.
opcode("p_pops_gfx9_overlapped_wave_wait_done")

# Indicates that a POPS ordered section has ended, hints that overlapping waves can possibly
# continue execution. The overlapping waves may actually be resumed by this instruction or anywhere
# later, however, especially taking into account the fact that there can be multiple ordered
# sections in a wave (for instance, if one is chosen in divergent control flow in the source
# shader), thus multiple p_pops_gfx9_ordered_section_done instructions. At least one must be present
# in the program if POPS is used, however, otherwise the location of the end of the ordered section
# will be undefined. Only needed on GFX9-10.3 (GFX11+ ordered section is until the last export,
# can't be exited early). Not lowered to any hardware instructions.
opcode("p_pops_gfx9_ordered_section_done")

# Every registration in this section passes only a name, so each opcode is
# created with opcode()'s defaults: all hardware opcode numbers -1,
# format Format.PSEUDO, class InstrClass.Other, no definitions/operands.
opcode("p_spill")
opcode("p_reload")

# Start/end linear vgprs. p_start_linear_vgpr can take an operand to copy from, into the linear vgpr
opcode("p_start_linear_vgpr")
opcode("p_end_linear_vgpr")

opcode("p_end_wqm")
opcode("p_discard_if")
opcode("p_demote_to_helper")
opcode("p_is_helper")
opcode("p_exit_early_if")

# simulates proper bpermute behavior using v_readlane_b32
# definitions: result VGPR, temp EXEC, clobbered VCC
# operands: index, input data
opcode("p_bpermute_readlane")

# simulates proper wave64 bpermute behavior using shared vgprs (for GFX10/10.3)
# definitions: result VGPR, temp EXEC, clobbered SCC
# operands: index * 4, input data, same half (bool)
opcode("p_bpermute_shared_vgpr")

# simulates proper wave64 bpermute behavior using v_permlane64_b32 (for GFX11+)
# definitions: result VGPR, temp EXEC, clobbered SCC
# operands: linear VGPR, index * 4, input data, same half (bool)
opcode("p_bpermute_permlane")

# creates a lane mask where only the first active lane is selected
opcode("p_elect")

opcode("p_constaddr")
opcode("p_resume_shader_address")

# These don't have to be pseudo-ops, but it makes optimization easier to only
# have to consider two instructions.
# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension
opcode("p_extract") # src1=index, src2=bits, src3=signext
# (src0 & ((1 << bits) - 1)) << (index * bits)
opcode("p_insert") # src1=index, src2=bits

opcode("p_init_scratch")

# jumps to a shader epilog
opcode("p_jump_to_epilog")

# loads and interpolates a fragment shader input with a correct exec mask
#dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=coord1, src4=coord2, src5=m0
#dst0=result, src0=linear_vgpr, src1=attribute, src2=component, src3=dpp_ctrl, src4=m0
opcode("p_interp_gfx11")

# performs dual source MRTs swizzling and emits exports on GFX11
opcode("p_dual_src_export_gfx11")

# Let shader end with specific registers set to wanted value, used by multi part
# shader to pass arguments to next part.
opcode("p_end_with_regs")

# Layout of the SOP2/SOPK/SOP1/SOPC tables below: every row is a tuple
#   (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, definitions, operands[, class])
# where the first six entries are the opcode numbers per hardware generation
# (-1 is also opcode()'s default; presumably it marks "not available on that
# generation" -- confirm), and definitions/operands are packed with dst()/src().
# The registration loops unpack all six opcode columns but forward only the
# gfx7, gfx9, gfx10 and gfx11 ones to opcode(); gfx6 and gfx8 are unused there.
# NOTE(review): the tables are set literals, so the loops do not necessarily
# visit the rows in the order they are written here -- confirm that nothing
# depends on the registration order.

# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
SOP2 = {
    # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32", dst(1, SCC), src(1, 1)),
    (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32", dst(1, SCC), src(1, 1)),
    (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32", dst(1, SCC), src(1, 1)),
    (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32", dst(1, SCC), src(1, 1)),
    (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32", dst(1, SCC), src(1, 1, SCC)),
    (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32", dst(1, SCC), src(1, 1, SCC)),
    (0x06, 0x06, 0x06, 0x06, 0x06, 0x12, "s_min_i32", dst(1, SCC), src(1, 1)),
    (0x07, 0x07, 0x07, 0x07, 0x07, 0x13, "s_min_u32", dst(1, SCC), src(1, 1)),
    (0x08, 0x08, 0x08, 0x08, 0x08, 0x14, "s_max_i32", dst(1, SCC), src(1, 1)),
    (0x09, 0x09, 0x09, 0x09, 0x09, 0x15, "s_max_u32", dst(1, SCC), src(1, 1)),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x30, "s_cselect_b32", dst(1), src(1, 1, SCC)),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x31, "s_cselect_b64", dst(2), src(2, 2, SCC)),
    (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, 0x16, "s_and_b32", dst(1, SCC), src(1, 1)),
    (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, 0x17, "s_and_b64", dst(2, SCC), src(2, 2)),
    (0x10, 0x10, 0x0e, 0x0e, 0x10, 0x18, "s_or_b32", dst(1, SCC), src(1, 1)),
    (0x11, 0x11, 0x0f, 0x0f, 0x11, 0x19, "s_or_b64", dst(2, SCC), src(2, 2)),
    (0x12, 0x12, 0x10, 0x10, 0x12, 0x1a, "s_xor_b32", dst(1, SCC), src(1, 1)),
    (0x13, 0x13, 0x11, 0x11, 0x13, 0x1b, "s_xor_b64", dst(2, SCC), src(2, 2)),
    (0x14, 0x14, 0x12, 0x12, 0x14, 0x22, "s_andn2_b32", dst(1, SCC), src(1, 1)), #s_and_not1_b32 in GFX11
    (0x15, 0x15, 0x13, 0x13, 0x15, 0x23, "s_andn2_b64", dst(2, SCC), src(2, 2)), #s_and_not1_b64 in GFX11
    (0x16, 0x16, 0x14, 0x14, 0x16, 0x24, "s_orn2_b32", dst(1, SCC), src(1, 1)), #s_or_not1_b32 in GFX11
    (0x17, 0x17, 0x15, 0x15, 0x17, 0x25, "s_orn2_b64", dst(2, SCC), src(2, 2)), #s_or_not1_b64 in GFX11
    (0x18, 0x18, 0x16, 0x16, 0x18, 0x1c, "s_nand_b32", dst(1, SCC), src(1, 1)),
    (0x19, 0x19, 0x17, 0x17, 0x19, 0x1d, "s_nand_b64", dst(2, SCC), src(2, 2)),
    (0x1a, 0x1a, 0x18, 0x18, 0x1a, 0x1e, "s_nor_b32", dst(1, SCC), src(1, 1)),
    (0x1b, 0x1b, 0x19, 0x19, 0x1b, 0x1f, "s_nor_b64", dst(2, SCC), src(2, 2)),
    (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, 0x20, "s_xnor_b32", dst(1, SCC), src(1, 1)),
    (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, 0x21, "s_xnor_b64", dst(2, SCC), src(2, 2)),
    (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, 0x08, "s_lshl_b32", dst(1, SCC), src(1, 1)),
    (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, 0x09, "s_lshl_b64", dst(2, SCC), src(2, 1)),
    (0x20, 0x20, 0x1e, 0x1e, 0x20, 0x0a, "s_lshr_b32", dst(1, SCC), src(1, 1)),
    (0x21, 0x21, 0x1f, 0x1f, 0x21, 0x0b, "s_lshr_b64", dst(2, SCC), src(2, 1)),
    (0x22, 0x22, 0x20, 0x20, 0x22, 0x0c, "s_ashr_i32", dst(1, SCC), src(1, 1)),
    (0x23, 0x23, 0x21, 0x21, 0x23, 0x0d, "s_ashr_i64", dst(2, SCC), src(2, 1)),
    (0x24, 0x24, 0x22, 0x22, 0x24, 0x2a, "s_bfm_b32", dst(1), src(1, 1)),
    (0x25, 0x25, 0x23, 0x23, 0x25, 0x2b, "s_bfm_b64", dst(2), src(1, 1)),
    (0x26, 0x26, 0x24, 0x24, 0x26, 0x2c, "s_mul_i32", dst(1), src(1, 1)),
    (0x27, 0x27, 0x25, 0x25, 0x27, 0x26, "s_bfe_u32", dst(1, SCC), src(1, 1)),
    (0x28, 0x28, 0x26, 0x26, 0x28, 0x27, "s_bfe_i32", dst(1, SCC), src(1, 1)),
    (0x29, 0x29, 0x27, 0x27, 0x29, 0x28, "s_bfe_u64", dst(2, SCC), src(2, 1)),
    (0x2a, 0x2a, 0x28, 0x28, 0x2a, 0x29, "s_bfe_i64", dst(2, SCC), src(2, 1)),
    (0x2b, 0x2b, 0x29, 0x29,   -1,   -1, "s_cbranch_g_fork", dst(), src(), InstrClass.Branch),
    (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, 0x06, "s_absdiff_i32", dst(1, SCC), src(1, 1)),
    (  -1,   -1, 0x2b, 0x2b,   -1,   -1, "s_rfe_restore_b64", dst(), src(), InstrClass.Branch),
    (  -1,   -1,   -1, 0x2e, 0x2e, 0x0e, "s_lshl1_add_u32", dst(1, SCC), src(1, 1)),
    (  -1,   -1,   -1, 0x2f, 0x2f, 0x0f, "s_lshl2_add_u32", dst(1, SCC), src(1, 1)),
    (  -1,   -1,   -1, 0x30, 0x30, 0x10, "s_lshl3_add_u32", dst(1, SCC), src(1, 1)),
    (  -1,   -1,   -1, 0x31, 0x31, 0x11, "s_lshl4_add_u32", dst(1, SCC), src(1, 1)),
    (  -1,   -1,   -1, 0x32, 0x32, 0x32, "s_pack_ll_b32_b16", dst(1), src(1, 1)),
    (  -1,   -1,   -1, 0x33, 0x33, 0x33, "s_pack_lh_b32_b16", dst(1), src(1, 1)),
    (  -1,   -1,   -1, 0x34, 0x34, 0x34, "s_pack_hh_b32_b16", dst(1), src(1, 1)),
    (  -1,   -1,   -1,   -1,   -1, 0x35, "s_pack_hl_b32_b16", dst(1), src(1, 1)),
    (  -1,   -1,   -1, 0x2c, 0x35, 0x2d, "s_mul_hi_u32", dst(1), src(1, 1)),
    (  -1,   -1,   -1, 0x2d, 0x36, 0x2e, "s_mul_hi_i32", dst(1), src(1, 1)),
    # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
    (  -1,   -1,   -1,   -1,   -1,   -1, "p_constaddr_addlo", dst(1, SCC), src(1, 1, 1)),
    (  -1,   -1,   -1,   -1,   -1,   -1, "p_resumeaddr_addlo", dst(1, SCC), src(1, 1, 1)),
}
# default_class() appends InstrClass.Salu to every row that does not already
# end in an InstrClass, so each row unpacks to exactly ten values here.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops, cls) in default_class(SOP2, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOP2, cls, definitions = defs, operands = ops)


# SOPK instructions: 0 input (+ imm), 1 output + optional scc
SOPK = {
    # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32", dst(1), src()),
    (  -1,   -1,   -1,   -1, 0x01, 0x01, "s_version", dst(), src()),
    (0x02, 0x02, 0x01, 0x01, 0x02, 0x02, "s_cmovk_i32", dst(1), src(1, SCC)),
    (0x03, 0x03, 0x02, 0x02, 0x03, 0x03, "s_cmpk_eq_i32", dst(SCC), src(1)),
    (0x04, 0x04, 0x03, 0x03, 0x04, 0x04, "s_cmpk_lg_i32", dst(SCC), src(1)),
    (0x05, 0x05, 0x04, 0x04, 0x05, 0x05, "s_cmpk_gt_i32", dst(SCC), src(1)),
    (0x06, 0x06, 0x05, 0x05, 0x06, 0x06, "s_cmpk_ge_i32", dst(SCC), src(1)),
    (0x07, 0x07, 0x06, 0x06, 0x07, 0x07, "s_cmpk_lt_i32", dst(SCC), src(1)),
    (0x08, 0x08, 0x07, 0x07, 0x08, 0x08, "s_cmpk_le_i32", dst(SCC), src(1)),
    (0x09, 0x09, 0x08, 0x08, 0x09, 0x09, "s_cmpk_eq_u32", dst(SCC), src(1)),
    (0x0a, 0x0a, 0x09, 0x09, 0x0a, 0x0a, "s_cmpk_lg_u32", dst(SCC), src(1)),
    (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b, "s_cmpk_gt_u32", dst(SCC), src(1)),
    (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, 0x0c, "s_cmpk_ge_u32", dst(SCC), src(1)),
    (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d, "s_cmpk_lt_u32", dst(SCC), src(1)),
    (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, 0x0e, "s_cmpk_le_u32", dst(SCC), src(1)),
    (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f, "s_addk_i32", dst(1, SCC), src(1)),
    (0x10, 0x10, 0x0f, 0x0f, 0x10, 0x10, "s_mulk_i32", dst(1), src(1)),
    (0x11, 0x11, 0x10, 0x10,   -1,   -1, "s_cbranch_i_fork", dst(), src(), InstrClass.Branch),
    (0x12, 0x12, 0x11, 0x11, 0x12, 0x11, "s_getreg_b32", dst(1), src()),
    (0x13, 0x13, 0x12, 0x12, 0x13, 0x12, "s_setreg_b32", dst(), src(1)),
    (0x15, 0x15, 0x14, 0x14, 0x15, 0x13, "s_setreg_imm32_b32", dst(), src(1)), # requires 32bit literal
    (  -1,   -1, 0x15, 0x15, 0x16, 0x14, "s_call_b64", dst(2), src(), InstrClass.Branch),
    (  -1,   -1,   -1,   -1, 0x17, 0x18, "s_waitcnt_vscnt", dst(), src(1), InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x18, 0x19, "s_waitcnt_vmcnt", dst(), src(1), InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x19, 0x1a, "s_waitcnt_expcnt", dst(), src(1), InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x1a, 0x1b, "s_waitcnt_lgkmcnt", dst(), src(1), InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x1b, 0x16, "s_subvector_loop_begin", dst(), src(), InstrClass.Branch),
    (  -1,   -1,   -1,   -1, 0x1c, 0x17, "s_subvector_loop_end", dst(), src(), InstrClass.Branch),
}
# Same registration scheme as for SOP2, with Format.SOPK.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops, cls) in default_class(SOPK, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOPK, cls, definitions = defs, operands = ops)


# SOP1 instructions: 1 input, 1 output (+optional SCC)
SOP1 = {
    # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
    (0x03, 0x03, 0x00, 0x00, 0x03, 0x00, "s_mov_b32", dst(1), src(1)),
    (0x04, 0x04, 0x01, 0x01, 0x04, 0x01, "s_mov_b64", dst(2), src(2)),
    (0x05, 0x05, 0x02, 0x02, 0x05, 0x02, "s_cmov_b32", dst(1), src(1, 1, SCC)),
    (0x06, 0x06, 0x03, 0x03, 0x06, 0x03, "s_cmov_b64", dst(2), src(2, 2, SCC)),
    (0x07, 0x07, 0x04, 0x04, 0x07, 0x1e, "s_not_b32", dst(1, SCC), src(1)),
    (0x08, 0x08, 0x05, 0x05, 0x08, 0x1f, "s_not_b64", dst(2, SCC), src(2)),
    (0x09, 0x09, 0x06, 0x06, 0x09, 0x1c, "s_wqm_b32", dst(1, SCC), src(1)),
    (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x1d, "s_wqm_b64", dst(2, SCC), src(2)),
    (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x04, "s_brev_b32", dst(1), src(1)),
    (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x05, "s_brev_b64", dst(2), src(2)),
    (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, 0x16, "s_bcnt0_i32_b32", dst(1, SCC), src(1)),
    (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, 0x17, "s_bcnt0_i32_b64", dst(1, SCC), src(2)),
    (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, 0x18, "s_bcnt1_i32_b32", dst(1, SCC), src(1)),
    (0x10, 0x10, 0x0d, 0x0d, 0x10, 0x19, "s_bcnt1_i32_b64", dst(1, SCC), src(2)),
    (0x11, 0x11, 0x0e, 0x0e, 0x11,   -1, "s_ff0_i32_b32", dst(1), src(1)),
    (0x12, 0x12, 0x0f, 0x0f, 0x12,   -1, "s_ff0_i32_b64", dst(1), src(2)),
    (0x13, 0x13, 0x10, 0x10, 0x13, 0x08, "s_ff1_i32_b32", dst(1), src(1)), #s_ctz_i32_b32 in GFX11
    (0x14, 0x14, 0x11, 0x11, 0x14, 0x09, "s_ff1_i32_b64", dst(1), src(2)), #s_ctz_i32_b64 in GFX11
    (0x15, 0x15, 0x12, 0x12, 0x15, 0x0a, "s_flbit_i32_b32", dst(1), src(1)), #s_clz_i32_u32 in GFX11
    (0x16, 0x16, 0x13, 0x13, 0x16, 0x0b, "s_flbit_i32_b64", dst(1), src(2)), #s_clz_i32_u64 in GFX11
    (0x17, 0x17, 0x14, 0x14, 0x17, 0x0c, "s_flbit_i32", dst(1), src(1)), #s_cls_i32 in GFX11
    (0x18, 0x18, 0x15, 0x15, 0x18, 0x0d, "s_flbit_i32_i64", dst(1), src(2)), #s_cls_i32_i64 in GFX11
    (0x19, 0x19, 0x16, 0x16, 0x19, 0x0e, "s_sext_i32_i8", dst(1), src(1)),
    (0x1a, 0x1a, 0x17, 0x17, 0x1a, 0x0f, "s_sext_i32_i16", dst(1), src(1)),
    (0x1b, 0x1b, 0x18, 0x18, 0x1b, 0x10, "s_bitset0_b32", dst(1), src(1, 1)),
    (0x1c, 0x1c, 0x19, 0x19, 0x1c, 0x11, "s_bitset0_b64", dst(2), src(1, 2)),
    (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, 0x12, "s_bitset1_b32", dst(1), src(1, 1)),
    (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, 0x13, "s_bitset1_b64", dst(2), src(1, 2)),
    (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, 0x47, "s_getpc_b64", dst(2), src()),
    (0x20, 0x20, 0x1d, 0x1d, 0x20, 0x48, "s_setpc_b64", dst(), src(2), InstrClass.Branch),
    (0x21, 0x21, 0x1e, 0x1e, 0x21, 0x49, "s_swappc_b64", dst(2), src(2), InstrClass.Branch),
    (0x22, 0x22, 0x1f, 0x1f, 0x22, 0x4a, "s_rfe_b64", dst(), src(2), InstrClass.Branch),
    (0x24, 0x24, 0x20, 0x20, 0x24, 0x21, "s_and_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x25, 0x25, 0x21, 0x21, 0x25, 0x23, "s_or_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x26, 0x26, 0x22, 0x22, 0x26, 0x25, "s_xor_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x27, 0x27, 0x23, 0x23, 0x27, 0x31, "s_andn2_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_and_not1_saveexec_b64 in GFX11
    (0x28, 0x28, 0x24, 0x24, 0x28, 0x33, "s_orn2_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_or_not1_saveexec_b64 in GFX11
    (0x29, 0x29, 0x25, 0x25, 0x29, 0x27, "s_nand_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x2a, 0x2a, 0x26, 0x26, 0x2a, 0x29, "s_nor_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x2b, 0x2b, 0x27, 0x27, 0x2b, 0x2b, "s_xnor_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)),
    (0x2c, 0x2c, 0x28, 0x28, 0x2c, 0x1a, "s_quadmask_b32", dst(1, SCC), src(1)),
    (0x2d, 0x2d, 0x29, 0x29, 0x2d, 0x1b, "s_quadmask_b64", dst(2, SCC), src(2)), # Always writes 0 to the second SGPR
    (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, 0x40, "s_movrels_b32", dst(1), src(1, M0)),
    (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, 0x41, "s_movrels_b64", dst(2), src(2, M0)),
    (0x30, 0x30, 0x2c, 0x2c, 0x30, 0x42, "s_movreld_b32", dst(1), src(1, M0)),
    (0x31, 0x31, 0x2d, 0x2d, 0x31, 0x43, "s_movreld_b64", dst(2), src(2, M0)),
    (0x32, 0x32, 0x2e, 0x2e,   -1,   -1, "s_cbranch_join", dst(), src(), InstrClass.Branch),
    (0x34, 0x34, 0x30, 0x30, 0x34, 0x15, "s_abs_i32", dst(1, SCC), src(1)),
    (0x35, 0x35,   -1,   -1, 0x35,   -1, "s_mov_fed_b32", dst(), src()),
    (  -1,   -1, 0x32, 0x32,   -1,   -1, "s_set_gpr_idx_idx", dst(M0), src(1, M0)),
    (  -1,   -1,   -1, 0x33, 0x37, 0x2d, "s_andn1_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_and_not0_saveexec_b64 in GFX11
    (  -1,   -1,   -1, 0x34, 0x38, 0x2f, "s_orn1_saveexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_or_not0_saveexec_b64 in GFX11
    (  -1,   -1,   -1, 0x35, 0x39, 0x35, "s_andn1_wrexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_and_not0_wrexec_b64 in GFX11
    (  -1,   -1,   -1, 0x36, 0x3a, 0x37, "s_andn2_wrexec_b64", dst(2, SCC, EXEC), src(2, EXEC)), #s_and_not1_wrexec_b64 in GFX11
    (  -1,   -1,   -1, 0x37, 0x3b, 0x14, "s_bitreplicate_b64_b32", dst(2), src(1)),
    (  -1,   -1,   -1,   -1, 0x3c, 0x20, "s_and_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x3d, 0x22, "s_or_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x3e, 0x24, "s_xor_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x3f, 0x30, "s_andn2_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_and_not1_saveexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x40, 0x32, "s_orn2_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_or_not1_saveexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x41, 0x26, "s_nand_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x42, 0x28, "s_nor_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x43, 0x2a, "s_xnor_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)),
    (  -1,   -1,   -1,   -1, 0x44, 0x2c, "s_andn1_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_and_not0_saveexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x45, 0x2e, "s_orn1_saveexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_or_not0_saveexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x46, 0x34, "s_andn1_wrexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_and_not0_wrexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x47, 0x36, "s_andn2_wrexec_b32", dst(1, SCC, EXEC_LO), src(1, EXEC_LO)), #s_and_not1_wrexec_b32 in GFX11
    (  -1,   -1,   -1,   -1, 0x49, 0x44, "s_movrelsd_2_b32", dst(1), src(1, M0)),
    (  -1,   -1,   -1,   -1,   -1, 0x4c, "s_sendmsg_rtn_b32", dst(1), src(1)),
    (  -1,   -1,   -1,   -1,   -1, 0x4d, "s_sendmsg_rtn_b64", dst(2), src(1)),
    # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
    (  -1,   -1,   -1,   -1,   -1,   -1, "p_constaddr_getpc", dst(2), src(1)),
    (  -1,   -1,   -1,   -1,   -1,   -1, "p_resumeaddr_getpc", dst(2), src(1)),
    (  -1,   -1,   -1,   -1,   -1,   -1, "p_load_symbol", dst(1), src(1)),
}
# Same registration scheme as for SOP2, with Format.SOP1.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops, cls) in default_class(SOP1, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOP1, cls, definitions = defs, operands = ops)


# SOPC instructions: 2 inputs and 0 outputs (+SCC)
SOPC = {
    # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32", dst(SCC), src(1, 1)),
    (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32", dst(SCC), src(1, 1)),
    (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32", dst(SCC), src(1, 1)),
    (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32", dst(SCC), src(1, 1)),
    (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32", dst(SCC), src(1, 1)),
    (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32", dst(SCC), src(1, 1)),
    (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32", dst(SCC), src(1, 1)),
    (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32", dst(SCC), src(1, 1)),
    (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32", dst(SCC), src(1, 1)),
    (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32", dst(SCC), src(1, 1)),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32", dst(SCC), src(1, 1)),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32", dst(SCC), src(1, 1)),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32", dst(SCC), src(1, 1)),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32", dst(SCC), src(1, 1)),
    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64", dst(SCC), src(2, 1)),
    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64", dst(SCC), src(2, 1)),
    (0x10, 0x10, 0x10, 0x10,   -1,   -1, "s_setvskip", dst(), src(1, 1)),
    (  -1,   -1, 0x11, 0x11,   -1,   -1, "s_set_gpr_idx_on", dst(M0), src(1, 1, M0)),
    (  -1,   -1, 0x12, 0x12, 0x12, 0x10, "s_cmp_eq_u64", dst(SCC), src(2, 2)),
    (  -1,   -1, 0x13, 0x13, 0x13, 0x11, "s_cmp_lg_u64", dst(SCC), src(2, 2)),
}
# No SOPC row carries its own class, so the rows are unpacked as nine values
# (without default_class()) and all are registered as InstrClass.Salu.
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops) in SOPC:
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOPC, InstrClass.Salu, definitions = defs, operands = ops)


# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
SOPP = {
    # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_nop", dst(), src()),
    (0x01, 0x01, 0x01, 0x01, 0x01, 0x30, "s_endpgm", dst(), src()),
    (0x02, 0x02, 0x02, 0x02, 0x02, 0x20, "s_branch", dst(), src(), InstrClass.Branch),
    (  -1,   -1, 0x03, 0x03, 0x03, 0x34, "s_wakeup", dst(), src()),
    (0x04, 0x04, 0x04, 0x04, 0x04, 0x21, "s_cbranch_scc0", dst(), src(), InstrClass.Branch),
    (0x05, 0x05, 0x05, 0x05, 0x05, 0x22, "s_cbranch_scc1", dst(), src(), InstrClass.Branch),
    (0x06, 0x06, 0x06, 0x06, 0x06, 0x23, "s_cbranch_vccz", dst(), src(), InstrClass.Branch),
    (0x07, 0x07, 0x07, 0x07, 0x07, 0x24, "s_cbranch_vccnz", dst(), src(), InstrClass.Branch),
    (0x08, 0x08, 0x08, 0x08, 0x08, 0x25, "s_cbranch_execz", dst(), src(), InstrClass.Branch),
    (0x09, 0x09, 0x09, 0x09, 0x09, 0x26, "s_cbranch_execnz", dst(), src(), InstrClass.Branch),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x3d, "s_barrier", dst(), src(), InstrClass.Barrier),
    (  -1, 0x0b, 0x0b, 0x0b, 0x0b, 0x01, "s_setkill", dst(), src()),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x09, "s_waitcnt", dst(), src(), InstrClass.Waitcnt),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x02, "s_sethalt", dst(), src()),
    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x03, "s_sleep", dst(), src()),
    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x35, "s_setprio", dst(), src()),
    (0x10, 0x10, 0x10, 0x10, 0x10, 0x36, "s_sendmsg", dst(), src(), InstrClass.Sendmsg),
    (0x11, 0x11, 0x11, 0x11, 0x11, 0x37, "s_sendmsghalt", dst(), src(), InstrClass.Sendmsg),
    (0x12, 0x12, 0x12, 0x12, 0x12, 0x10, "s_trap", dst(), src(), InstrClass.Branch),
(0x13, 0x13, 0x13, 0x13, 0x13, 0x3c, "s_icache_inv", dst(), src()), 661 (0x14, 0x14, 0x14, 0x14, 0x14, 0x38, "s_incperflevel", dst(), src()), 662 (0x15, 0x15, 0x15, 0x15, 0x15, 0x39, "s_decperflevel", dst(), src()), 663 (0x16, 0x16, 0x16, 0x16, 0x16, 0x3a, "s_ttracedata", dst(), src(M0)), 664 ( -1, 0x17, 0x17, 0x17, 0x17, 0x27, "s_cbranch_cdbgsys", dst(), src(), InstrClass.Branch), 665 ( -1, 0x18, 0x18, 0x18, 0x18, 0x28, "s_cbranch_cdbguser", dst(), src(), InstrClass.Branch), 666 ( -1, 0x19, 0x19, 0x19, 0x19, 0x29, "s_cbranch_cdbgsys_or_user", dst(), src(), InstrClass.Branch), 667 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, "s_cbranch_cdbgsys_and_user", dst(), src(), InstrClass.Branch), 668 ( -1, -1, 0x1b, 0x1b, 0x1b, 0x31, "s_endpgm_saved", dst(), src()), 669 ( -1, -1, 0x1c, 0x1c, -1, -1, "s_set_gpr_idx_off", dst(), src()), 670 ( -1, -1, 0x1d, 0x1d, -1, -1, "s_set_gpr_idx_mode", dst(M0), src(M0)), 671 ( -1, -1, -1, 0x1e, 0x1e, -1, "s_endpgm_ordered_ps_done", dst(), src()), 672 ( -1, -1, -1, -1, 0x1f, 0x1f, "s_code_end", dst(), src()), 673 ( -1, -1, -1, -1, 0x20, 0x04, "s_inst_prefetch", dst(), src()), #s_set_inst_prefetch_distance in GFX11 674 ( -1, -1, -1, -1, 0x21, 0x05, "s_clause", dst(), src()), 675 ( -1, -1, -1, -1, 0x22, 0x0a, "s_wait_idle", dst(), src(), InstrClass.Waitcnt), 676 ( -1, -1, -1, -1, 0x23, 0x08, "s_waitcnt_depctr", dst(), src(), InstrClass.Waitcnt), 677 ( -1, -1, -1, -1, 0x24, 0x11, "s_round_mode", dst(), src()), 678 ( -1, -1, -1, -1, 0x25, 0x12, "s_denorm_mode", dst(), src()), 679 ( -1, -1, -1, -1, 0x26, 0x3b, "s_ttracedata_imm", dst(), src()), 680 ( -1, -1, -1, -1, -1, 0x07, "s_delay_alu", dst(), src(), InstrClass.Waitcnt), 681 ( -1, -1, -1, -1, -1, 0x0b, "s_wait_event", dst(), src()), 682} 683for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops, cls) in default_class(SOPP, InstrClass.Salu): 684 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SOPP, cls, definitions = defs, operands = ops) 685 686 687# SMEM instructions: sbase input (2 sgpr), 
potentially 2 offset inputs, 1 sdata input/output 688# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions 689SMEM = { 690 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name 691 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"), #s_load_b32 in GFX11 692 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"), #s_load_b64 in GFX11 693 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"), #s_load_b128 in GFX11 694 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"), #s_load_b256 in GFX11 695 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"), #s_load_b512 in GFX11 696 ( -1, -1, -1, 0x05, 0x05, -1, "s_scratch_load_dword"), 697 ( -1, -1, -1, 0x06, 0x06, -1, "s_scratch_load_dwordx2"), 698 ( -1, -1, -1, 0x07, 0x07, -1, "s_scratch_load_dwordx4"), 699 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"), #s_buffer_load_b32 in GFX11 700 (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"), #s_buffer_load_b64 in GFX11 701 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"), #s_buffer_load_b128 in GFX11 702 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"), #s_buffer_load_b256 in GFX11 703 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"), #s_buffer_load_b512 in GFX11 704 ( -1, -1, 0x10, 0x10, 0x10, -1, "s_store_dword"), 705 ( -1, -1, 0x11, 0x11, 0x11, -1, "s_store_dwordx2"), 706 ( -1, -1, 0x12, 0x12, 0x12, -1, "s_store_dwordx4"), 707 ( -1, -1, -1, 0x15, 0x15, -1, "s_scratch_store_dword"), 708 ( -1, -1, -1, 0x16, 0x16, -1, "s_scratch_store_dwordx2"), 709 ( -1, -1, -1, 0x17, 0x17, -1, "s_scratch_store_dwordx4"), 710 ( -1, -1, 0x18, 0x18, 0x18, -1, "s_buffer_store_dword"), 711 ( -1, -1, 0x19, 0x19, 0x19, -1, "s_buffer_store_dwordx2"), 712 ( -1, -1, 0x1a, 0x1a, 0x1a, -1, "s_buffer_store_dwordx4"), 713 ( -1, -1, 0x1f, 0x1f, 0x1f, 0x20, "s_gl1_inv"), 714 (0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21, "s_dcache_inv"), 715 ( -1, -1, 0x21, 0x21, 0x21, -1, "s_dcache_wb"), 716 ( -1, 0x1d, 0x22, 0x22, -1, -1, 
"s_dcache_inv_vol"), 717 ( -1, -1, 0x23, 0x23, -1, -1, "s_dcache_wb_vol"), 718 (0x1e, 0x1e, 0x24, 0x24, 0x24, -1, "s_memtime"), #GFX6-GFX10 719 ( -1, -1, 0x25, 0x25, 0x25, -1, "s_memrealtime"), 720 ( -1, -1, 0x26, 0x26, 0x26, 0x22, "s_atc_probe"), 721 ( -1, -1, 0x27, 0x27, 0x27, 0x23, "s_atc_probe_buffer"), 722 ( -1, -1, -1, 0x28, 0x28, -1, "s_dcache_discard"), 723 ( -1, -1, -1, 0x29, 0x29, -1, "s_dcache_discard_x2"), 724 ( -1, -1, -1, -1, 0x2a, -1, "s_get_waveid_in_workgroup"), 725 ( -1, -1, -1, 0x40, 0x40, -1, "s_buffer_atomic_swap"), 726 ( -1, -1, -1, 0x41, 0x41, -1, "s_buffer_atomic_cmpswap"), 727 ( -1, -1, -1, 0x42, 0x42, -1, "s_buffer_atomic_add"), 728 ( -1, -1, -1, 0x43, 0x43, -1, "s_buffer_atomic_sub"), 729 ( -1, -1, -1, 0x44, 0x44, -1, "s_buffer_atomic_smin"), 730 ( -1, -1, -1, 0x45, 0x45, -1, "s_buffer_atomic_umin"), 731 ( -1, -1, -1, 0x46, 0x46, -1, "s_buffer_atomic_smax"), 732 ( -1, -1, -1, 0x47, 0x47, -1, "s_buffer_atomic_umax"), 733 ( -1, -1, -1, 0x48, 0x48, -1, "s_buffer_atomic_and"), 734 ( -1, -1, -1, 0x49, 0x49, -1, "s_buffer_atomic_or"), 735 ( -1, -1, -1, 0x4a, 0x4a, -1, "s_buffer_atomic_xor"), 736 ( -1, -1, -1, 0x4b, 0x4b, -1, "s_buffer_atomic_inc"), 737 ( -1, -1, -1, 0x4c, 0x4c, -1, "s_buffer_atomic_dec"), 738 ( -1, -1, -1, 0x60, 0x60, -1, "s_buffer_atomic_swap_x2"), 739 ( -1, -1, -1, 0x61, 0x61, -1, "s_buffer_atomic_cmpswap_x2"), 740 ( -1, -1, -1, 0x62, 0x62, -1, "s_buffer_atomic_add_x2"), 741 ( -1, -1, -1, 0x63, 0x63, -1, "s_buffer_atomic_sub_x2"), 742 ( -1, -1, -1, 0x64, 0x64, -1, "s_buffer_atomic_smin_x2"), 743 ( -1, -1, -1, 0x65, 0x65, -1, "s_buffer_atomic_umin_x2"), 744 ( -1, -1, -1, 0x66, 0x66, -1, "s_buffer_atomic_smax_x2"), 745 ( -1, -1, -1, 0x67, 0x67, -1, "s_buffer_atomic_umax_x2"), 746 ( -1, -1, -1, 0x68, 0x68, -1, "s_buffer_atomic_and_x2"), 747 ( -1, -1, -1, 0x69, 0x69, -1, "s_buffer_atomic_or_x2"), 748 ( -1, -1, -1, 0x6a, 0x6a, -1, "s_buffer_atomic_xor_x2"), 749 ( -1, -1, -1, 0x6b, 0x6b, -1, "s_buffer_atomic_inc_x2"), 750 ( -1, -1, 
-1, 0x6c, 0x6c, -1, "s_buffer_atomic_dec_x2"), 751 ( -1, -1, -1, 0x80, 0x80, -1, "s_atomic_swap"), 752 ( -1, -1, -1, 0x81, 0x81, -1, "s_atomic_cmpswap"), 753 ( -1, -1, -1, 0x82, 0x82, -1, "s_atomic_add"), 754 ( -1, -1, -1, 0x83, 0x83, -1, "s_atomic_sub"), 755 ( -1, -1, -1, 0x84, 0x84, -1, "s_atomic_smin"), 756 ( -1, -1, -1, 0x85, 0x85, -1, "s_atomic_umin"), 757 ( -1, -1, -1, 0x86, 0x86, -1, "s_atomic_smax"), 758 ( -1, -1, -1, 0x87, 0x87, -1, "s_atomic_umax"), 759 ( -1, -1, -1, 0x88, 0x88, -1, "s_atomic_and"), 760 ( -1, -1, -1, 0x89, 0x89, -1, "s_atomic_or"), 761 ( -1, -1, -1, 0x8a, 0x8a, -1, "s_atomic_xor"), 762 ( -1, -1, -1, 0x8b, 0x8b, -1, "s_atomic_inc"), 763 ( -1, -1, -1, 0x8c, 0x8c, -1, "s_atomic_dec"), 764 ( -1, -1, -1, 0xa0, 0xa0, -1, "s_atomic_swap_x2"), 765 ( -1, -1, -1, 0xa1, 0xa1, -1, "s_atomic_cmpswap_x2"), 766 ( -1, -1, -1, 0xa2, 0xa2, -1, "s_atomic_add_x2"), 767 ( -1, -1, -1, 0xa3, 0xa3, -1, "s_atomic_sub_x2"), 768 ( -1, -1, -1, 0xa4, 0xa4, -1, "s_atomic_smin_x2"), 769 ( -1, -1, -1, 0xa5, 0xa5, -1, "s_atomic_umin_x2"), 770 ( -1, -1, -1, 0xa6, 0xa6, -1, "s_atomic_smax_x2"), 771 ( -1, -1, -1, 0xa7, 0xa7, -1, "s_atomic_umax_x2"), 772 ( -1, -1, -1, 0xa8, 0xa8, -1, "s_atomic_and_x2"), 773 ( -1, -1, -1, 0xa9, 0xa9, -1, "s_atomic_or_x2"), 774 ( -1, -1, -1, 0xaa, 0xaa, -1, "s_atomic_xor_x2"), 775 ( -1, -1, -1, 0xab, 0xab, -1, "s_atomic_inc_x2"), 776 ( -1, -1, -1, 0xac, 0xac, -1, "s_atomic_dec_x2"), 777} 778for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SMEM: 779 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.SMEM, InstrClass.SMem, is_atomic = "atomic" in name) 780 781 782# VOP2 instructions: 2 inputs, 1 output (+ optional vcc) 783# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8 784VOP2 = { 785 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input modifiers, output modifiers 786 (0x00, 0x00, 0x00, 0x00, 0x01, 0x01, "v_cndmask_b32", True, False, dst(1), src(1, 1, VCC)), 787 (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False, False, 
dst(1), src(1, 1)), 788 (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False, False, dst(1), src(1, 1, 1)), 789 (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True, True, dst(1), src(1, 1)), 790 (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True, True, dst(1), src(1, 1)), 791 (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True, True, dst(1), src(1, 1)), 792 (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True, True, dst(1), src(1, 1, 1)), #GFX6,7,10 793 ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True, True, dst(1), src(1, 1, 1)), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11 794 (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True, True, dst(1), src(1, 1)), #v_mul_dx9_zero_f32 in GFX11 795 (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True, True, dst(1), src(1, 1)), 796 (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False, False, dst(1), src(1, 1)), 797 (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False, False, dst(1), src(1, 1)), 798 (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False, False, dst(1), src(1, 1)), 799 (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False, False, dst(1), src(1, 1)), 800 ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False, False, dst(1), src(1, 1, 1)), 801 (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True, True, dst(1), src(1, 1)), 802 (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True, True, dst(1), src(1, 1)), 803 (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True, True, dst(1), src(1, 1)), 804 (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True, True, dst(1), src(1, 1)), 805 (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False, False, dst(1), src(1, 1)), 806 (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False, False, dst(1), src(1, 1)), 807 (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False, False, dst(1), src(1, 1)), 808 (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False, False, dst(1), src(1, 1)), 809 (0x15, 0x15, -1, -1, -1, -1, 
"v_lshr_b32", False, False, dst(1), src(1, 1)), 810 (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False, False, dst(1), src(1, 1)), 811 (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False, False, dst(1), src(1, 1)), 812 (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False, False, dst(1), src(1, 1)), 813 (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False, False, dst(1), src(1, 1)), 814 (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False, False, dst(1), src(1, 1)), 815 (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False, False, dst(1), src(1, 1)), 816 (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False, False, dst(1), src(1, 1)), 817 (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False, False, dst(1), src(1, 1)), 818 ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False, False, dst(1), src(1, 1)), 819 (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True, True, dst(1), src(1, 1, 1)), 820 (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False, False, dst(1), src(1, 1, 1)), 821 (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False, False, dst(1), src(1, 1, 1)), 822 (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False, False, dst(1), src(1, 1)), 823 (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False, False, dst(1, VCC), src(1, 1)), # VOP3B only in RDNA 824 (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False, False, dst(1, VCC), src(1, 1)), # VOP3B only in RDNA 825 (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False, False, dst(1, VCC), src(1, 1)), # VOP3B only in RDNA 826 (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False, False, dst(1, VCC), src(1, 1, VCC)), # v_add_co_ci_u32 in RDNA 827 (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False, False, dst(1, VCC), src(1, 1, VCC)), # v_sub_co_ci_u32 in RDNA 828 (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False, False, dst(1, VCC), src(1, 1, VCC)), # v_subrev_co_ci_u32 in RDNA 829 ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True, True, dst(1), src(1, 1, 
1)), 830 ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", False, False, dst(1), src(1, 1, 1)), 831 ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", False, False, dst(1), src(1, 1, 1)), 832 (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True, False, dst(1), src(1, 1)), #v_cvt_pk_rtz_f16_f32 in GFX11 833 ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True, True, dst(1), src(1, 1)), 834 ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True, True, dst(1), src(1, 1)), 835 ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True, True, dst(1), src(1, 1)), 836 ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True, True, dst(1), src(1, 1)), 837 ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True, True, dst(1), src(1, 1, 1)), 838 ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False, False, dst(1), src(1, 1, 1)), 839 ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False, False, dst(1), src(1, 1, 1)), 840 ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False, False, dst(1), src(1, 1)), 841 ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False, False, dst(1), src(1, 1)), 842 ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False, False, dst(1), src(1, 1)), 843 ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False, False, dst(1), src(1, 1)), 844 ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False, False, dst(1), src(1, 1)), 845 ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False, False, dst(1), src(1, 1)), 846 ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False, False, dst(1), src(1, 1)), 847 ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True, True, dst(1), src(1, 1)), 848 ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True, True, dst(1), src(1, 1)), 849 ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False, False, dst(1), src(1, 1)), 850 ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False, False, dst(1), src(1, 1)), 851 ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False, False, dst(1), src(1, 1)), 852 ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False, False, dst(1), src(1, 1)), 853 ( -1, -1, 0x33, 0x33, 0x3b, 
0x3b, "v_ldexp_f16", False, True, dst(1), src(1, 1)), 854 ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False, False, dst(1), src(1, 1)), # called v_add_nc_u32 in RDNA 855 ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False, False, dst(1), src(1, 1)), # called v_sub_nc_u32 in RDNA 856 ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False, False, dst(1), src(1, 1)), # called v_subrev_nc_u32 in RDNA 857 ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", True, True, dst(1), src(1, 1, 1)), 858 ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False, False, dst(1), src(1, 1, 1)), 859 ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False, False, dst(1), src(1, 1, 1)), 860 ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False, False, dst(1), src(1, 1, 1)), 861 ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False, False, dst(1), src(1, 1, 1)), #v_dot2acc_f32_f16 in GFX11 862} 863for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, defs, ops) in VOP2: 864 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, in_mod, out_mod, definitions = defs, operands = ops) 865 866 867# VOP1 instructions: instructions with 1 input and 1 output 868VOP1 = { 869 # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input_modifiers, output_modifiers 870 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False, dst(), src()), 871 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False, dst(1), src(1)), 872 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False, dst(1), src(1)), 873 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, dst(1), src(2), InstrClass.ValuDoubleConvert), 874 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, dst(2), src(1), InstrClass.ValuDoubleConvert), 875 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True, dst(1), src(1)), 876 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True, dst(1), src(1)), 877 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False, dst(1), 
src(1)), 878 (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False, dst(1), src(1)), 879 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True, dst(1), src(1)), 880 ( -1, -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True, dst(1), src(1)), 881 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True, dst(1), src(1)), 882 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False, dst(1), src(1)), #v_cvt_nearest_i32_f32 in GFX11 883 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False, dst(1), src(1)),#v_cvt_floor_i32_f32 in GFX11 884 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True, dst(1), src(1)), 885 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, dst(1), src(2), InstrClass.ValuDoubleConvert), 886 (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, dst(2), src(1), InstrClass.ValuDoubleConvert), 887 (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True, dst(1), src(1)), 888 (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True, dst(1), src(1)), 889 (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True, dst(1), src(1)), 890 (0x14, 0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True, dst(1), src(1)), 891 (0x15, 0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, dst(1), src(2), InstrClass.ValuDoubleConvert), 892 (0x16, 0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, dst(2), src(1), InstrClass.ValuDoubleConvert), 893 ( -1, 0x17, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, dst(2), src(2), InstrClass.ValuDouble), 894 ( -1, 0x18, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, dst(2), src(2), InstrClass.ValuDouble), 895 ( -1, 0x19, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, dst(2), src(2), InstrClass.ValuDouble), 896 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, dst(2), src(2), InstrClass.ValuDouble), 897 ( -1, -1, -1, -1, 0x1b, 0x1b, "v_pipeflush", 
False, False, dst(), src()), 898 (0x20, 0x20, 0x1b, 0x1b, 0x20, 0x20, "v_fract_f32", True, True, dst(1), src(1)), 899 (0x21, 0x21, 0x1c, 0x1c, 0x21, 0x21, "v_trunc_f32", True, True, dst(1), src(1)), 900 (0x22, 0x22, 0x1d, 0x1d, 0x22, 0x22, "v_ceil_f32", True, True, dst(1), src(1)), 901 (0x23, 0x23, 0x1e, 0x1e, 0x23, 0x23, "v_rndne_f32", True, True, dst(1), src(1)), 902 (0x24, 0x24, 0x1f, 0x1f, 0x24, 0x24, "v_floor_f32", True, True, dst(1), src(1)), 903 (0x25, 0x25, 0x20, 0x20, 0x25, 0x25, "v_exp_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 904 (0x26, 0x26, -1, -1, -1, -1, "v_log_clamp_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 905 (0x27, 0x27, 0x21, 0x21, 0x27, 0x27, "v_log_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 906 (0x28, 0x28, -1, -1, -1, -1, "v_rcp_clamp_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 907 (0x29, 0x29, -1, -1, -1, -1, "v_rcp_legacy_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 908 (0x2a, 0x2a, 0x22, 0x22, 0x2a, 0x2a, "v_rcp_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 909 (0x2b, 0x2b, 0x23, 0x23, 0x2b, 0x2b, "v_rcp_iflag_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 910 (0x2c, 0x2c, -1, -1, -1, -1, "v_rsq_clamp_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 911 (0x2d, 0x2d, -1, -1, -1, -1, "v_rsq_legacy_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 912 (0x2e, 0x2e, 0x24, 0x24, 0x2e, 0x2e, "v_rsq_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 913 (0x2f, 0x2f, 0x25, 0x25, 0x2f, 0x2f, "v_rcp_f64", True, True, dst(2), src(2), InstrClass.ValuDoubleTranscendental), 914 (0x30, 0x30, -1, -1, -1, -1, "v_rcp_clamp_f64", True, True, dst(2), src(2), InstrClass.ValuDoubleTranscendental), 915 (0x31, 0x31, 0x26, 0x26, 0x31, 0x31, "v_rsq_f64", True, True, dst(2), src(2), InstrClass.ValuDoubleTranscendental), 916 (0x32, 0x32, -1, -1, -1, 
-1, "v_rsq_clamp_f64", True, True, dst(2), src(2), InstrClass.ValuDoubleTranscendental), 917 (0x33, 0x33, 0x27, 0x27, 0x33, 0x33, "v_sqrt_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 918 (0x34, 0x34, 0x28, 0x28, 0x34, 0x34, "v_sqrt_f64", True, True, dst(2), src(2), InstrClass.ValuDoubleTranscendental), 919 (0x35, 0x35, 0x29, 0x29, 0x35, 0x35, "v_sin_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 920 (0x36, 0x36, 0x2a, 0x2a, 0x36, 0x36, "v_cos_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 921 (0x37, 0x37, 0x2b, 0x2b, 0x37, 0x37, "v_not_b32", False, False, dst(1), src(1)), 922 (0x38, 0x38, 0x2c, 0x2c, 0x38, 0x38, "v_bfrev_b32", False, False, dst(1), src(1)), 923 (0x39, 0x39, 0x2d, 0x2d, 0x39, 0x39, "v_ffbh_u32", False, False, dst(1), src(1)), #v_clz_i32_u32 in GFX11 924 (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, 0x3a, "v_ffbl_b32", False, False, dst(1), src(1)), #v_ctz_i32_b32 in GFX11 925 (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, 0x3b, "v_ffbh_i32", False, False, dst(1), src(1)), #v_cls_i32 in GFX11 926 (0x3c, 0x3c, 0x30, 0x30, 0x3c, 0x3c, "v_frexp_exp_i32_f64", True, False, dst(1), src(2), InstrClass.ValuDouble), 927 (0x3d, 0x3d, 0x31, 0x31, 0x3d, 0x3d, "v_frexp_mant_f64", True, False, dst(2), src(2), InstrClass.ValuDouble), 928 (0x3e, 0x3e, 0x32, 0x32, 0x3e, 0x3e, "v_fract_f64", True, True, dst(2), src(2), InstrClass.ValuDouble), 929 (0x3f, 0x3f, 0x33, 0x33, 0x3f, 0x3f, "v_frexp_exp_i32_f32", True, False, dst(1), src(1)), 930 (0x40, 0x40, 0x34, 0x34, 0x40, 0x40, "v_frexp_mant_f32", True, False, dst(1), src(1)), 931 (0x41, 0x41, 0x35, 0x35, 0x41, -1, "v_clrexcp", False, False, dst(), src()), 932 (0x42, 0x42, 0x36, -1, 0x42, 0x42, "v_movreld_b32", False, False, dst(1), src(1, M0)), 933 (0x43, 0x43, 0x37, -1, 0x43, 0x43, "v_movrels_b32", False, False, dst(1), src(1, M0)), 934 (0x44, 0x44, 0x38, -1, 0x44, 0x44, "v_movrelsd_b32", False, False, dst(1), src(1, M0)), 935 ( -1, -1, -1, -1, 0x48, 0x48, "v_movrelsd_2_b32", False, 
False, dst(1), src(1, M0)), 936 ( -1, -1, -1, 0x37, -1, -1, "v_screen_partition_4se_b32", False, False, dst(1), src(1)), 937 ( -1, -1, 0x39, 0x39, 0x50, 0x50, "v_cvt_f16_u16", False, True, dst(1), src(1)), 938 ( -1, -1, 0x3a, 0x3a, 0x51, 0x51, "v_cvt_f16_i16", False, True, dst(1), src(1)), 939 ( -1, -1, 0x3b, 0x3b, 0x52, 0x52, "v_cvt_u16_f16", True, False, dst(1), src(1)), 940 ( -1, -1, 0x3c, 0x3c, 0x53, 0x53, "v_cvt_i16_f16", True, False, dst(1), src(1)), 941 ( -1, -1, 0x3d, 0x3d, 0x54, 0x54, "v_rcp_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 942 ( -1, -1, 0x3e, 0x3e, 0x55, 0x55, "v_sqrt_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 943 ( -1, -1, 0x3f, 0x3f, 0x56, 0x56, "v_rsq_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 944 ( -1, -1, 0x40, 0x40, 0x57, 0x57, "v_log_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 945 ( -1, -1, 0x41, 0x41, 0x58, 0x58, "v_exp_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 946 ( -1, -1, 0x42, 0x42, 0x59, 0x59, "v_frexp_mant_f16", True, False, dst(1), src(1)), 947 ( -1, -1, 0x43, 0x43, 0x5a, 0x5a, "v_frexp_exp_i16_f16", True, False, dst(1), src(1)), 948 ( -1, -1, 0x44, 0x44, 0x5b, 0x5b, "v_floor_f16", True, True, dst(1), src(1)), 949 ( -1, -1, 0x45, 0x45, 0x5c, 0x5c, "v_ceil_f16", True, True, dst(1), src(1)), 950 ( -1, -1, 0x46, 0x46, 0x5d, 0x5d, "v_trunc_f16", True, True, dst(1), src(1)), 951 ( -1, -1, 0x47, 0x47, 0x5e, 0x5e, "v_rndne_f16", True, True, dst(1), src(1)), 952 ( -1, -1, 0x48, 0x48, 0x5f, 0x5f, "v_fract_f16", True, True, dst(1), src(1)), 953 ( -1, -1, 0x49, 0x49, 0x60, 0x60, "v_sin_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 954 ( -1, -1, 0x4a, 0x4a, 0x61, 0x61, "v_cos_f16", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 955 ( -1, 0x46, 0x4b, 0x4b, -1, -1, "v_exp_legacy_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 956 ( -1, 0x45, 0x4c, 0x4c, -1, -1, 
"v_log_legacy_f32", True, True, dst(1), src(1), InstrClass.ValuTranscendental32), 957 ( -1, -1, -1, 0x4f, 0x62, 0x62, "v_sat_pk_u8_i16", False, False, dst(1), src(1)), 958 ( -1, -1, -1, 0x4d, 0x63, 0x63, "v_cvt_norm_i16_f16", True, False, dst(1), src(1)), 959 ( -1, -1, -1, 0x4e, 0x64, 0x64, "v_cvt_norm_u16_f16", True, False, dst(1), src(1)), 960 ( -1, -1, -1, 0x51, 0x65, 0x65, "v_swap_b32", False, False, dst(1, 1), src(1, 1)), 961 ( -1, -1, -1, -1, 0x68, 0x68, "v_swaprel_b32", False, False, dst(1, 1), src(1, 1, M0)), 962 ( -1, -1, -1, -1, -1, 0x67, "v_permlane64_b32", False, False, dst(1), src(1)), #cannot use VOP3 963 ( -1, -1, -1, -1, -1, 0x69, "v_not_b16", False, False, dst(1), src(1)), 964 ( -1, -1, -1, -1, -1, 0x6a, "v_cvt_i32_i16", False, False, dst(1), src(1)), 965 ( -1, -1, -1, -1, -1, 0x6b, "v_cvt_u32_u16", False, False, dst(1), src(1)), 966 ( -1, -1, -1, -1, -1, 0x1c, "v_mov_b16", True, False, dst(1), src(1)), 967} 968for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, defs, ops, cls) in default_class(VOP1, InstrClass.Valu32): 969 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP1, cls, in_mod, out_mod, definitions = defs, operands = ops) 970 971 972# VOPC instructions: 973 974VOPC_CLASS = { 975 (0x88, 0x88, 0x10, 0x10, 0x88, 0x7e, "v_cmp_class_f32", dst(VCC), src(1, 1)), 976 ( -1, -1, 0x14, 0x14, 0x8f, 0x7d, "v_cmp_class_f16", dst(VCC), src(1, 1)), 977 (0x98, 0x98, 0x11, 0x11, 0x98, 0xfe, "v_cmpx_class_f32", dst(EXEC), src(1, 1)), 978 ( -1, -1, 0x15, 0x15, 0x9f, 0xfd, "v_cmpx_class_f16", dst(EXEC), src(1, 1)), 979 (0xa8, 0xa8, 0x12, 0x12, 0xa8, 0x7f, "v_cmp_class_f64", dst(VCC), src(2, 1), InstrClass.ValuDouble), 980 (0xb8, 0xb8, 0x13, 0x13, 0xb8, 0xff, "v_cmpx_class_f64", dst(EXEC), src(2, 1), InstrClass.ValuDouble), 981} 982for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, defs, ops, cls) in default_class(VOPC_CLASS, InstrClass.Valu32): 983 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, cls, True, False, definitions = defs, operands = 
ops) 984 985COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"] 986 987for i in range(8): 988 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f16") 989 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(VCC), operands = src(1, 1)) 990 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, 0x80+i, "v_cmpx_"+COMPF[i]+"_f16") 991 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(EXEC), operands = src(1, 1)) 992 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, 0x08+i, "v_cmp_"+COMPF[i+8]+"_f16") 993 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(VCC), operands = src(1, 1)) 994 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, 0x88+i, "v_cmpx_"+COMPF[i+8]+"_f16") 995 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(EXEC), operands = src(1, 1)) 996 997for i in range(16): 998 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, 0x10+i, "v_cmp_"+COMPF[i]+"_f32") 999 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(VCC), operands = src(1, 1)) 1000 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, 0x90+i, "v_cmpx_"+COMPF[i]+"_f32") 1001 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, True, False, definitions = dst(EXEC), operands = src(1, 1)) 1002 (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64") 1003 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.ValuDouble, True, False, definitions = dst(VCC), operands = src(2, 2)) 1004 (gfx6, gfx7, 
gfx8, gfx9, gfx10, gfx11, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, 0xa0+i, "v_cmpx_"+COMPF[i]+"_f64")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.ValuDouble, True, False, definitions = dst(EXEC), operands = src(2, 2))
    # GFX_6_7
    # NOTE: no opcode() call follows the four assignments below, so the
    # v_cmps*/v_cmpsx* encodings are recorded here but never registered.
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x40+i, 0x40+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x50+i, 0x50+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x60+i, 0x60+i, -1, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x70+i, 0x70+i, -1, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")

# Integer comparison name fragments; the list index is added to each base
# opcode in the loops below.
COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]

# In every loop below all six per-generation values are unpacked, but only
# gfx7, gfx9, gfx10 and gfx11 are passed on to opcode(); gfx6 and gfx8 are
# kept for reference. -1 presumably marks "no encoding on that generation"
# (TODO confirm against opcode()).

# GFX_8_9
for i in [0,7]: # only 0 and 7
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, -1, "v_cmp_"+COMPI[i]+"_i16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, -1, "v_cmpx_"+COMPI[i]+"_i16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, -1, "v_cmp_"+COMPI[i]+"_u16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, -1, "v_cmpx_"+COMPI[i]+"_u16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))

for i in range(1, 7): # [1..6]
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, 0x30+i, "v_cmp_"+COMPI[i]+"_i16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, 0x38+i, "v_cmp_"+COMPI[i]+"_u16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))

for i in range(8):
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, 0x40+i, "v_cmp_"+COMPI[i]+"_i32")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, 0xc0+i, "v_cmpx_"+COMPI[i]+"_i32")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, 0x50+i, "v_cmp_"+COMPI[i]+"_i64")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64, definitions = dst(VCC), operands = src(2, 2))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_i64")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64, definitions = dst(EXEC), operands = src(2, 2))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, 0x48+i, "v_cmp_"+COMPI[i]+"_u32")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(VCC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, 0xc8+i, "v_cmpx_"+COMPI[i]+"_u32")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu32, definitions = dst(EXEC), operands = src(1, 1))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, 0x58+i, "v_cmp_"+COMPI[i]+"_u64")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64, definitions = dst(VCC), operands = src(2, 2))
    (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, 0xd8+i, "v_cmpx_"+COMPI[i]+"_u64")
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOPC, InstrClass.Valu64, definitions = dst(EXEC), operands = src(2, 2))


# VOPP instructions: packed 16bit instructions - 2 or 3 inputs and 1 output
VOPP = {
    # opcode, name, input/output modifiers
    (0x00, "v_pk_mad_i16", False, dst(1), src(1, 1, 1)),
    (0x01, "v_pk_mul_lo_u16", False, dst(1), src(1, 1)),
    (0x02, "v_pk_add_i16", False, dst(1), src(1, 1)),
    (0x03, "v_pk_sub_i16", False, dst(1), src(1, 1)),
    (0x04, "v_pk_lshlrev_b16", False, dst(1), src(1, 1)),
    (0x05, "v_pk_lshrrev_b16", False, dst(1), src(1, 1)),
    (0x06, "v_pk_ashrrev_i16", False, dst(1), src(1, 1)),
    (0x07, "v_pk_max_i16", False, dst(1), src(1, 1)),
    (0x08, "v_pk_min_i16", False, dst(1), src(1, 1)),
    (0x09, "v_pk_mad_u16", False, dst(1), src(1, 1, 1)),
    (0x0a, "v_pk_add_u16", False, dst(1), src(1, 1)),
    (0x0b, "v_pk_sub_u16", False, dst(1), src(1, 1)),
    (0x0c, "v_pk_max_u16", False, dst(1), src(1, 1)),
    (0x0d, "v_pk_min_u16", False, dst(1), src(1, 1)),
    (0x0e, "v_pk_fma_f16", True, dst(1), src(1, 1, 1)),
    (0x0f, "v_pk_add_f16", True, dst(1), src(1, 1)),
    (0x10, "v_pk_mul_f16", True, dst(1), src(1, 1)),
    (0x11, "v_pk_min_f16", True, dst(1), src(1, 1)),
    (0x12, "v_pk_max_f16", True, dst(1), src(1, 1)),
    (0x20, "v_fma_mix_f32", True, dst(1), src(1, 1, 1)), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
    (0x21, "v_fma_mixlo_f16", True, dst(1), src(1, 1, 1)), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
    (0x22, "v_fma_mixhi_f16", True, dst(1), src(1, 1, 1)), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
}
# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, code, code, code, name)
# The single `modifiers` flag is passed twice, in the two positions that the
# VOP3 loop further down fills with in_mod and out_mod.
for (code, name, modifiers, defs, ops) in VOPP:
    opcode(name, -1, code, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers, definitions = defs, operands = ops)
opcode("v_dot2_i32_i16", -1, 0x26, 0x14, -1, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot2_u32_u16", -1, 0x27, 0x15, -1, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot4_i32_iu8", -1, -1, -1, 0x16, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot4_i32_i8", -1, 0x28, 0x16, -1, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot4_u32_u8", -1, 0x29, 0x17, 0x17, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot8_i32_iu4", -1, -1, -1, 0x18, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot8_u32_u4", -1, 0x2b, 0x19, 0x19, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot2_f32_f16", -1, 0x23, 0x13, 0x13, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
opcode("v_dot2_f32_bf16", -1, -1, -1, 0x1a, Format.VOP3P, InstrClass.Valu32, definitions = dst(1), operands = src(1, 1, 1))
# The WMMA entries pass no definitions/operands arguments.
opcode("v_wmma_f32_16x16x16_f16", -1, -1, -1, 0x40, Format.VOP3P, InstrClass.WMMA, False, False)
opcode("v_wmma_f32_16x16x16_bf16", -1, -1, -1, 0x41, Format.VOP3P, InstrClass.WMMA, False, False)
opcode("v_wmma_f16_16x16x16_f16", -1, -1, -1, 0x42, Format.VOP3P, InstrClass.WMMA, False, False)
opcode("v_wmma_bf16_16x16x16_bf16", -1, -1, -1, 0x43, Format.VOP3P, InstrClass.WMMA, False, False)
opcode("v_wmma_i32_16x16x16_iu8", -1, -1, -1, 0x44, Format.VOP3P, InstrClass.WMMA, False, False)
opcode("v_wmma_i32_16x16x16_iu4", -1, -1, -1, 0x45, Format.VOP3P, InstrClass.WMMA, False, False)


# VINTRP (GFX6 - GFX10.3) instructions:
VINTRP = {
    (0x00, "v_interp_p1_f32", dst(1), src(1, M0)),
    (0x01, "v_interp_p2_f32", dst(1), src(1, M0, 1)),
    (0x02, "v_interp_mov_f32", dst(1), src(1, M0)),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, -1, name)
for (code, name, defs, ops) in VINTRP:
    opcode(name, code, code, code, -1, Format.VINTRP, InstrClass.Valu32, definitions = defs, operands = ops)


# VINTERP (GFX11+) instructions:
VINTERP = {
    (0x00, "v_interp_p10_f32_inreg"),
    (0x01, "v_interp_p2_f32_inreg"),
    (0x02, "v_interp_p10_f16_f32_inreg"),
    (0x03, "v_interp_p2_f16_f32_inreg"),
    (0x04, "v_interp_p10_rtz_f16_f32_inreg"),
    (0x05, "v_interp_p2_rtz_f16_f32_inreg"),
}
for (code, name) in VINTERP:
    opcode(name, -1, -1, -1, code, Format.VINTERP_INREG, InstrClass.Valu32, False, True, definitions = dst(1), operands = src(1, 1, 1))


# VOP3 instructions: 3 inputs, 1 output
# VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod,
# defs, ops[, cls]). Rows without a trailing InstrClass are presumably
# completed with InstrClass.Valu32 by default_class() in the loop after the
# table (helper defined outside this section -- confirm there).
VOP3 = {
    (0x140, 0x140, 0x1c0, 0x1c0, 0x140, -1, "v_mad_legacy_f32", True, True, dst(1), src(1, 1, 1)), # GFX6-GFX10
    (0x141, 0x141, 0x1c1, 0x1c1, 0x141, -1, "v_mad_f32", True, True, dst(1), src(1, 1, 1)),
    (0x142, 0x142, 0x1c2, 0x1c2, 0x142, 0x20a, "v_mad_i32_i24", False, False, dst(1), src(1, 1, 1)),
    (0x143, 0x143, 0x1c3, 0x1c3, 0x143, 0x20b, "v_mad_u32_u24", False, False, dst(1), src(1, 1, 1)),
    (0x144, 0x144, 0x1c4, 0x1c4, 0x144, 0x20c, "v_cubeid_f32", True, True, dst(1), src(1, 1, 1)),
    (0x145, 0x145, 0x1c5, 0x1c5, 0x145, 0x20d, "v_cubesc_f32", True, True, dst(1), src(1, 1, 1)),
    (0x146, 0x146, 0x1c6, 0x1c6, 0x146, 0x20e, "v_cubetc_f32", True, True, dst(1), src(1, 1, 1)),
    (0x147, 0x147, 0x1c7, 0x1c7, 0x147, 0x20f, "v_cubema_f32", True, True, dst(1), src(1, 1, 1)),
    (0x148, 0x148, 0x1c8, 0x1c8, 0x148, 0x210, "v_bfe_u32", False, False, dst(1), src(1, 1, 1)),
    (0x149, 0x149, 0x1c9, 0x1c9, 0x149, 0x211, "v_bfe_i32", False, False, dst(1), src(1, 1, 1)),
    (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, 0x212, "v_bfi_b32", False, False, dst(1), src(1, 1, 1)),
    (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, 0x213, "v_fma_f32", True, True, dst(1), src(1, 1, 1), InstrClass.ValuFma),
    (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, 0x214, "v_fma_f64", True, True, dst(2), src(2, 2, 2), InstrClass.ValuDouble),
    (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, 0x215, "v_lerp_u8", False, False, dst(1), src(1, 1, 1)),
    (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, 0x216, "v_alignbit_b32", False, False, dst(1), src(1, 1, 1)),
    (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, 0x217, "v_alignbyte_b32", False, False, dst(1), src(1, 1, 1)),
    (0x150, 0x150, -1, -1, 0x150, 0x218, "v_mullit_f32", True, True, dst(1), src(1, 1, 1)),
    (0x151, 0x151, 0x1d0, 0x1d0, 0x151, 0x219, "v_min3_f32", True, True, dst(1), src(1, 1, 1)),
    (0x152, 0x152, 0x1d1, 0x1d1, 0x152, 0x21a, "v_min3_i32", False, False, dst(1), src(1, 1, 1)),
    (0x153, 0x153, 0x1d2, 0x1d2, 0x153, 0x21b, "v_min3_u32", False, False, dst(1), src(1, 1, 1)),
    (0x154, 0x154, 0x1d3, 0x1d3, 0x154, 0x21c, "v_max3_f32", True, True, dst(1), src(1, 1, 1)),
    (0x155, 0x155, 0x1d4, 0x1d4, 0x155, 0x21d, "v_max3_i32", False, False, dst(1), src(1, 1, 1)),
    (0x156, 0x156, 0x1d5, 0x1d5, 0x156, 0x21e, "v_max3_u32", False, False, dst(1), src(1, 1, 1)),
    (0x157, 0x157, 0x1d6, 0x1d6, 0x157, 0x21f, "v_med3_f32", True, True, dst(1), src(1, 1, 1)),
    (0x158, 0x158, 0x1d7, 0x1d7, 0x158, 0x220, "v_med3_i32", False, False, dst(1), src(1, 1, 1)),
    (0x159, 0x159, 0x1d8, 0x1d8, 0x159, 0x221, "v_med3_u32", False, False, dst(1), src(1, 1, 1)),
    (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, 0x222, "v_sad_u8", False, False, dst(1), src(1, 1, 1)),
    (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, 0x223, "v_sad_hi_u8", False, False, dst(1), src(1, 1, 1)),
    (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, 0x224, "v_sad_u16", False, False, dst(1), src(1, 1, 1)),
    (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, 0x225, "v_sad_u32", False, False, dst(1), src(1, 1, 1)),
    (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, 0x226, "v_cvt_pk_u8_f32", True, False, dst(1), src(1, 1, 1)),
    (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, 0x227, "v_div_fixup_f32", True, True, dst(1), src(1, 1, 1)),
    (0x160, 0x160, 0x1df, 0x1df, 0x160, 0x228, "v_div_fixup_f64", True, True, dst(2), src(2, 2, 2)),
    (0x161, 0x161, -1, -1, -1, -1, "v_lshl_b64", False, False, dst(2), src(2, 1), InstrClass.Valu64),
    (0x162, 0x162, -1, -1, -1, -1, "v_lshr_b64", False, False, dst(2), src(2, 1), InstrClass.Valu64),
    (0x163, 0x163, -1, -1, -1, -1, "v_ashr_i64", False, False, dst(2), src(2, 1), InstrClass.Valu64),
    (0x164, 0x164, 0x280, 0x280, 0x164, 0x327, "v_add_f64", True, True, dst(2), src(2, 2), InstrClass.ValuDoubleAdd),
    (0x165, 0x165, 0x281, 0x281, 0x165, 0x328, "v_mul_f64", True, True, dst(2), src(2, 2), InstrClass.ValuDouble),
    (0x166, 0x166, 0x282, 0x282, 0x166, 0x329, "v_min_f64", True, True, dst(2), src(2, 2), InstrClass.ValuDouble),
    (0x167, 0x167, 0x283, 0x283, 0x167, 0x32a, "v_max_f64", True, True, dst(2), src(2, 2), InstrClass.ValuDouble),
    (0x168, 0x168, 0x284, 0x284, 0x168, 0x32b, "v_ldexp_f64", False, True, dst(2), src(2, 1), InstrClass.ValuDouble), # src1 can take input modifiers
    (0x169, 0x169, 0x285, 0x285, 0x169, 0x32c, "v_mul_lo_u32", False, False, dst(1), src(1, 1), InstrClass.ValuQuarterRate32),
    (0x16a, 0x16a, 0x286, 0x286, 0x16a, 0x32d, "v_mul_hi_u32", False, False, dst(1), src(1, 1), InstrClass.ValuQuarterRate32),
    (0x16b, 0x16b, 0x285, 0x285, 0x16b, 0x32c, "v_mul_lo_i32", False, False, dst(1), src(1, 1), InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32
    (0x16c, 0x16c, 0x287, 0x287, 0x16c, 0x32e, "v_mul_hi_i32", False, False, dst(1), src(1, 1), InstrClass.ValuQuarterRate32),
    (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, 0x2fc, "v_div_scale_f32", True, True, dst(1, VCC), src(1, 1, 1)),
    (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, 0x2fd, "v_div_scale_f64", True, True, dst(2, VCC), src(2, 2, 2), InstrClass.ValuDouble),
    (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, 0x237, "v_div_fmas_f32", True, True, dst(1), src(1, 1, 1, VCC)),
    (0x170, 0x170, 0x1e3, 0x1e3, 0x170, 0x238, "v_div_fmas_f64", True, True, dst(2), src(2, 2, 2, VCC), InstrClass.ValuDouble),
    (0x171, 0x171, 0x1e4, 0x1e4, 0x171, 0x239, "v_msad_u8", False, False, dst(1), src(1, 1, 1)),
    (0x172, 0x172, 0x1e5, 0x1e5, 0x172, 0x23a, "v_qsad_pk_u16_u8", False, False, dst(2), src(2, 1, 2)),
    (0x173, 0x173, 0x1e6, 0x1e6, 0x173, 0x23b, "v_mqsad_pk_u16_u8", False, False, dst(2), src(2, 1, 2)),
    (0x174, 0x174, 0x292, 0x292, 0x174, 0x32f, "v_trig_preop_f64", False, False, dst(2), src(2, 2), InstrClass.ValuDouble),
    ( -1, 0x175, 0x1e7, 0x1e7, 0x175, 0x23d, "v_mqsad_u32_u8", False, False, dst(4), src(2, 1, 4)),
    ( -1, 0x176, 0x1e8, 0x1e8, 0x176, 0x2fe, "v_mad_u64_u32", False, False, dst(2, VCC), src(1, 1, 2), InstrClass.Valu64),
    ( -1, 0x177, 0x1e9, 0x1e9, 0x177, 0x2ff, "v_mad_i64_i32", False, False, dst(2, VCC), src(1, 1, 2), InstrClass.Valu64),
    ( -1, -1, 0x1ea, 0x1ea, -1, -1, "v_mad_legacy_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, 0x1eb, 0x1eb, -1, -1, "v_mad_legacy_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, 0x1ec, 0x1ec, -1, -1, "v_mad_legacy_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, 0x1ed, 0x1ed, 0x344, 0x244, "v_perm_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, 0x1ee, 0x1ee, -1, -1, "v_fma_legacy_f16", True, True, dst(1), src(1, 1, 1), InstrClass.ValuFma),
    ( -1, -1, 0x1ef, 0x1ef, -1, -1, "v_div_fixup_legacy_f16", True, True, dst(1), src(1, 1, 1)),
    (0x12c, 0x12c, 0x1f0, 0x1f0, -1, -1, "v_cvt_pkaccum_u8_f32", True, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f1, 0x373, 0x259, "v_mad_u32_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f2, 0x375, 0x25a, "v_mad_i32_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f3, 0x345, 0x245, "v_xad_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f4, 0x351, 0x249, "v_min3_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f5, 0x352, 0x24a, "v_min3_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f6, 0x353, 0x24b, "v_min3_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f7, 0x354, 0x24c, "v_max3_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f8, 0x355, 0x24d, "v_max3_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1f9, 0x356, 0x24e, "v_max3_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1fa, 0x357, 0x24f, "v_med3_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1fb, 0x358, 0x250, "v_med3_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1fc, 0x359, 0x251, "v_med3_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1fd, 0x346, 0x246, "v_lshl_add_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1fe, 0x347, 0x247, "v_add_lshl_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x1ff, 0x36d, 0x255, "v_add3_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x200, 0x36f, 0x256, "v_lshl_or_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x201, 0x371, 0x257, "v_and_or_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x202, 0x372, 0x258, "v_or3_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x203, -1, -1, "v_mad_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x204, 0x340, 0x241, "v_mad_u16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x205, 0x35e, 0x253, "v_mad_i16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x206, 0x34b, 0x248, "v_fma_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, 0x207, 0x35f, 0x254, "v_div_fixup_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, 0x274, 0x274, 0x342, -1, "v_interp_p1ll_f16", True, True, dst(1), src(1, M0)),
    ( -1, -1, 0x275, 0x275, 0x343, -1, "v_interp_p1lv_f16", True, True, dst(1), src(1, M0, 1)),
    ( -1, -1, 0x276, 0x276, -1, -1, "v_interp_p2_legacy_f16", True, True, dst(1), src(1, M0, 1)),
    ( -1, -1, -1, 0x277, 0x35a, -1, "v_interp_p2_f16", True, True, dst(1), src(1, M0, 1)),
    (0x12b, 0x12b, 0x288, 0x288, 0x362, 0x31c, "v_ldexp_f32", False, True, dst(1), src(1, 1)),
    ( -1, -1, 0x289, 0x289, 0x360, 0x360, "v_readlane_b32_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, 0x28a, 0x28a, 0x361, 0x361, "v_writelane_b32_e64", False, False, dst(1), src(1, 1, 1)),
    (0x122, 0x122, 0x28b, 0x28b, 0x364, 0x31e, "v_bcnt_u32_b32", False, False, dst(1), src(1, 1)),
    (0x123, 0x123, 0x28c, 0x28c, 0x365, 0x31f, "v_mbcnt_lo_u32_b32", False, False, dst(1), src(1, 1)),
    ( -1, -1, 0x28d, 0x28d, 0x366, 0x320, "v_mbcnt_hi_u32_b32_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, 0x28f, 0x28f, 0x2ff, 0x33c, "v_lshlrev_b64", False, False, dst(2), src(1, 2), InstrClass.Valu64),
    ( -1, -1, 0x290, 0x290, 0x300, 0x33d, "v_lshrrev_b64", False, False, dst(2), src(1, 2), InstrClass.Valu64),
    ( -1, -1, 0x291, 0x291, 0x301, 0x33e, "v_ashrrev_i64", False, False, dst(2), src(1, 2), InstrClass.Valu64),
    (0x11e, 0x11e, 0x293, 0x293, 0x363, 0x31d, "v_bfm_b32", False, False, dst(1), src(1, 1)),
    (0x12d, 0x12d, 0x294, 0x294, 0x368, 0x321, "v_cvt_pknorm_i16_f32", True, False, dst(1), src(1, 1)),
    (0x12e, 0x12e, 0x295, 0x295, 0x369, 0x322, "v_cvt_pknorm_u16_f32", True, False, dst(1), src(1, 1)),
    ( -1, -1, 0x296, 0x296, -1, -1, "v_cvt_pkrtz_f16_f32_e64", True, False, dst(1), src(1, 1)),
    (0x130, 0x130, 0x297, 0x297, 0x36a, 0x323, "v_cvt_pk_u16_u32", False, False, dst(1), src(1, 1)),
    (0x131, 0x131, 0x298, 0x298, 0x36b, 0x324, "v_cvt_pk_i16_i32", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, 0x299, 0x312, 0x312, "v_cvt_pknorm_i16_f16", True, False, dst(1), src(1, 1)), #v_cvt_pk_norm_i16_f16 in GFX11
    ( -1, -1, -1, 0x29a, 0x313, 0x313, "v_cvt_pknorm_u16_f16", True, False, dst(1), src(1, 1)), #v_cvt_pk_norm_u16_f16 in GFX11
    ( -1, -1, -1, 0x29c, 0x37f, 0x326, "v_add_i32", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, 0x29d, 0x376, 0x325, "v_sub_i32", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, 0x29e, 0x30d, 0x30d, "v_add_i16", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, 0x29f, 0x30e, 0x30e, "v_sub_i16", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, 0x2a0, 0x311, 0x311, "v_pack_b32_f16", True, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x178, 0x240, "v_xor3_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, 0x377, 0x25b, "v_permlane16_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, 0x378, 0x25c, "v_permlanex16_b32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, 0x30f, 0x300, "v_add_co_u32_e64", False, False, dst(1, VCC), src(1, 1)),
    ( -1, -1, -1, -1, 0x310, 0x301, "v_sub_co_u32_e64", False, False, dst(1, VCC), src(1, 1)),
    ( -1, -1, -1, -1, 0x319, 0x302, "v_subrev_co_u32_e64", False, False, dst(1, VCC), src(1, 1)),
    ( -1, -1, -1, -1, 0x303, 0x303, "v_add_u16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x304, 0x304, "v_sub_u16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x305, 0x305, "v_mul_lo_u16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x309, 0x309, "v_max_u16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x30a, 0x30a, "v_max_i16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x30b, 0x30b, "v_min_u16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x30c, 0x30c, "v_min_i16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x307, 0x339, "v_lshrrev_b16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x308, 0x33a, "v_ashrrev_i16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x314, 0x338, "v_lshlrev_b16_e64", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, 0x140, 0x209, "v_fma_legacy_f32", True, True, dst(1), src(1, 1, 1), InstrClass.ValuFma), #GFX10.3+, v_fma_dx9_zero_f32 in GFX11
    ( -1, -1, -1, -1, -1, 0x25e, "v_maxmin_f32", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x25f, "v_minmax_f32", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x260, "v_maxmin_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x261, "v_minmax_f16", True, True, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", False, False, dst(1), src(1, 1, 1)),
    ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", False, False, dst(1), src(1, 1)),
    ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, False, dst(1), src(1, 1, VCC)),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, defs, ops, cls) in default_class(VOP3, InstrClass.Valu32):
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod, definitions = defs, operands = ops)


# VOPD entries are (gfx11 opcode, name); opcode() is called with -1 for every
# other generation.
VOPD = {
    (0x00, "v_dual_fmac_f32"),
    (0x01, "v_dual_fmaak_f32"),
    (0x02, "v_dual_fmamk_f32"),
    (0x03, "v_dual_mul_f32"),
    (0x04, "v_dual_add_f32"),
    (0x05, "v_dual_sub_f32"),
    (0x06, "v_dual_subrev_f32"),
    (0x07, "v_dual_mul_dx9_zero_f32"),
    (0x08, "v_dual_mov_b32"),
    (0x09, "v_dual_cndmask_b32"),
    (0x0a, "v_dual_max_f32"),
    (0x0b, "v_dual_min_f32"),
    (0x0c, "v_dual_dot2acc_f32_f16"),
    (0x0d, "v_dual_dot2acc_f32_bf16"),
    (0x10, "v_dual_add_nc_u32"),
    (0x11, "v_dual_lshlrev_b32"),
    (0x12, "v_dual_and_b32"),
}
for gfx11, name in VOPD:
    opcode(name, -1, -1, -1, gfx11, format = Format.VOPD, cls = InstrClass.Valu32)


# DS instructions: 3 inputs (1 addr, 2 data), 1 output
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name).
DS = {
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
    (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
    (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
    (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
    (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
    (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
    (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
    (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
    (0x08, 0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
    (0x09, 0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"), #ds_store_b32 in GFX11
    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"), #ds_store_2addr_b32 in GFX11
    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"), #ds_store_2addr_stride64_b32 in GFX11
    (0x10, 0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"), #ds_cmpstore_b32 in GFX11
    (0x11, 0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"), #ds_cmpstore_f32 in GFX11
    (0x12, 0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
    (0x13, 0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
    ( -1, 0x14, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
    ( -1, -1, 0x15, 0x15, 0x15, 0x15, "ds_add_f32"),
    ( -1, -1, 0x1d, 0x1d, 0xb0, 0xb0, "ds_write_addtid_b32"), #ds_store_addtid_b32 in GFX11
    (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"), #ds_store_b8 in GFX11
    (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"), #ds_store_b16 in GFX11
    (0x20, 0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
    (0x21, 0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
    (0x22, 0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
    (0x23, 0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
    (0x24, 0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
    (0x25, 0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
    (0x26, 0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
    (0x27, 0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
    (0x28, 0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
    (0x29, 0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
    (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
    (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
    (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
    (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"), #ds_storexchg_rtn_b32 in GFX11
    (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"), #ds_storexchg_2addr_rtn_b32 in GFX11
    (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"), #ds_storexchg_2addr_stride64_rtn_b32 in GFX11
    (0x30, 0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"), #ds_cmpstore_rtn_b32 in GFX11
    (0x31, 0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"), #ds_cmpstore_rtn_f32 in GFX11
    (0x32, 0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
    (0x33, 0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
    ( -1, 0x34, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
    ( -1, -1, 0x35, 0x35, 0x55, 0x79, "ds_add_rtn_f32"),
    (0x36, 0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"), #ds_load_b32 in GFX11
    (0x37, 0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"), #ds_load_2addr_b32 in GFX11
    (0x38, 0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"), #ds_load_2addr_stride64_b32 in GFX11
    (0x39, 0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"), #ds_load_i8 in GFX11
    (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"), #ds_load_u8 in GFX11
    (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"), #ds_load_i16 in GFX11
    (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"), #ds_load_u16 in GFX11
    (0x35, 0x35, 0x3d, 0x3d, 0x35, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
    ( -1, -1, 0x3e, 0x3e, 0xb2, 0xb2, "ds_permute_b32"),
    ( -1, -1, 0x3f, 0x3f, 0xb3, 0xb3, "ds_bpermute_b32"),
    (0x40, 0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
    (0x41, 0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
    (0x42, 0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
    (0x43, 0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
    (0x44, 0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
    (0x45, 0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
    (0x46, 0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
    (0x47, 0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
    (0x48, 0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
    (0x49, 0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
    (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
    (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
    (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
    (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"), #ds_store_b64 in GFX11
    (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"), #ds_store_2addr_b64 in GFX11
    (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"), #ds_store_2addr_stride64_b64 in GFX11
    (0x50, 0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"), #ds_cmpstore_b64 in GFX11
    (0x51, 0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"), #ds_cmpstore_f64 in GFX11
    (0x52, 0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
    (0x53, 0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
    ( -1, -1, -1, 0x54, 0xa0, 0xa0, "ds_write_b8_d16_hi"), #ds_store_b8_d16_hi in GFX11
    ( -1, -1, -1, 0x55, 0xa1, 0xa1, "ds_write_b16_d16_hi"), #ds_store_b16_d16_hi in GFX11
    ( -1, -1, -1, 0x56, 0xa2, 0xa2, "ds_read_u8_d16"), #ds_load_u8_d16 in GFX11
    ( -1, -1, -1, 0x57, 0xa3, 0xa3, "ds_read_u8_d16_hi"), #ds_load_u8_d16_hi in GFX11
    ( -1, -1, -1, 0x58, 0xa4, 0xa4, "ds_read_i8_d16"), #ds_load_i8_d16 in GFX11
    ( -1, -1, -1, 0x59, 0xa5, 0xa5, "ds_read_i8_d16_hi"), #ds_load_i8_d16_hi in GFX11
    ( -1, -1, -1, 0x5a, 0xa6, 0xa6, "ds_read_u16_d16"), #ds_load_u16_d16 in GFX11
    ( -1, -1, -1, 0x5b, 0xa7, 0xa7, "ds_read_u16_d16_hi"), #ds_load_u16_d16_hi in GFX11
    (0x60, 0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
    (0x61, 0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
    (0x62, 0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
    (0x63, 0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
    (0x64, 0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
    (0x65, 0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
    (0x66, 0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
    (0x67, 0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
    (0x68, 0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
    (0x69, 0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
    (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
    (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
    (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
    (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"), #ds_storexchg_rtn_b64 in GFX11
    (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"), #ds_storexchg_2addr_rtn_b64 in GFX11
    (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"), #ds_storexchg_2addr_stride64_rtn_b64 in GFX11
    (0x70, 0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"), #ds_cmpstore_rtn_b64 in GFX11
    (0x71, 0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"), #ds_cmpstore_rtn_f64 in GFX11
    (0x72, 0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
    (0x73, 0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
    (0x76, 0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"), #ds_load_b64 in GFX11
    (0x77, 0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"), #ds_load_2addr_b64 in GFX11
    (0x78, 0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"), #ds_load_2addr_stride64_b64 in GFX11
    ( -1, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
    (0x80, 0x80, 0x80, 0x80, 0x80, -1, "ds_add_src2_u32"),
    (0x81, 0x81, 0x81, 0x81, 0x81, -1, "ds_sub_src2_u32"),
    (0x82, 0x82, 0x82, 0x82, 0x82, -1, "ds_rsub_src2_u32"),
    (0x83, 0x83, 0x83, 0x83, 0x83, -1, "ds_inc_src2_u32"),
    (0x84, 0x84, 0x84, 0x84, 0x84, -1, "ds_dec_src2_u32"),
    (0x85, 0x85, 0x85, 0x85, 0x85, -1, "ds_min_src2_i32"),
    (0x86, 0x86, 0x86, 0x86, 0x86, -1, "ds_max_src2_i32"),
    (0x87, 0x87, 0x87, 0x87, 0x87, -1, "ds_min_src2_u32"),
    (0x88, 0x88, 0x88, 0x88, 0x88, -1, "ds_max_src2_u32"),
    (0x89, 0x89, 0x89, 0x89, 0x89, -1, "ds_and_src2_b32"),
    (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, -1, "ds_or_src2_b32"),
    (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, -1, "ds_xor_src2_b32"),
    (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, -1, "ds_write_src2_b32"),
    (0x92, 0x92, 0x92, 0x92, 0x92, -1, "ds_min_src2_f32"),
    (0x93, 0x93, 0x93, 0x93, 0x93, -1, "ds_max_src2_f32"),
    ( -1, -1, 0x95, 0x95, 0x95, -1, "ds_add_src2_f32"),
    ( -1, 0x18, 0x98, 0x98, 0x18, 0x18, "ds_gws_sema_release_all"),
    (0x19, 0x19, 0x99, 0x99, 0x19, 0x19, "ds_gws_init"),
    (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, 0x1a, "ds_gws_sema_v"),
    (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, 0x1b, "ds_gws_sema_br"),
    (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, 0x1c, "ds_gws_sema_p"),
    (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, 0x1d, "ds_gws_barrier"),
    ( -1, -1, 0xb6, 0xb6, 0xb1, 0xb1, "ds_read_addtid_b32"), #ds_load_addtid_b32 in GFX11
    (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, 0x3d, "ds_consume"),
    (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, 0x3e, "ds_append"),
    (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, 0x3f, "ds_ordered_count"),
    (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, -1, "ds_add_src2_u64"),
    (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, -1, "ds_sub_src2_u64"),
    (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, -1, "ds_rsub_src2_u64"),
    (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, -1, "ds_inc_src2_u64"),
    (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, -1, "ds_dec_src2_u64"),
    (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, -1, "ds_min_src2_i64"),
    (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -1, "ds_max_src2_i64"),
    (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, -1, "ds_min_src2_u64"),
    (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, -1, "ds_max_src2_u64"),
    (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, -1, "ds_and_src2_b64"),
    (0xca, 0xca, 0xca, 0xca, 0xca, -1, "ds_or_src2_b64"),
    (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, -1, "ds_xor_src2_b64"),
    (0xcd, 0xcd, 0xcd, 0xcd, 0xcd, -1, "ds_write_src2_b64"),
    (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, -1, "ds_min_src2_f64"),
    (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, -1, "ds_max_src2_f64"),
    ( -1, 0xde, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"), #ds_store_b96 in GFX11
    ( -1, 0xdf, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"), #ds_store_b128 in GFX11
    ( -1, 0xfd, 0xfd, -1, -1, -1, "ds_condxchg32_rtn_b128"),
    ( -1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"), #ds_load_b96 in GFX11
    ( -1, 0xff, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), #ds_load_b128 in GFX11
    ( -1, -1, -1, -1, -1, 0x7a, "ds_add_gs_reg_rtn"),
    ( -1, -1, -1, -1, -1, 0x7b, "ds_sub_gs_reg_rtn"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in DS:
    opcode(name, gfx7, gfx9, gfx10, gfx11, Format.DS, InstrClass.DS)


# LDSDIR instructions:
# Entries are (opcode, name); the opcode is passed only in the gfx11 position.
LDSDIR = {
    (0x00, "lds_param_load"),
    (0x01, "lds_direct_load"),
}
for (code, name) in LDSDIR:
    opcode(name, -1, -1, -1, code, Format.LDSDIR, InstrClass.DS)

# MUBUF instructions:
MUBUF = {
    (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
    (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
    (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
    (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
    (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
    (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
    (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
    (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
    ( -1, -1, 0x08, 0x08, 0x80, 0x08, "buffer_load_format_d16_x"),
    ( -1, -1, 0x09, 0x09, 0x81, 0x09, "buffer_load_format_d16_xy"),
    ( -1, -1, 0x0a, 0x0a, 0x82, 0x0a, "buffer_load_format_d16_xyz"),
    ( -1, -1, 0x0b, 0x0b, 0x83, 0x0b, "buffer_load_format_d16_xyzw"),
    ( -1, -1, 0x0c, 0x0c, 0x84, 0x0c, "buffer_store_format_d16_x"),
    ( -1, -1, 0x0d, 0x0d, 0x85, 0x0d, "buffer_store_format_d16_xy"),
    ( -1, -1, 0x0e, 0x0e, 0x86, 0x0e, "buffer_store_format_d16_xyz"),
    ( -1, -1, 0x0f, 0x0f, 0x87, 0x0f, "buffer_store_format_d16_xyzw"),
    (0x08, 0x08, 0x10, 0x10, 0x08, 0x10, "buffer_load_ubyte"),
    (0x09, 0x09, 0x11, 0x11, 0x09, 0x11, "buffer_load_sbyte"),
    (0x0a, 0x0a, 0x12, 0x12, 0x0a, 0x12, "buffer_load_ushort"),
    (0x0b, 0x0b, 0x13, 0x13, 0x0b, 0x13, "buffer_load_sshort"),
    (0x0c, 0x0c, 0x14, 0x14, 0x0c, 0x14, "buffer_load_dword"),
    (0x0d, 0x0d, 0x15, 0x15, 0x0d, 0x15, "buffer_load_dwordx2"),
    ( -1, 0x0f, 0x16, 0x16, 0x0f, 0x16, "buffer_load_dwordx3"),
    (0x0f, 0x0e, 0x17, 0x17, 0x0e, 0x17, "buffer_load_dwordx4"),
    (0x18, 0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
    ( -1, -1, -1, 0x19, 0x19, 0x24, "buffer_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x19, "buffer_store_short"),
    ( -1, -1, -1, 0x1b, 0x1b, 0x25, "buffer_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1a, "buffer_store_dword"),
    (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1b, "buffer_store_dwordx2"),
    ( -1, 0x1f, 0x1e, 0x1e, 0x1f, 0x1c, "buffer_store_dwordx3"),
    (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, 0x1d, "buffer_store_dwordx4"),
    ( -1, -1, -1, 0x20, 0x20, 0x1e, "buffer_load_ubyte_d16"),
    ( -1, -1, -1, 0x21, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
    ( -1, -1, -1, 0x22, 0x22, 0x1f, "buffer_load_sbyte_d16"),
    ( -1, -1, -1, 0x23, 0x23, 0x22, "buffer_load_sbyte_d16_hi"),
    ( -1, -1, -1, 0x24, 0x24, 0x20, "buffer_load_short_d16"),
    ( -1, -1, -1, 0x25, 0x25, 0x23, "buffer_load_short_d16_hi"),
    ( -1, -1, -1, 0x26, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
    ( -1, -1, -1, 0x27, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
    ( -1, -1, 0x3d, 0x3d, -1, -1, "buffer_store_lds_dword"),
    (0x71, 0x71, 0x3e, 0x3e, -1, -1, "buffer_wbinvl1"),
    (0x70, 0x70, 0x3f, 0x3f, -1, -1, "buffer_wbinvl1_vol"),
    (0x30, 0x30, 0x40, 0x40, 0x30, 0x33, "buffer_atomic_swap"),
    (0x31, 0x31, 0x41, 0x41, 0x31, 0x34, "buffer_atomic_cmpswap"),
    (0x32, 0x32, 0x42, 0x42, 0x32, 0x35, "buffer_atomic_add"),
    (0x33, 0x33, 0x43, 0x43, 0x33, 0x36, "buffer_atomic_sub"),
    (0x34, -1, -1, -1, -1, -1, "buffer_atomic_rsub"),
    (0x35, 0x35, 0x44, 0x44, 0x35, 0x38, "buffer_atomic_smin"),
    (0x36, 0x36, 0x45, 0x45, 0x36,
0x39, "buffer_atomic_umin"), 1525 (0x37, 0x37, 0x46, 0x46, 0x37, 0x3a, "buffer_atomic_smax"), 1526 (0x38, 0x38, 0x47, 0x47, 0x38, 0x3b, "buffer_atomic_umax"), 1527 (0x39, 0x39, 0x48, 0x48, 0x39, 0x3c, "buffer_atomic_and"), 1528 (0x3a, 0x3a, 0x49, 0x49, 0x3a, 0x3d, "buffer_atomic_or"), 1529 (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, 0x3e, "buffer_atomic_xor"), 1530 (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, 0x3f, "buffer_atomic_inc"), 1531 (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, 0x40, "buffer_atomic_dec"), 1532 (0x3e, 0x3e, -1, -1, 0x3e, 0x50, "buffer_atomic_fcmpswap"), 1533 (0x3f, 0x3f, -1, -1, 0x3f, 0x51, "buffer_atomic_fmin"), 1534 (0x40, 0x40, -1, -1, 0x40, 0x52, "buffer_atomic_fmax"), 1535 (0x50, 0x50, 0x60, 0x60, 0x50, 0x41, "buffer_atomic_swap_x2"), 1536 (0x51, 0x51, 0x61, 0x61, 0x51, 0x42, "buffer_atomic_cmpswap_x2"), 1537 (0x52, 0x52, 0x62, 0x62, 0x52, 0x43, "buffer_atomic_add_x2"), 1538 (0x53, 0x53, 0x63, 0x63, 0x53, 0x44, "buffer_atomic_sub_x2"), 1539 (0x54, -1, -1, -1, -1, -1, "buffer_atomic_rsub_x2"), 1540 (0x55, 0x55, 0x64, 0x64, 0x55, 0x45, "buffer_atomic_smin_x2"), 1541 (0x56, 0x56, 0x65, 0x65, 0x56, 0x46, "buffer_atomic_umin_x2"), 1542 (0x57, 0x57, 0x66, 0x66, 0x57, 0x47, "buffer_atomic_smax_x2"), 1543 (0x58, 0x58, 0x67, 0x67, 0x58, 0x48, "buffer_atomic_umax_x2"), 1544 (0x59, 0x59, 0x68, 0x68, 0x59, 0x49, "buffer_atomic_and_x2"), 1545 (0x5a, 0x5a, 0x69, 0x69, 0x5a, 0x4a, "buffer_atomic_or_x2"), 1546 (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, 0x4b, "buffer_atomic_xor_x2"), 1547 (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, 0x4c, "buffer_atomic_inc_x2"), 1548 (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, 0x4d, "buffer_atomic_dec_x2"), 1549 (0x5e, 0x5e, -1, -1, 0x5e, -1, "buffer_atomic_fcmpswap_x2"), 1550 (0x5f, 0x5f, -1, -1, 0x5f, -1, "buffer_atomic_fmin_x2"), 1551 (0x60, 0x60, -1, -1, 0x60, -1, "buffer_atomic_fmax_x2"), 1552 ( -1, -1, -1, -1, 0x71, 0x2b, "buffer_gl0_inv"), 1553 ( -1, -1, -1, -1, 0x72, 0x2c, "buffer_gl1_inv"), 1554 ( -1, -1, -1, -1, 0x34, 0x37, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set. 
buffer_atomic_csub_u32 in GFX11 1555 ( -1, -1, -1, -1, -1, 0x31, "buffer_load_lds_b32"), 1556 ( -1, -1, -1, -1, -1, 0x32, "buffer_load_lds_format_x"), 1557 ( -1, -1, -1, -1, -1, 0x2e, "buffer_load_lds_i8"), 1558 ( -1, -1, -1, -1, -1, 0x30, "buffer_load_lds_i16"), 1559 ( -1, -1, -1, -1, -1, 0x2d, "buffer_load_lds_u8"), 1560 ( -1, -1, -1, -1, -1, 0x2f, "buffer_load_lds_u16"), 1561 ( -1, -1, -1, -1, -1, 0x56, "buffer_atomic_add_f32"), 1562} 1563for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MUBUF: 1564 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name) 1565 1566MTBUF = { 1567 (0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"), 1568 (0x01, 0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"), 1569 (0x02, 0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"), 1570 (0x03, 0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"), 1571 (0x04, 0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"), 1572 (0x05, 0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"), 1573 (0x06, 0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"), 1574 (0x07, 0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"), 1575 ( -1, -1, 0x08, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"), 1576 ( -1, -1, 0x09, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"), 1577 ( -1, -1, 0x0a, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"), 1578 ( -1, -1, 0x0b, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"), 1579 ( -1, -1, 0x0c, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"), 1580 ( -1, -1, 0x0d, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"), 1581 ( -1, -1, 0x0e, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"), 1582 ( -1, -1, 0x0f, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"), 1583} 1584for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in MTBUF: 1585 opcode(name, gfx7, gfx9, gfx10, gfx11, Format.MTBUF, InstrClass.VMem) 1586 1587 1588IMAGE = { 1589 (0x00, 0x00, "image_load"), 1590 (0x01, 0x01, "image_load_mip"), 1591 
(0x02, 0x02, "image_load_pck"), 1592 (0x03, 0x03, "image_load_pck_sgn"), 1593 (0x04, 0x04, "image_load_mip_pck"), 1594 (0x05, 0x05, "image_load_mip_pck_sgn"), 1595 (0x08, 0x06, "image_store"), 1596 (0x09, 0x07, "image_store_mip"), 1597 (0x0a, 0x08, "image_store_pck"), 1598 (0x0b, 0x09, "image_store_mip_pck"), 1599 (0x0e, 0x17, "image_get_resinfo"), 1600 (0x60, 0x38, "image_get_lod"), 1601} 1602# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, name) 1603for (code, gfx11, name) in IMAGE: 1604 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem) 1605 1606opcode("image_msaa_load", -1, -1, 0x80, 0x18, Format.MIMG, InstrClass.VMem) #GFX10.3+ 1607 1608IMAGE_ATOMIC = { 1609 (0x0f, 0x0f, 0x10, 0x0a, "image_atomic_swap"), 1610 (0x10, 0x10, 0x11, 0x0b, "image_atomic_cmpswap"), 1611 (0x11, 0x11, 0x12, 0x0c, "image_atomic_add"), 1612 (0x12, 0x12, 0x13, 0x0d, "image_atomic_sub"), 1613 (0x13, -1, -1, -1, "image_atomic_rsub"), 1614 (0x14, 0x14, 0x14, 0x0e, "image_atomic_smin"), 1615 (0x15, 0x15, 0x15, 0x0f, "image_atomic_umin"), 1616 (0x16, 0x16, 0x16, 0x10, "image_atomic_smax"), 1617 (0x17, 0x17, 0x17, 0x11, "image_atomic_umax"), 1618 (0x18, 0x18, 0x18, 0x12, "image_atomic_and"), 1619 (0x19, 0x19, 0x19, 0x13, "image_atomic_or"), 1620 (0x1a, 0x1a, 0x1a, 0x14, "image_atomic_xor"), 1621 (0x1b, 0x1b, 0x1b, 0x15, "image_atomic_inc"), 1622 (0x1c, 0x1c, 0x1c, 0x16, "image_atomic_dec"), 1623 (0x1d, 0x1d, -1, -1, "image_atomic_fcmpswap"), 1624 (0x1e, 0x1e, -1, -1, "image_atomic_fmin"), 1625 (0x1f, 0x1f, -1, -1, "image_atomic_fmax"), 1626} 1627# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (gfx6, gfx7, gfx89, gfx89, ???, gfx11, name) 1628# gfx7 and gfx10 opcodes are the same here 1629for (gfx6, gfx7, gfx89, gfx11, name) in IMAGE_ATOMIC: 1630 opcode(name, gfx7, gfx89, gfx7, gfx11, Format.MIMG, InstrClass.VMem, is_atomic = True) 1631 1632IMAGE_SAMPLE = { 1633 (0x20, 0x1b, "image_sample"), 1634 (0x21, 0x40, "image_sample_cl"), 1635 (0x22, 0x1c, 
"image_sample_d"), 1636 (0x23, 0x41, "image_sample_d_cl"), 1637 (0x24, 0x1d, "image_sample_l"), 1638 (0x25, 0x1e, "image_sample_b"), 1639 (0x26, 0x42, "image_sample_b_cl"), 1640 (0x27, 0x1f, "image_sample_lz"), 1641 (0x28, 0x20, "image_sample_c"), 1642 (0x29, 0x43, "image_sample_c_cl"), 1643 (0x2a, 0x21, "image_sample_c_d"), 1644 (0x2b, 0x44, "image_sample_c_d_cl"), 1645 (0x2c, 0x22, "image_sample_c_l"), 1646 (0x2d, 0x23, "image_sample_c_b"), 1647 (0x2e, 0x45, "image_sample_c_b_cl"), 1648 (0x2f, 0x24, "image_sample_c_lz"), 1649 (0x30, 0x25, "image_sample_o"), 1650 (0x31, 0x46, "image_sample_cl_o"), 1651 (0x32, 0x26, "image_sample_d_o"), 1652 (0x33, 0x47, "image_sample_d_cl_o"), 1653 (0x34, 0x27, "image_sample_l_o"), 1654 (0x35, 0x28, "image_sample_b_o"), 1655 (0x36, 0x48, "image_sample_b_cl_o"), 1656 (0x37, 0x29, "image_sample_lz_o"), 1657 (0x38, 0x2a, "image_sample_c_o"), 1658 (0x39, 0x49, "image_sample_c_cl_o"), 1659 (0x3a, 0x2b, "image_sample_c_d_o"), 1660 (0x3b, 0x4a, "image_sample_c_d_cl_o"), 1661 (0x3c, 0x2c, "image_sample_c_l_o"), 1662 (0x3d, 0x2d, "image_sample_c_b_o"), 1663 (0x3e, 0x4b, "image_sample_c_b_cl_o"), 1664 (0x3f, 0x2e, "image_sample_c_lz_o"), 1665 (0x68, -1, "image_sample_cd"), 1666 (0x69, -1, "image_sample_cd_cl"), 1667 (0x6a, -1, "image_sample_c_cd"), 1668 (0x6b, -1, "image_sample_c_cd_cl"), 1669 (0x6c, -1, "image_sample_cd_o"), 1670 (0x6d, -1, "image_sample_cd_cl_o"), 1671 (0x6e, -1, "image_sample_c_cd_o"), 1672 (0x6f, -1, "image_sample_c_cd_cl_o"), 1673} 1674# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name) 1675for (code, gfx11, name) in IMAGE_SAMPLE: 1676 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem) 1677 1678IMAGE_SAMPLE_G16 = { 1679 (0xa2, 0x39, "image_sample_d_g16"), 1680 (0xa3, 0x5f, "image_sample_d_cl_g16"), 1681 (0xaa, 0x3a, "image_sample_c_d_g16"), 1682 (0xab, 0x54, "image_sample_c_d_cl_g16"), 1683 (0xb2, 0x3b, "image_sample_d_o_g16"), 1684 (0xb3, 0x55, 
"image_sample_d_cl_o_g16"), 1685 (0xba, 0x3c, "image_sample_c_d_o_g16"), 1686 (0xbb, 0x56, "image_sample_c_d_cl_o_g16"), 1687} 1688 1689# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (-1, -1, -1, -1, code, gfx11, name) 1690for (code, gfx11, name) in IMAGE_SAMPLE_G16: 1691 opcode(name, -1, -1, code, gfx11, Format.MIMG, InstrClass.VMem) 1692 1693IMAGE_GATHER4 = { 1694 (0x40, 0x2f, "image_gather4"), 1695 (0x41, 0x60, "image_gather4_cl"), 1696 #(0x42, "image_gather4h"), VEGA only? 1697 (0x44, 0x30, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet. 1698 (0x45, 0x31, "image_gather4_b"), 1699 (0x46, 0x61, "image_gather4_b_cl"), 1700 (0x47, 0x32, "image_gather4_lz"), 1701 (0x48, 0x33, "image_gather4_c"), 1702 (0x49, 0x62, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet. 1703 #(0x4a, "image_gather4h_pck"), VEGA only? 1704 #(0x4b, "image_gather8h_pck"), VGEA only? 1705 (0x4c, 0x63, "image_gather4_c_l"), 1706 (0x4d, 0x64, "image_gather4_c_b"), 1707 (0x4e, 0x65, "image_gather4_c_b_cl"), 1708 (0x4f, 0x34, "image_gather4_c_lz"), 1709 (0x50, 0x35, "image_gather4_o"), 1710 (0x51, -1, "image_gather4_cl_o"), 1711 (0x54, -1, "image_gather4_l_o"), 1712 (0x55, -1, "image_gather4_b_o"), 1713 (0x56, -1, "image_gather4_b_cl_o"), 1714 (0x57, 0x36, "image_gather4_lz_o"), 1715 (0x58, -1, "image_gather4_c_o"), 1716 (0x59, -1, "image_gather4_c_cl_o"), 1717 (0x5c, -1, "image_gather4_c_l_o"), 1718 (0x5d, -1, "image_gather4_c_b_o"), 1719 (0x5e, -1, "image_gather4_c_b_cl_o"), 1720 (0x5f, 0x37, "image_gather4_c_lz_o"), 1721} 1722# (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) = (code, code, code, code, code, gfx11, name) 1723for (code, gfx11, name) in IMAGE_GATHER4: 1724 opcode(name, code, code, code, gfx11, Format.MIMG, InstrClass.VMem) 1725 1726opcode("image_bvh_intersect_ray", -1, -1, 0xe6, 0x19, Format.MIMG, InstrClass.VMem) 1727opcode("image_bvh64_intersect_ray", -1, -1, 0xe7, 0x1a, Format.MIMG, 
InstrClass.VMem) 1728 1729FLAT = { 1730 #GFX7, GFX89,GFX10,GFX11 1731 (0x08, 0x10, 0x08, 0x10, "flat_load_ubyte"), 1732 (0x09, 0x11, 0x09, 0x11, "flat_load_sbyte"), 1733 (0x0a, 0x12, 0x0a, 0x12, "flat_load_ushort"), 1734 (0x0b, 0x13, 0x0b, 0x13, "flat_load_sshort"), 1735 (0x0c, 0x14, 0x0c, 0x14, "flat_load_dword"), 1736 (0x0d, 0x15, 0x0d, 0x15, "flat_load_dwordx2"), 1737 (0x0f, 0x16, 0x0f, 0x16, "flat_load_dwordx3"), 1738 (0x0e, 0x17, 0x0e, 0x17, "flat_load_dwordx4"), 1739 (0x18, 0x18, 0x18, 0x18, "flat_store_byte"), 1740 ( -1, 0x19, 0x19, 0x24, "flat_store_byte_d16_hi"), 1741 (0x1a, 0x1a, 0x1a, 0x19, "flat_store_short"), 1742 ( -1, 0x1b, 0x1b, 0x25, "flat_store_short_d16_hi"), 1743 (0x1c, 0x1c, 0x1c, 0x1a, "flat_store_dword"), 1744 (0x1d, 0x1d, 0x1d, 0x1b, "flat_store_dwordx2"), 1745 (0x1f, 0x1e, 0x1f, 0x1c, "flat_store_dwordx3"), 1746 (0x1e, 0x1f, 0x1e, 0x1d, "flat_store_dwordx4"), 1747 ( -1, 0x20, 0x20, 0x1e, "flat_load_ubyte_d16"), 1748 ( -1, 0x21, 0x21, 0x21, "flat_load_ubyte_d16_hi"), 1749 ( -1, 0x22, 0x22, 0x1f, "flat_load_sbyte_d16"), 1750 ( -1, 0x23, 0x23, 0x22, "flat_load_sbyte_d16_hi"), 1751 ( -1, 0x24, 0x24, 0x20, "flat_load_short_d16"), 1752 ( -1, 0x25, 0x25, 0x23, "flat_load_short_d16_hi"), 1753 (0x30, 0x40, 0x30, 0x33, "flat_atomic_swap"), 1754 (0x31, 0x41, 0x31, 0x34, "flat_atomic_cmpswap"), 1755 (0x32, 0x42, 0x32, 0x35, "flat_atomic_add"), 1756 (0x33, 0x43, 0x33, 0x36, "flat_atomic_sub"), 1757 (0x35, 0x44, 0x35, 0x38, "flat_atomic_smin"), 1758 (0x36, 0x45, 0x36, 0x39, "flat_atomic_umin"), 1759 (0x37, 0x46, 0x37, 0x3a, "flat_atomic_smax"), 1760 (0x38, 0x47, 0x38, 0x3b, "flat_atomic_umax"), 1761 (0x39, 0x48, 0x39, 0x3c, "flat_atomic_and"), 1762 (0x3a, 0x49, 0x3a, 0x3d, "flat_atomic_or"), 1763 (0x3b, 0x4a, 0x3b, 0x3e, "flat_atomic_xor"), 1764 (0x3c, 0x4b, 0x3c, 0x3f, "flat_atomic_inc"), 1765 (0x3d, 0x4c, 0x3d, 0x40, "flat_atomic_dec"), 1766 (0x3e, -1, 0x3e, 0x50, "flat_atomic_fcmpswap"), 1767 (0x3f, -1, 0x3f, 0x51, "flat_atomic_fmin"), 1768 (0x40, -1, 
0x40, 0x52, "flat_atomic_fmax"), 1769 (0x50, 0x60, 0x50, 0x41, "flat_atomic_swap_x2"), 1770 (0x51, 0x61, 0x51, 0x42, "flat_atomic_cmpswap_x2"), 1771 (0x52, 0x62, 0x52, 0x43, "flat_atomic_add_x2"), 1772 (0x53, 0x63, 0x53, 0x44, "flat_atomic_sub_x2"), 1773 (0x55, 0x64, 0x55, 0x45, "flat_atomic_smin_x2"), 1774 (0x56, 0x65, 0x56, 0x46, "flat_atomic_umin_x2"), 1775 (0x57, 0x66, 0x57, 0x47, "flat_atomic_smax_x2"), 1776 (0x58, 0x67, 0x58, 0x48, "flat_atomic_umax_x2"), 1777 (0x59, 0x68, 0x59, 0x49, "flat_atomic_and_x2"), 1778 (0x5a, 0x69, 0x5a, 0x4a, "flat_atomic_or_x2"), 1779 (0x5b, 0x6a, 0x5b, 0x4b, "flat_atomic_xor_x2"), 1780 (0x5c, 0x6b, 0x5c, 0x4c, "flat_atomic_inc_x2"), 1781 (0x5d, 0x6c, 0x5d, 0x4d, "flat_atomic_dec_x2"), 1782 (0x5e, -1, 0x5e, -1, "flat_atomic_fcmpswap_x2"), 1783 (0x5f, -1, 0x5f, -1, "flat_atomic_fmin_x2"), 1784 (0x60, -1, 0x60, -1, "flat_atomic_fmax_x2"), 1785 ( -1, -1, -1, 0x56, "flat_atomic_add_f32"), 1786} 1787for (gfx7, gfx8, gfx10, gfx11, name) in FLAT: 1788 opcode(name, gfx7, gfx8, gfx10, gfx11, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS? 

# GLOBAL instructions.
# Row layout: (GFX8/9 encoding, GFX10 encoding, GFX11 encoding, mnemonic).
GLOBAL = {
    (0x10, 0x08, 0x10, "global_load_ubyte"),
    (0x11, 0x09, 0x11, "global_load_sbyte"),
    (0x12, 0x0a, 0x12, "global_load_ushort"),
    (0x13, 0x0b, 0x13, "global_load_sshort"),
    (0x14, 0x0c, 0x14, "global_load_dword"),
    (0x15, 0x0d, 0x15, "global_load_dwordx2"),
    (0x16, 0x0f, 0x16, "global_load_dwordx3"),
    (0x17, 0x0e, 0x17, "global_load_dwordx4"),
    (0x18, 0x18, 0x18, "global_store_byte"),
    (0x19, 0x19, 0x24, "global_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x19, "global_store_short"),
    (0x1b, 0x1b, 0x25, "global_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1a, "global_store_dword"),
    (0x1d, 0x1d, 0x1b, "global_store_dwordx2"),
    (0x1e, 0x1f, 0x1c, "global_store_dwordx3"),
    (0x1f, 0x1e, 0x1d, "global_store_dwordx4"),
    (0x20, 0x20, 0x1e, "global_load_ubyte_d16"),
    (0x21, 0x21, 0x21, "global_load_ubyte_d16_hi"),
    (0x22, 0x22, 0x1f, "global_load_sbyte_d16"),
    (0x23, 0x23, 0x22, "global_load_sbyte_d16_hi"),
    (0x24, 0x24, 0x20, "global_load_short_d16"),
    (0x25, 0x25, 0x23, "global_load_short_d16_hi"),
    (0x40, 0x30, 0x33, "global_atomic_swap"),
    (0x41, 0x31, 0x34, "global_atomic_cmpswap"),
    (0x42, 0x32, 0x35, "global_atomic_add"),
    (0x43, 0x33, 0x36, "global_atomic_sub"),
    (0x44, 0x35, 0x38, "global_atomic_smin"),
    (0x45, 0x36, 0x39, "global_atomic_umin"),
    (0x46, 0x37, 0x3a, "global_atomic_smax"),
    (0x47, 0x38, 0x3b, "global_atomic_umax"),
    (0x48, 0x39, 0x3c, "global_atomic_and"),
    (0x49, 0x3a, 0x3d, "global_atomic_or"),
    (0x4a, 0x3b, 0x3e, "global_atomic_xor"),
    (0x4b, 0x3c, 0x3f, "global_atomic_inc"),
    (0x4c, 0x3d, 0x40, "global_atomic_dec"),
    (-1, 0x3e, 0x50, "global_atomic_fcmpswap"),
    (-1, 0x3f, 0x51, "global_atomic_fmin"),
    (-1, 0x40, 0x52, "global_atomic_fmax"),
    (0x60, 0x50, 0x41, "global_atomic_swap_x2"),
    (0x61, 0x51, 0x42, "global_atomic_cmpswap_x2"),
    (0x62, 0x52, 0x43, "global_atomic_add_x2"),
    (0x63, 0x53, 0x44, "global_atomic_sub_x2"),
    (0x64, 0x55, 0x45, "global_atomic_smin_x2"),
    (0x65, 0x56, 0x46, "global_atomic_umin_x2"),
    (0x66, 0x57, 0x47, "global_atomic_smax_x2"),
    (0x67, 0x58, 0x48, "global_atomic_umax_x2"),
    (0x68, 0x59, 0x49, "global_atomic_and_x2"),
    (0x69, 0x5a, 0x4a, "global_atomic_or_x2"),
    (0x6a, 0x5b, 0x4b, "global_atomic_xor_x2"),
    (0x6b, 0x5c, 0x4c, "global_atomic_inc_x2"),
    (0x6c, 0x5d, 0x4d, "global_atomic_dec_x2"),
    (-1, 0x5e, -1, "global_atomic_fcmpswap_x2"),
    (-1, 0x5f, -1, "global_atomic_fmin_x2"),
    (-1, 0x60, -1, "global_atomic_fmax_x2"),
    (-1, 0x16, 0x28, "global_load_dword_addtid"),   # GFX10.3 and newer
    (-1, 0x17, 0x29, "global_store_dword_addtid"),  # GFX10.3 and newer
    (-1, 0x34, 0x37, "global_atomic_csub"),         # GFX10.3 and newer; glc apparently has to be set
    (-1, -1, 0x56, "global_atomic_add_f32"),
}
# The first encoding argument of opcode() is always -1 for GLOBAL; an entry is
# flagged atomic exactly when its mnemonic contains "atomic".
for (enc_gfx8, enc_gfx10, enc_gfx11, mnemonic) in GLOBAL:
    opcode(mnemonic, -1, enc_gfx8, enc_gfx10, enc_gfx11, Format.GLOBAL, InstrClass.VMem,
           is_atomic=("atomic" in mnemonic))

# SCRATCH instructions.
# Row layout: (GFX8/9 encoding, GFX10 encoding, GFX11 encoding, mnemonic).
SCRATCH = {
    (0x10, 0x08, 0x10, "scratch_load_ubyte"),
    (0x11, 0x09, 0x11, "scratch_load_sbyte"),
    (0x12, 0x0a, 0x12, "scratch_load_ushort"),
    (0x13, 0x0b, 0x13, "scratch_load_sshort"),
    (0x14, 0x0c, 0x14, "scratch_load_dword"),
    (0x15, 0x0d, 0x15, "scratch_load_dwordx2"),
    (0x16, 0x0f, 0x16, "scratch_load_dwordx3"),
    (0x17, 0x0e, 0x17, "scratch_load_dwordx4"),
    (0x18, 0x18, 0x18, "scratch_store_byte"),
    (0x19, 0x19, 0x24, "scratch_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x19, "scratch_store_short"),
    (0x1b, 0x1b, 0x25, "scratch_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1a, "scratch_store_dword"),
    (0x1d, 0x1d, 0x1b, "scratch_store_dwordx2"),
    (0x1e, 0x1f, 0x1c, "scratch_store_dwordx3"),
    (0x1f, 0x1e, 0x1d, "scratch_store_dwordx4"),
    (0x20, 0x20, 0x1e, "scratch_load_ubyte_d16"),
    (0x21, 0x21, 0x21, "scratch_load_ubyte_d16_hi"),
    (0x22, 0x22, 0x1f, "scratch_load_sbyte_d16"),
    (0x23, 0x23, 0x22, "scratch_load_sbyte_d16_hi"),
    (0x24, 0x24, 0x20, "scratch_load_short_d16"),
    (0x25, 0x25, 0x23, "scratch_load_short_d16_hi"),
}
for (enc_gfx8, enc_gfx10, enc_gfx11, mnemonic) in SCRATCH:
    opcode(mnemonic, -1, enc_gfx8, enc_gfx10, enc_gfx11, Format.SCRATCH, InstrClass.VMem)

# Sanity check: within one format, no two instructions may use the same opcode
# number on the same hardware generation. Pseudo formats and encodings of -1
# are not checked. The first collision that is not explicitly allowed is
# reported on stdout and the script exits with status 1.
_PSEUDO_FORMATS = (Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION)

# (generations on which the aliasing is tolerated, pair of aliasing mnemonics)
_ALLOWED_ALIASES = (
    (('gfx8', 'gfx9', 'gfx11'), {'v_mul_lo_i32', 'v_mul_lo_u32'}),
    # GFX10.3 replaces v_mad_legacy_f32 with v_fma_legacy_f32
    (('gfx10',), {'v_mad_legacy_f32', 'v_fma_legacy_f32'}),
    # GFX10.3 replaces v_mac_legacy_f32 with v_fmac_legacy_f32
    (('gfx10',), {'v_mac_legacy_f32', 'v_fmac_legacy_f32'}),
)

for generation in ('gfx9', 'gfx10', 'gfx11'):
    first_user = {}
    for info in opcodes.values():
        if info.format in _PSEUDO_FORMATS:
            continue

        encoding = getattr(info, 'opcode_' + generation)
        if encoding == -1:
            continue

        slot = (info.format, encoding)
        if slot not in first_user:
            # First instruction seen with this (format, number) pair.
            first_user[slot] = info.name
            continue

        clash = {first_user[slot], info.name}
        if any(generation in gens and clash == pair for gens, pair in _ALLOWED_ALIASES):
            continue

        print('%s and %s share the same opcode number (%s)' % (first_user[slot], info.name, generation))
        sys.exit(1)