1# 2# Copyright (c) 2018 Valve Corporation 3# 4# Permission is hereby granted, free of charge, to any person obtaining a 5# copy of this software and associated documentation files (the "Software"), 6# to deal in the Software without restriction, including without limitation 7# the rights to use, copy, modify, merge, publish, distribute, sublicense, 8# and/or sell copies of the Software, and to permit persons to whom the 9# Software is furnished to do so, subject to the following conditions: 10# 11# The above copyright notice and this permission notice (including the next 12# paragraph) shall be included in all copies or substantial portions of the 13# Software. 14# 15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21# IN THE SOFTWARE. 
#

# Class that represents all the information we have about the opcode
# NOTE: this must be kept in sync with aco_op_info

import sys
from enum import Enum


class InstrClass(Enum):
    """Instruction class stored on every Opcode (its ``cls`` attribute)."""
    Valu32 = 0
    ValuConvert32 = 1
    Valu64 = 2
    ValuQuarterRate32 = 3
    ValuFma = 4
    ValuTranscendental32 = 5
    ValuDouble = 6
    ValuDoubleAdd = 7
    ValuDoubleConvert = 8
    ValuDoubleTranscendental = 9
    Salu = 10
    SMem = 11
    Barrier = 12
    Branch = 13
    Sendmsg = 14
    DS = 15
    Export = 16
    VMem = 17
    Waitcnt = 18
    Other = 19


class Format(Enum):
    """Instruction formats.

    PSEUDO .. VOP3P are small sequential values (7 is unused); VOP1 .. SDWA
    are single-bit values (1 << 8 .. 1 << 14).
    NOTE(review): the single-bit values are presumably meant to be OR-ed
    together by the consumer of this table -- confirm against aco_op_info.
    """
    PSEUDO = 0
    SOP1 = 1
    SOP2 = 2
    SOPK = 3
    SOPP = 4
    SOPC = 5
    SMEM = 6
    DS = 8
    MTBUF = 9
    MUBUF = 10
    MIMG = 11
    EXP = 12
    FLAT = 13
    GLOBAL = 14
    SCRATCH = 15
    PSEUDO_BRANCH = 16
    PSEUDO_BARRIER = 17
    PSEUDO_REDUCTION = 18
    VOP3P = 19
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
    VOP3 = 1 << 11
    VINTRP = 1 << 12
    DPP = 1 << 13
    SDWA = 1 << 14

    def get_builder_fields(self):
        """Return the extra builder fields of this format.

        Each entry is a tuple ``(type, name, default)`` or
        ``(type, name, default, dest)``:

        - ``type`` and ``name`` are strings used to declare the field,
        - ``default`` is the default value, or None when there is none
          (see get_builder_field_decls),
        - ``dest``, when present, replaces ``name`` as the destination
          reported by get_builder_field_dests.

        Formats without extra fields yield an empty list.
        """
        if self == Format.SOPK:
            return [('uint16_t', 'imm', None)]
        elif self == Format.SOPP:
            return [('uint32_t', 'block', '-1'),
                    ('uint32_t', 'imm', '0')]
        elif self == Format.SMEM:
            return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'nv', 'false')]
        elif self == Format.DS:
            return [('int16_t', 'offset0', '0'),
                    ('int8_t', 'offset1', '0'),
                    ('bool', 'gds', 'false')]
        elif self == Format.MTBUF:
            return [('unsigned', 'dfmt', None),
                    ('unsigned', 'nfmt', None),
                    ('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false')]
        elif self == Format.MUBUF:
            return [('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'swizzled', 'false'),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'addr64', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lds', 'false')]
        elif self == Format.MIMG:
            # A second, unreachable `return` (a copy of the VINTRP field
            # list) used to follow this one; it has been removed.
            return [('unsigned', 'dmask', '0xF'),
                    ('bool', 'da', 'false'),
                    ('bool', 'unrm', 'true'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lwe', 'false'),
                    ('bool', 'r128_a16', 'false', 'r128'),
                    ('bool', 'd16', 'false')]
        elif self == Format.EXP:
            return [('unsigned', 'enabled_mask', None),
                    ('unsigned', 'dest', None),
                    ('bool', 'compr', 'false', 'compressed'),
                    ('bool', 'done', 'false'),
                    ('bool', 'vm', 'false', 'valid_mask')]
        elif self == Format.PSEUDO_BRANCH:
            return [('uint32_t', 'target0', '0', 'target[0]'),
                    ('uint32_t', 'target1', '0', 'target[1]')]
        elif self == Format.PSEUDO_REDUCTION:
            return [('ReduceOp', 'op', None, 'reduce_op'),
                    ('unsigned', 'cluster_size', '0')]
        elif self == Format.PSEUDO_BARRIER:
            return [('memory_sync_info', 'sync', None),
                    ('sync_scope', 'exec_scope', 'scope_invocation')]
        elif self == Format.VINTRP:
            return [('unsigned', 'attribute', None),
                    ('unsigned', 'component', None)]
        elif self == Format.DPP:
            return [('uint16_t', 'dpp_ctrl', None),
                    ('uint8_t', 'row_mask', '0xF'),
                    ('uint8_t', 'bank_mask', '0xF'),
                    ('bool', 'bound_ctrl', 'true')]
        elif self == Format.VOP3P:
            return [('uint8_t', 'opsel_lo', None),
                    ('uint8_t', 'opsel_hi', None)]
        elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
            # The offset default is the integer 0 (not the string '0');
            # it formats identically in get_builder_field_decls.
            return [('uint16_t', 'offset', 0),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'lds', 'false'),
                    ('bool', 'nv', 'false')]
        else:
            return []

    def get_builder_field_names(self):
        """Return the name of every builder field, in declaration order."""
        return [f[1] for f in self.get_builder_fields()]

    def get_builder_field_dests(self):
        """Return each field's destination: the 4th tuple item if given, else its name."""
        return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]

    def get_builder_field_decls(self):
        """Return 'type name=default' (or 'type name' when the default is None) per field."""
        return [('%s %s=%s' % (f[0], f[1], f[2]) if f[2] is not None else '%s %s' % (f[0], f[1]))
                for f in self.get_builder_fields()]

    def get_builder_initialization(self, num_operands):
        """Return the extra initialization text emitted for this format.

        Only SDWA produces text: one ``instr->sel[i]`` assignment for each
        of the first min(num_operands, 2) operands, followed by a single
        ``instr->dst_sel`` assignment. Every other format yields ''.
        """
        res = ''
        if self == Format.SDWA:
            for i in range(min(num_operands, 2)):
                res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
            res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
        return res


class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """
    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
        """Parameters:

        - name is the name of the opcode (a string)
        - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are the integer opcode
          numbers for the respective hardware generations (the tables in
          this file use -1 where an instruction has no encoding)
        - format is the Format of the instruction
        - input_mod and output_mod are booleans, stored as the strings
          "1" / "0"
        - is_atomic is a truthy/falsy flag, stored as "1" / "0"
        - cls is stored unchanged (the callers in this file pass an
          InstrClass)

        The operand size (self.operand_size) is derived from the data type
        suffix of the name, see below.
        """
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)

        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format
        self.cls = cls

        # The data type is the last '_'-separated component of the name,
        # ignoring an '_e64' marker.
        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]

        op_dtype_sizes = {'{}{}'.format(prefix, size) : size for prefix in 'biuf' for size in [64, 32, 24, 16]}
        # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
        op_dtype_sizes['b16'] = 32
        op_dtype_sizes['i16'] = 32
        op_dtype_sizes['u16'] = 32

        # If we can't tell the operand size, default to 32.
        self.operand_size = op_dtype_sizes.get(op_dtype, 32)

        # exceptions for operands:
        if 'qsad_' in name:
            self.operand_size = 0
        elif 'sad_' in name:
            self.operand_size = 32
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif self.operand_size == 24:
            self.operand_size = 32
        elif op_dtype == 'u8' or op_dtype == 'i8':
            self.operand_size = 32
        elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                      'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32


# global dictionary of opcodes
opcodes = {}


def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False):
    """Create an Opcode and register it in the global ``opcodes`` dictionary.

    Each name may be registered only once (enforced with an assert).
    """
    assert name not in opcodes
    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls)


def default_class(opcodes, cls):
    """Yield every tuple of ``opcodes`` with an InstrClass as its last item.

    Tuples that already end in an InstrClass are passed through unchanged;
    all others get ``cls`` appended.
    """
    for op in opcodes:
        if isinstance(op[-1], InstrClass):
            yield op
        else:
            yield op + (cls,)


opcode("exp", 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g.
subgroupExclusiveMin() 282opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION) 283 284opcode("p_branch", format=Format.PSEUDO_BRANCH) 285opcode("p_cbranch", format=Format.PSEUDO_BRANCH) 286opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH) 287opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH) 288 289opcode("p_barrier", format=Format.PSEUDO_BARRIER) 290 291opcode("p_spill") 292opcode("p_reload") 293 294# start/end linear vgprs 295opcode("p_start_linear_vgpr") 296opcode("p_end_linear_vgpr") 297 298opcode("p_wqm") 299opcode("p_discard_if") 300opcode("p_demote_to_helper") 301opcode("p_is_helper") 302opcode("p_exit_early_if") 303 304# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64 305opcode("p_bpermute") 306 307# creates a lane mask where only the first active lane is selected 308opcode("p_elect") 309 310opcode("p_constaddr") 311 312# These don't have to be pseudo-ops, but it makes optimization easier to only 313# have to consider two instructions. 314# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension 315opcode("p_extract") # src1=index, src2=bits, src3=signext 316# (src0 & ((1 << bits) - 1)) << (index * bits) 317opcode("p_insert") # src1=index, src2=bits 318 319 320# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc) 321SOP2 = { 322 # GFX6, GFX7, GFX8, GFX9, GFX10, name 323 (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"), 324 (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"), 325 (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"), 326 (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"), 327 (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"), 328 (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"), 329 (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"), 330 (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"), 331 (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"), 332 (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"), 333 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"), 334 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"), 335 (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, 
"s_and_b32"), 336 (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"), 337 (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"), 338 (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"), 339 (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"), 340 (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"), 341 (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"), 342 (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"), 343 (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"), 344 (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"), 345 (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"), 346 (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"), 347 (0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"), 348 (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"), 349 (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"), 350 (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"), 351 (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"), 352 (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"), 353 (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"), 354 (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"), 355 (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"), 356 (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"), 357 (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"), 358 (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"), 359 (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"), 360 (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"), 361 (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"), 362 (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"), 363 (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"), 364 (0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork", InstrClass.Branch), 365 (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"), 366 ( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64", InstrClass.Branch), 367 ( -1, -1, -1, 0x2e, 0x2e, "s_lshl1_add_u32"), 368 ( -1, -1, -1, 0x2f, 0x2f, "s_lshl2_add_u32"), 369 ( -1, -1, -1, 0x30, 0x30, "s_lshl3_add_u32"), 370 ( -1, -1, -1, 0x31, 0x31, "s_lshl4_add_u32"), 371 ( -1, -1, -1, 0x32, 0x32, "s_pack_ll_b32_b16"), 372 ( -1, -1, -1, 0x33, 0x33, "s_pack_lh_b32_b16"), 373 ( -1, -1, -1, 0x34, 0x34, "s_pack_hh_b32_b16"), 374 ( -1, -1, -1, 0x2c, 0x35, "s_mul_hi_u32"), 
375 ( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"), 376 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2. 377 ( -1, -1, -1, -1, -1, "p_constaddr_addlo"), 378} 379for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP2, InstrClass.Salu): 380 opcode(name, gfx7, gfx9, gfx10, Format.SOP2, cls) 381 382 383# SOPK instructions: 0 input (+ imm), 1 output + optional scc 384SOPK = { 385 # GFX6, GFX7, GFX8, GFX9, GFX10, name 386 (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"), 387 ( -1, -1, -1, -1, 0x01, "s_version"), # GFX10+ 388 (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9 389 (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"), 390 (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"), 391 (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"), 392 (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"), 393 (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"), 394 (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"), 395 (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"), 396 (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"), 397 (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"), 398 (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"), 399 (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"), 400 (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"), 401 (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"), 402 (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"), 403 (0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork", InstrClass.Branch), 404 (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"), 405 (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"), 406 (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal 407 ( -1, -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch), 408 ( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt", InstrClass.Waitcnt), 409 ( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt), 410 ( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt), 411 ( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt), 412 
( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch), 413 ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch), 414} 415for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPK, InstrClass.Salu): 416 opcode(name, gfx7, gfx9, gfx10, Format.SOPK, cls) 417 418 419# SOP1 instructions: 1 input, 1 output (+optional SCC) 420SOP1 = { 421 # GFX6, GFX7, GFX8, GFX9, GFX10, name 422 (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"), 423 (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"), 424 (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"), 425 (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"), 426 (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"), 427 (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"), 428 (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"), 429 (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"), 430 (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"), 431 (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"), 432 (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"), 433 (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"), 434 (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"), 435 (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"), 436 (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"), 437 (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"), 438 (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"), 439 (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"), 440 (0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"), 441 (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"), 442 (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"), 443 (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"), 444 (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"), 445 (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"), 446 (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"), 447 (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"), 448 (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"), 449 (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"), 450 (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"), 451 (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch), 452 
(0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch), 453 (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch), 454 (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"), 455 (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"), 456 (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"), 457 (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"), 458 (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"), 459 (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"), 460 (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"), 461 (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"), 462 (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"), 463 (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"), 464 (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"), 465 (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"), 466 (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"), 467 (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"), 468 (0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join", InstrClass.Branch), 469 (0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"), 470 (0x35, 0x35, -1, -1, 0x35, "s_mov_fed_b32"), 471 ( -1, -1, 0x32, 0x32, -1, "s_set_gpr_idx_idx"), 472 ( -1, -1, -1, 0x33, 0x37, "s_andn1_saveexec_b64"), 473 ( -1, -1, -1, 0x34, 0x38, "s_orn1_saveexec_b64"), 474 ( -1, -1, -1, 0x35, 0x39, "s_andn1_wrexec_b64"), 475 ( -1, -1, -1, 0x36, 0x3a, "s_andn2_wrexec_b64"), 476 ( -1, -1, -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"), 477 ( -1, -1, -1, -1, 0x3c, "s_and_saveexec_b32"), 478 ( -1, -1, -1, -1, 0x3d, "s_or_saveexec_b32"), 479 ( -1, -1, -1, -1, 0x3e, "s_xor_saveexec_b32"), 480 ( -1, -1, -1, -1, 0x3f, "s_andn2_saveexec_b32"), 481 ( -1, -1, -1, -1, 0x40, "s_orn2_saveexec_b32"), 482 ( -1, -1, -1, -1, 0x41, "s_nand_saveexec_b32"), 483 ( -1, -1, -1, -1, 0x42, "s_nor_saveexec_b32"), 484 ( -1, -1, -1, -1, 0x43, "s_xnor_saveexec_b32"), 485 ( -1, -1, -1, -1, 0x44, "s_andn1_saveexec_b32"), 486 ( -1, -1, -1, -1, 0x45, "s_orn1_saveexec_b32"), 487 ( -1, -1, -1, -1, 0x46, "s_andn1_wrexec_b32"), 488 ( -1, -1, 
-1, -1, 0x47, "s_andn2_wrexec_b32"), 489 ( -1, -1, -1, -1, 0x49, "s_movrelsd_2_b32"), 490 # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1. 491 ( -1, -1, -1, -1, -1, "p_constaddr_getpc"), 492} 493for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP1, InstrClass.Salu): 494 opcode(name, gfx7, gfx9, gfx10, Format.SOP1, cls) 495 496 497# SOPC instructions: 2 inputs and 0 outputs (+SCC) 498SOPC = { 499 # GFX6, GFX7, GFX8, GFX9, GFX10, name 500 (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"), 501 (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"), 502 (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"), 503 (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"), 504 (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"), 505 (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"), 506 (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"), 507 (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"), 508 (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"), 509 (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"), 510 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"), 511 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"), 512 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"), 513 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"), 514 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"), 515 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"), 516 (0x10, 0x10, 0x10, 0x10, -1, "s_setvskip"), 517 ( -1, -1, 0x11, 0x11, -1, "s_set_gpr_idx_on"), 518 ( -1, -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"), 519 ( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"), 520} 521for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC: 522 opcode(name, gfx7, gfx9, gfx10, Format.SOPC, InstrClass.Salu) 523 524 525# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs 526SOPP = { 527 # GFX6, GFX7, GFX8, GFX9, GFX10, name 528 (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"), 529 (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"), 530 (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch), 531 ( -1, -1, 0x03, 0x03, 0x03, 
"s_wakeup"), 532 (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch), 533 (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch), 534 (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz", InstrClass.Branch), 535 (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch), 536 (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch), 537 (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch), 538 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier), 539 ( -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"), 540 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt), 541 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"), 542 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"), 543 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"), 544 (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg), 545 (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg), 546 (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch), 547 (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"), 548 (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"), 549 (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"), 550 (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"), 551 ( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch), 552 ( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch), 553 ( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch), 554 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch), 555 ( -1, -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"), 556 ( -1, -1, 0x1c, 0x1c, -1, "s_set_gpr_idx_off"), 557 ( -1, -1, 0x1d, 0x1d, -1, "s_set_gpr_idx_mode"), 558 ( -1, -1, -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"), 559 ( -1, -1, -1, -1, 0x1f, "s_code_end"), 560 ( -1, -1, -1, -1, 0x20, "s_inst_prefetch"), 561 ( -1, -1, -1, -1, 0x21, "s_clause"), 562 ( -1, -1, -1, -1, 0x22, "s_wait_idle"), 563 ( -1, -1, -1, -1, 0x23, "s_waitcnt_depctr"), 564 ( -1, -1, -1, -1, 0x24, 
"s_round_mode"), 565 ( -1, -1, -1, -1, 0x25, "s_denorm_mode"), 566 ( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"), 567} 568for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPP, InstrClass.Salu): 569 opcode(name, gfx7, gfx9, gfx10, Format.SOPP, cls) 570 571 572# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output 573# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions 574SMEM = { 575 # GFX6, GFX7, GFX8, GFX9, GFX10, name 576 (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"), 577 (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"), 578 (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"), 579 (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"), 580 (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"), 581 ( -1, -1, -1, 0x05, 0x05, "s_scratch_load_dword"), 582 ( -1, -1, -1, 0x06, 0x06, "s_scratch_load_dwordx2"), 583 ( -1, -1, -1, 0x07, 0x07, "s_scratch_load_dwordx4"), 584 (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"), 585 (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"), 586 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"), 587 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"), 588 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"), 589 ( -1, -1, 0x10, 0x10, 0x10, "s_store_dword"), 590 ( -1, -1, 0x11, 0x11, 0x11, "s_store_dwordx2"), 591 ( -1, -1, 0x12, 0x12, 0x12, "s_store_dwordx4"), 592 ( -1, -1, -1, 0x15, 0x15, "s_scratch_store_dword"), 593 ( -1, -1, -1, 0x16, 0x16, "s_scratch_store_dwordx2"), 594 ( -1, -1, -1, 0x17, 0x17, "s_scratch_store_dwordx4"), 595 ( -1, -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"), 596 ( -1, -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"), 597 ( -1, -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"), 598 ( -1, -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"), 599 (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"), 600 ( -1, -1, 0x21, 0x21, 0x21, "s_dcache_wb"), 601 ( -1, 0x1d, 0x22, 0x22, -1, "s_dcache_inv_vol"), 602 ( -1, -1, 0x23, 0x23, -1, "s_dcache_wb_vol"), 603 
(0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"), #GFX6-GFX10 604 ( -1, -1, 0x25, 0x25, 0x25, "s_memrealtime"), 605 ( -1, -1, 0x26, 0x26, 0x26, "s_atc_probe"), 606 ( -1, -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"), 607 ( -1, -1, -1, 0x28, 0x28, "s_dcache_discard"), 608 ( -1, -1, -1, 0x29, 0x29, "s_dcache_discard_x2"), 609 ( -1, -1, -1, -1, 0x2a, "s_get_waveid_in_workgroup"), 610 ( -1, -1, -1, 0x40, 0x40, "s_buffer_atomic_swap"), 611 ( -1, -1, -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"), 612 ( -1, -1, -1, 0x42, 0x42, "s_buffer_atomic_add"), 613 ( -1, -1, -1, 0x43, 0x43, "s_buffer_atomic_sub"), 614 ( -1, -1, -1, 0x44, 0x44, "s_buffer_atomic_smin"), 615 ( -1, -1, -1, 0x45, 0x45, "s_buffer_atomic_umin"), 616 ( -1, -1, -1, 0x46, 0x46, "s_buffer_atomic_smax"), 617 ( -1, -1, -1, 0x47, 0x47, "s_buffer_atomic_umax"), 618 ( -1, -1, -1, 0x48, 0x48, "s_buffer_atomic_and"), 619 ( -1, -1, -1, 0x49, 0x49, "s_buffer_atomic_or"), 620 ( -1, -1, -1, 0x4a, 0x4a, "s_buffer_atomic_xor"), 621 ( -1, -1, -1, 0x4b, 0x4b, "s_buffer_atomic_inc"), 622 ( -1, -1, -1, 0x4c, 0x4c, "s_buffer_atomic_dec"), 623 ( -1, -1, -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"), 624 ( -1, -1, -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"), 625 ( -1, -1, -1, 0x62, 0x62, "s_buffer_atomic_add_x2"), 626 ( -1, -1, -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"), 627 ( -1, -1, -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"), 628 ( -1, -1, -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"), 629 ( -1, -1, -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"), 630 ( -1, -1, -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"), 631 ( -1, -1, -1, 0x68, 0x68, "s_buffer_atomic_and_x2"), 632 ( -1, -1, -1, 0x69, 0x69, "s_buffer_atomic_or_x2"), 633 ( -1, -1, -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"), 634 ( -1, -1, -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"), 635 ( -1, -1, -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"), 636 ( -1, -1, -1, 0x80, 0x80, "s_atomic_swap"), 637 ( -1, -1, -1, 0x81, 0x81, "s_atomic_cmpswap"), 638 ( -1, -1, -1, 0x82, 0x82, "s_atomic_add"), 639 ( -1, -1, -1, 
0x83, 0x83, "s_atomic_sub"), 640 ( -1, -1, -1, 0x84, 0x84, "s_atomic_smin"), 641 ( -1, -1, -1, 0x85, 0x85, "s_atomic_umin"), 642 ( -1, -1, -1, 0x86, 0x86, "s_atomic_smax"), 643 ( -1, -1, -1, 0x87, 0x87, "s_atomic_umax"), 644 ( -1, -1, -1, 0x88, 0x88, "s_atomic_and"), 645 ( -1, -1, -1, 0x89, 0x89, "s_atomic_or"), 646 ( -1, -1, -1, 0x8a, 0x8a, "s_atomic_xor"), 647 ( -1, -1, -1, 0x8b, 0x8b, "s_atomic_inc"), 648 ( -1, -1, -1, 0x8c, 0x8c, "s_atomic_dec"), 649 ( -1, -1, -1, 0xa0, 0xa0, "s_atomic_swap_x2"), 650 ( -1, -1, -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"), 651 ( -1, -1, -1, 0xa2, 0xa2, "s_atomic_add_x2"), 652 ( -1, -1, -1, 0xa3, 0xa3, "s_atomic_sub_x2"), 653 ( -1, -1, -1, 0xa4, 0xa4, "s_atomic_smin_x2"), 654 ( -1, -1, -1, 0xa5, 0xa5, "s_atomic_umin_x2"), 655 ( -1, -1, -1, 0xa6, 0xa6, "s_atomic_smax_x2"), 656 ( -1, -1, -1, 0xa7, 0xa7, "s_atomic_umax_x2"), 657 ( -1, -1, -1, 0xa8, 0xa8, "s_atomic_and_x2"), 658 ( -1, -1, -1, 0xa9, 0xa9, "s_atomic_or_x2"), 659 ( -1, -1, -1, 0xaa, 0xaa, "s_atomic_xor_x2"), 660 ( -1, -1, -1, 0xab, 0xab, "s_atomic_inc_x2"), 661 ( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"), 662} 663for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM: 664 opcode(name, gfx7, gfx9, gfx10, Format.SMEM, InstrClass.SMem, is_atomic = "atomic" in name) 665 666 667# VOP2 instructions: 2 inputs, 1 output (+ optional vcc) 668# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8 669VOP2 = { 670 # GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers 671 (0x01, 0x01, -1, -1, -1, "v_readlane_b32", False), 672 (0x02, 0x02, -1, -1, -1, "v_writelane_b32", False), 673 (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True), 674 (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True), 675 (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True), 676 (0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True), 677 (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True), 678 (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True), 679 (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", 
False), 680 (0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False), 681 (0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False), 682 (0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False), 683 ( -1, -1, -1, 0x39, 0x0d, "v_dot4c_i32_i8", False), 684 (0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True), 685 (0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True), 686 (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True), 687 (0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True), 688 (0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False), 689 (0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False), 690 (0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False), 691 (0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False), 692 (0x15, 0x15, -1, -1, -1, "v_lshr_b32", False), 693 (0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False), 694 (0x17, 0x17, -1, -1, -1, "v_ashr_i32", False), 695 (0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False), 696 (0x19, 0x19, -1, -1, -1, "v_lshl_b32", False), 697 (0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False), 698 (0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False), 699 (0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False), 700 (0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False), 701 ( -1, -1, -1, -1, 0x1e, "v_xnor_b32", False), 702 (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True), 703 (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False), 704 (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False), 705 (0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False), 706 (0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA 707 (0x26, 0x26, 0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA 708 (0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA 709 (0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA 710 (0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA 711 (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA 712 ( -1, -1, -1, -1, 
0x2b, "v_fmac_f32", True), 713 ( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True), 714 ( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True), 715 (0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True), 716 ( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True), 717 ( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True), 718 ( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True), 719 ( -1, -1, 0x22, 0x22, 0x35, "v_mul_f16", True), 720 ( -1, -1, 0x23, 0x23, -1, "v_mac_f16", True), 721 ( -1, -1, 0x24, 0x24, -1, "v_madmk_f16", False), 722 ( -1, -1, 0x25, 0x25, -1, "v_madak_f16", False), 723 ( -1, -1, 0x26, 0x26, -1, "v_add_u16", False), 724 ( -1, -1, 0x27, 0x27, -1, "v_sub_u16", False), 725 ( -1, -1, 0x28, 0x28, -1, "v_subrev_u16", False), 726 ( -1, -1, 0x29, 0x29, -1, "v_mul_lo_u16", False), 727 ( -1, -1, 0x2a, 0x2a, -1, "v_lshlrev_b16", False), 728 ( -1, -1, 0x2b, 0x2b, -1, "v_lshrrev_b16", False), 729 ( -1, -1, 0x2c, 0x2c, -1, "v_ashrrev_i16", False), 730 ( -1, -1, 0x2d, 0x2d, 0x39, "v_max_f16", True), 731 ( -1, -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True), 732 ( -1, -1, 0x2f, 0x2f, -1, "v_max_u16", False), 733 ( -1, -1, 0x30, 0x30, -1, "v_max_i16", False), 734 ( -1, -1, 0x31, 0x31, -1, "v_min_u16", False), 735 ( -1, -1, 0x32, 0x32, -1, "v_min_i16", False), 736 ( -1, -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False), 737 ( -1, -1, -1, 0x34, 0x25, "v_add_u32", False), # use v_add_co_u32 on GFX8, called v_add_nc_u32 in RDNA 738 ( -1, -1, -1, 0x35, 0x26, "v_sub_u32", False), # use v_sub_co_u32 on GFX8, called v_sub_nc_u32 in RDNA 739 ( -1, -1, -1, 0x36, 0x27, "v_subrev_u32", False), # use v_subrev_co_u32 on GFX8, called v_subrev_nc_u32 in RDNA 740 ( -1, -1, -1, -1, 0x36, "v_fmac_f16", False), 741 ( -1, -1, -1, -1, 0x37, "v_fmamk_f16", False), 742 ( -1, -1, -1, -1, 0x38, "v_fmaak_f16", False), 743 ( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False), 744} 745for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2: 746 opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers) 747 748if 
True: 749 # v_cndmask_b32 can use input modifiers but not output modifiers 750 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32") 751 opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, True, False) 752 753 754# VOP1 instructions: instructions with 1 input and 1 output 755VOP1 = { 756 # GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers 757 (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False), 758 (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False), 759 (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False), 760 (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert), 761 (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert), 762 (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True), 763 (0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True), 764 (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False), 765 (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False), 766 (0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9 767 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True), 768 ( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True), 769 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True), 770 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False), 771 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False), 772 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True), 773 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert), 774 (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert), 775 (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True), 776 (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True), 777 (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True), 778 (0x14, 0x14, 0x14, 0x14, 0x14, 
"v_cvt_f32_ubyte3", False, True), 779 (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert), 780 (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert), 781 ( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble), 782 ( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble), 783 ( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble), 784 ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble), 785 ( -1, -1, -1, -1, 0x1b, "v_pipeflush", False, False), 786 (0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True), 787 (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True), 788 (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True), 789 (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True), 790 (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True), 791 (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32), 792 (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32), 793 (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32), 794 (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32), 795 (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32), 796 (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32), 797 (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32), 798 (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32), 799 (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32), 800 (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32), 801 (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental), 802 (0x30, 0x30, 
-1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental), 803 (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental), 804 (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental), 805 (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32), 806 (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental), 807 (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32), 808 (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32), 809 (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False), 810 (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False), 811 (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False), 812 (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False), 813 (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False), 814 (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble), 815 (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble), 816 (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble), 817 (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False), 818 (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False), 819 (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False), 820 (0x42, 0x42, 0x36, -1, 0x42, "v_movreld_b32", False, False), 821 (0x43, 0x43, 0x37, -1, 0x43, "v_movrels_b32", False, False), 822 (0x44, 0x44, 0x38, -1, 0x44, "v_movrelsd_b32", False, False), 823 ( -1, -1, -1, -1, 0x48, "v_movrelsd_2_b32", False, False), 824 ( -1, -1, -1, 0x37, -1, "v_screen_partition_4se_b32", False, False), 825 ( -1, -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True), 826 ( -1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True), 827 ( -1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False), 828 ( -1, -1, 0x3c, 
0x3c, 0x53, "v_cvt_i16_f16", True, False), 829 ( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32), 830 ( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32), 831 ( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32), 832 ( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32), 833 ( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32), 834 ( -1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False), 835 ( -1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False), 836 ( -1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True), 837 ( -1, -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True), 838 ( -1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True), 839 ( -1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True), 840 ( -1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True), 841 ( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32), 842 ( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32), 843 ( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32), 844 ( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32), 845 ( -1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False), 846 ( -1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False), 847 ( -1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False), 848 ( -1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False), 849 ( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False), 850} 851for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP1, InstrClass.Valu32): 852 opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod) 853 854 855# VOPC instructions: 856 857VOPC_CLASS = { 858 (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"), 859 ( -1, -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"), 860 (0x98, 0x98, 0x11, 
0x11, 0x98, "v_cmpx_class_f32"), 861 ( -1, -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"), 862 (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble), 863 (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble), 864} 865for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(VOPC_CLASS, InstrClass.Valu32): 866 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False) 867 868COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"] 869 870for i in range(8): 871 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16") 872 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 873 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16") 874 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 875 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16") 876 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 877 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16") 878 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 879 880for i in range(16): 881 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32") 882 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 883 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32") 884 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False) 885 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64") 886 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False) 887 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, 
"v_cmpx_"+COMPF[i]+"_f64") 888 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False) 889 # GFX_6_7 890 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32") 891 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32") 892 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x60+i, 0x60+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64") 893 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x70+i, 0x70+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64") 894 895COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"] 896 897# GFX_8_9 898for i in [0,7]: # only 0 and 7 899 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16") 900 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 901 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16") 902 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 903 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16") 904 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 905 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16") 906 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 907 908for i in range(1, 7): # [1..6] 909 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16") 910 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 911 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16") 912 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 913 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16") 914 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 915 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16") 916 opcode(name, gfx7, gfx9, 
gfx10, Format.VOPC, InstrClass.Valu32) 917 918for i in range(8): 919 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32") 920 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 921 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32") 922 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 923 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64") 924 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64) 925 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64") 926 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64) 927 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32") 928 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 929 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32") 930 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32) 931 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64") 932 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64) 933 (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64") 934 opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64) 935 936 937# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output 938VOPP = { 939 # opcode, name, input/output modifiers 940 (0x00, "v_pk_mad_i16", False), 941 (0x01, "v_pk_mul_lo_u16", False), 942 (0x02, "v_pk_add_i16", False), 943 (0x03, "v_pk_sub_i16", False), 944 (0x04, "v_pk_lshlrev_b16", False), 945 (0x05, "v_pk_lshrrev_b16", False), 946 (0x06, "v_pk_ashrrev_i16", False), 947 (0x07, "v_pk_max_i16", False), 948 (0x08, "v_pk_min_i16", False), 949 (0x09, "v_pk_mad_u16", 
False), 950 (0x0a, "v_pk_add_u16", False), 951 (0x0b, "v_pk_sub_u16", False), 952 (0x0c, "v_pk_max_u16", False), 953 (0x0d, "v_pk_min_u16", False), 954 (0x0e, "v_pk_fma_f16", True), 955 (0x0f, "v_pk_add_f16", True), 956 (0x10, "v_pk_mul_f16", True), 957 (0x11, "v_pk_min_f16", True), 958 (0x12, "v_pk_max_f16", True), 959 (0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA 960 (0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA 961 (0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA 962} 963# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here 964# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name) 965for (code, name, modifiers) in VOPP: 966 opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers) 967opcode("v_dot2_i32_i16", -1, 0x26, 0x14, Format.VOP3P, InstrClass.Valu32) 968opcode("v_dot2_u32_u16", -1, 0x27, 0x15, Format.VOP3P, InstrClass.Valu32) 969opcode("v_dot4_i32_i8", -1, 0x28, 0x16, Format.VOP3P, InstrClass.Valu32) 970opcode("v_dot4_u32_u8", -1, 0x29, 0x17, Format.VOP3P, InstrClass.Valu32) 971 972 973# VINTERP instructions: 974VINTRP = { 975 (0x00, "v_interp_p1_f32"), 976 (0x01, "v_interp_p2_f32"), 977 (0x02, "v_interp_mov_f32"), 978} 979# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 980for (code, name) in VINTRP: 981 opcode(name, code, code, code, Format.VINTRP, InstrClass.Valu32) 982 983# VOP3 instructions: 3 inputs, 1 output 984# VOP3b instructions: have a unique scalar output, e.g. 
VOP2 with vcc out 985VOP3 = { 986 (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True), # GFX6-GFX10 987 (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True), 988 (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False), 989 (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False), 990 (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True), 991 (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True), 992 (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True), 993 (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True), 994 (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False), 995 (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False), 996 (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False), 997 (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma), 998 (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble), 999 (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False), 1000 (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False), 1001 (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False), 1002 (0x150, 0x150, -1, -1, 0x150, "v_mullit_f32", True, True), 1003 (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True), 1004 (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False), 1005 (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False), 1006 (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True), 1007 (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False), 1008 (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False), 1009 (0x157, 0x157, 0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True), 1010 (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False), 1011 (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False), 1012 (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False), 1013 (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, 
"v_sad_hi_u8", False, False), 1014 (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False), 1015 (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False), 1016 (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False), 1017 (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True), 1018 (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True), 1019 (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64), 1020 (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64), 1021 (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64), 1022 (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd), 1023 (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble), 1024 (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble), 1025 (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble), 1026 (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers 1027 (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32), 1028 (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32), 1029 (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32 1030 (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32), 1031 (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC 1032 (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC 1033 (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input 1034 (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input 1035 (0x171, 0x171, 0x1e4, 0x1e4, 0x171, 
"v_msad_u8", False, False), 1036 (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False), 1037 (0x172, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference? 1038 (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False), 1039 (0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference? 1040 (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble), 1041 ( -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False), 1042 ( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64), 1043 ( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64), 1044 ( -1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True), 1045 ( -1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False), 1046 ( -1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False), 1047 ( -1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False), 1048 ( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma), 1049 ( -1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True), 1050 (0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False), 1051 ( -1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False), 1052 ( -1, -1, -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False), 1053 ( -1, -1, -1, 0x1f3, 0x345, "v_xad_u32", False, False), 1054 ( -1, -1, -1, 0x1f4, 0x351, "v_min3_f16", True, True), 1055 ( -1, -1, -1, 0x1f5, 0x352, "v_min3_i16", False, False), 1056 ( -1, -1, -1, 0x1f6, 0x353, "v_min3_u16", False, False), 1057 ( -1, -1, -1, 0x1f7, 0x354, "v_max3_f16", True, True), 1058 ( -1, -1, -1, 0x1f8, 0x355, "v_max3_i16", False, False), 1059 ( -1, -1, -1, 0x1f9, 0x356, "v_max3_u16", False, False), 1060 ( -1, -1, -1, 0x1fa, 0x357, "v_med3_f16", True, True), 1061 ( -1, -1, -1, 0x1fb, 0x358, "v_med3_i16", False, False), 1062 ( -1, -1, -1, 0x1fc, 0x359, "v_med3_u16", False, False), 1063 ( -1, -1, -1, 0x1fd, 0x346, "v_lshl_add_u32", False, 
False), 1064 ( -1, -1, -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False), 1065 ( -1, -1, -1, 0x1ff, 0x36d, "v_add3_u32", False, False), 1066 ( -1, -1, -1, 0x200, 0x36f, "v_lshl_or_b32", False, False), 1067 ( -1, -1, -1, 0x201, 0x371, "v_and_or_b32", False, False), 1068 ( -1, -1, -1, 0x202, 0x372, "v_or3_b32", False, False), 1069 ( -1, -1, -1, 0x203, -1, "v_mad_f16", True, True), 1070 ( -1, -1, -1, 0x204, 0x340, "v_mad_u16", False, False), 1071 ( -1, -1, -1, 0x205, 0x35e, "v_mad_i16", False, False), 1072 ( -1, -1, -1, 0x206, 0x34b, "v_fma_f16", True, True), 1073 ( -1, -1, -1, 0x207, 0x35f, "v_div_fixup_f16", True, True), 1074 ( -1, -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True), 1075 ( -1, -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True), 1076 ( -1, -1, 0x276, 0x276, -1, "v_interp_p2_legacy_f16", True, True), 1077 ( -1, -1, -1, 0x277, 0x35a, "v_interp_p2_f16", True, True), 1078 (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True), 1079 ( -1, -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False), 1080 ( -1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False), 1081 (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False), 1082 (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False), 1083 ( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False), 1084 ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64), 1085 ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64), 1086 ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64), 1087 (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False), 1088 (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False), 1089 (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False), 1090 (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f 1091 (0x130, 0x130, 0x297, 0x297, 0x36a, 
"v_cvt_pk_u16_u32", False, False), 1092 (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False), 1093 ( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False), 1094 ( -1, -1, -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False), 1095 ( -1, -1, -1, 0x29c, 0x37f, "v_add_i32", False, False), 1096 ( -1, -1, -1, 0x29d, 0x376, "v_sub_i32", False, False), 1097 ( -1, -1, -1, 0x29e, 0x30d, "v_add_i16", False, False), 1098 ( -1, -1, -1, 0x29f, 0x30e, "v_sub_i16", False, False), 1099 ( -1, -1, -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False), 1100 ( -1, -1, -1, -1, 0x178, "v_xor3_b32", False, False), 1101 ( -1, -1, -1, -1, 0x377, "v_permlane16_b32", False, False), 1102 ( -1, -1, -1, -1, 0x378, "v_permlanex16_b32", False, False), 1103 ( -1, -1, -1, -1, 0x30f, "v_add_co_u32_e64", False, False), 1104 ( -1, -1, -1, -1, 0x310, "v_sub_co_u32_e64", False, False), 1105 ( -1, -1, -1, -1, 0x319, "v_subrev_co_u32_e64", False, False), 1106 ( -1, -1, -1, -1, 0x303, "v_add_u16_e64", False, False), 1107 ( -1, -1, -1, -1, 0x304, "v_sub_u16_e64", False, False), 1108 ( -1, -1, -1, -1, 0x305, "v_mul_lo_u16_e64", False, False), 1109 ( -1, -1, -1, -1, 0x309, "v_max_u16_e64", False, False), 1110 ( -1, -1, -1, -1, 0x30a, "v_max_i16_e64", False, False), 1111 ( -1, -1, -1, -1, 0x30b, "v_min_u16_e64", False, False), 1112 ( -1, -1, -1, -1, 0x30c, "v_min_i16_e64", False, False), 1113 ( -1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, False), 1114 ( -1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False), 1115 ( -1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False), 1116 ( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+ 1117} 1118for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32): 1119 opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod) 1120 1121 1122# DS instructions: 3 inputs (1 addr, 2 data), 1 output 1123DS = { 1124 (0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"), 
1125 (0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"), 1126 (0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"), 1127 (0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"), 1128 (0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"), 1129 (0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"), 1130 (0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"), 1131 (0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"), 1132 (0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"), 1133 (0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"), 1134 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"), 1135 (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"), 1136 (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"), 1137 (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"), 1138 (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"), 1139 (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"), 1140 (0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"), 1141 (0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"), 1142 (0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"), 1143 (0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"), 1144 ( -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"), 1145 ( -1, -1, 0x15, 0x15, 0x15, "ds_add_f32"), 1146 ( -1, -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"), 1147 (0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"), 1148 (0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"), 1149 (0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"), 1150 (0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"), 1151 (0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"), 1152 (0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"), 1153 (0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"), 1154 (0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"), 1155 (0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"), 1156 (0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"), 1157 (0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"), 1158 (0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"), 1159 (0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"), 1160 (0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"), 1161 (0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"), 1162 (0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 
"ds_wrxchg_rtn_b32"), 1163 (0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"), 1164 (0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"), 1165 (0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"), 1166 (0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"), 1167 (0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"), 1168 (0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"), 1169 ( -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"), 1170 ( -1, -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"), 1171 (0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"), 1172 (0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"), 1173 (0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"), 1174 (0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"), 1175 (0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"), 1176 (0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"), 1177 (0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"), 1178 (0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2 1179 ( -1, -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"), 1180 ( -1, -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"), 1181 (0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"), 1182 (0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"), 1183 (0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"), 1184 (0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"), 1185 (0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"), 1186 (0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"), 1187 (0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"), 1188 (0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"), 1189 (0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"), 1190 (0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"), 1191 (0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"), 1192 (0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"), 1193 (0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"), 1194 (0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"), 1195 (0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"), 1196 (0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"), 1197 (0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"), 1198 (0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"), 1199 (0x52, 0x52, 0x52, 0x52, 
0x52, "ds_min_f64"), 1200 (0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"), 1201 ( -1, -1, -1, 0x54, 0xa0, "ds_write_b8_d16_hi"), 1202 ( -1, -1, -1, 0x55, 0xa1, "ds_write_b16_d16_hi"), 1203 ( -1, -1, -1, 0x56, 0xa2, "ds_read_u8_d16"), 1204 ( -1, -1, -1, 0x57, 0xa3, "ds_read_u8_d16_hi"), 1205 ( -1, -1, -1, 0x58, 0xa4, "ds_read_i8_d16"), 1206 ( -1, -1, -1, 0x59, 0xa5, "ds_read_i8_d16_hi"), 1207 ( -1, -1, -1, 0x5a, 0xa6, "ds_read_u16_d16"), 1208 ( -1, -1, -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"), 1209 (0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"), 1210 (0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"), 1211 (0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"), 1212 (0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"), 1213 (0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"), 1214 (0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"), 1215 (0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"), 1216 (0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"), 1217 (0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"), 1218 (0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"), 1219 (0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"), 1220 (0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"), 1221 (0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"), 1222 (0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"), 1223 (0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"), 1224 (0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"), 1225 (0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"), 1226 (0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"), 1227 (0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"), 1228 (0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"), 1229 (0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"), 1230 (0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"), 1231 (0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"), 1232 ( -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"), 1233 (0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"), 1234 (0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"), 1235 (0x82, 0x82, 0x82, 0x82, 0x82, 
"ds_rsub_src2_u32"), 1236 (0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"), 1237 (0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"), 1238 (0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"), 1239 (0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"), 1240 (0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"), 1241 (0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"), 1242 (0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"), 1243 (0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"), 1244 (0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"), 1245 (0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"), 1246 (0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"), 1247 (0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"), 1248 ( -1, -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"), 1249 ( -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"), 1250 (0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"), 1251 (0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"), 1252 (0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"), 1253 (0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"), 1254 (0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"), 1255 ( -1, -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"), 1256 (0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"), 1257 (0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"), 1258 (0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"), 1259 (0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"), 1260 (0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"), 1261 (0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"), 1262 (0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"), 1263 (0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"), 1264 (0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"), 1265 (0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"), 1266 (0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"), 1267 (0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"), 1268 (0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"), 1269 (0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"), 1270 (0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"), 1271 (0xcd, 0xcd, 0xcd, 0xcd, 
0xcd, "ds_write_src2_b64"), 1272 (0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"), 1273 (0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"), 1274 ( -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"), 1275 ( -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"), 1276 ( -1, 0xfd, 0xfd, -1, -1, "ds_condxchg32_rtn_b128"), 1277 ( -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"), 1278 ( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"), 1279} 1280for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS: 1281 opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS) 1282 1283# MUBUF instructions: 1284MUBUF = { 1285 (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"), 1286 (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"), 1287 (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"), 1288 (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"), 1289 (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"), 1290 (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"), 1291 (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"), 1292 (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"), 1293 ( -1, -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"), 1294 ( -1, -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"), 1295 ( -1, -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"), 1296 ( -1, -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"), 1297 ( -1, -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"), 1298 ( -1, -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"), 1299 ( -1, -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"), 1300 ( -1, -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"), 1301 (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"), 1302 (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"), 1303 (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"), 1304 (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"), 1305 (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"), 1306 (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"), 1307 ( -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"), 
1308 (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"), 1309 (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"), 1310 ( -1, -1, -1, 0x19, 0x19, "buffer_store_byte_d16_hi"), 1311 (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"), 1312 ( -1, -1, -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"), 1313 (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"), 1314 (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"), 1315 ( -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"), 1316 (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"), 1317 ( -1, -1, -1, 0x20, 0x20, "buffer_load_ubyte_d16"), 1318 ( -1, -1, -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"), 1319 ( -1, -1, -1, 0x22, 0x22, "buffer_load_sbyte_d16"), 1320 ( -1, -1, -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"), 1321 ( -1, -1, -1, 0x24, 0x24, "buffer_load_short_d16"), 1322 ( -1, -1, -1, 0x25, 0x25, "buffer_load_short_d16_hi"), 1323 ( -1, -1, -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"), 1324 ( -1, -1, -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"), 1325 ( -1, -1, 0x3d, 0x3d, -1, "buffer_store_lds_dword"), 1326 (0x71, 0x71, 0x3e, 0x3e, -1, "buffer_wbinvl1"), 1327 (0x70, 0x70, 0x3f, 0x3f, -1, "buffer_wbinvl1_vol"), 1328 (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"), 1329 (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"), 1330 (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"), 1331 (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"), 1332 (0x34, -1, -1, -1, -1, "buffer_atomic_rsub"), 1333 (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"), 1334 (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"), 1335 (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"), 1336 (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"), 1337 (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"), 1338 (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"), 1339 (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"), 1340 (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"), 1341 (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"), 
1342 (0x3e, 0x3e, -1, -1, 0x3e, "buffer_atomic_fcmpswap"), 1343 (0x3f, 0x3f, -1, -1, 0x3f, "buffer_atomic_fmin"), 1344 (0x40, 0x40, -1, -1, 0x40, "buffer_atomic_fmax"), 1345 (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"), 1346 (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"), 1347 (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"), 1348 (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"), 1349 (0x54, -1, -1, -1, -1, "buffer_atomic_rsub_x2"), 1350 (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"), 1351 (0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"), 1352 (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"), 1353 (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"), 1354 (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"), 1355 (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"), 1356 (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"), 1357 (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"), 1358 (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"), 1359 (0x5e, 0x5e, -1, -1, 0x5e, "buffer_atomic_fcmpswap_x2"), 1360 (0x5f, 0x5f, -1, -1, 0x5f, "buffer_atomic_fmin_x2"), 1361 (0x60, 0x60, -1, -1, 0x60, "buffer_atomic_fmax_x2"), 1362 ( -1, -1, -1, -1, 0x71, "buffer_gl0_inv"), 1363 ( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"), 1364 ( -1, -1, -1, -1, 0x34, "buffer_atomic_csub"), #GFX10.3+. 
seems glc must be set 1365} 1366for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF: 1367 opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name) 1368 1369MTBUF = { 1370 (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"), 1371 (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"), 1372 (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"), 1373 (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"), 1374 (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"), 1375 (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"), 1376 (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"), 1377 (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"), 1378 ( -1, -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"), 1379 ( -1, -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"), 1380 ( -1, -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"), 1381 ( -1, -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"), 1382 ( -1, -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"), 1383 ( -1, -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"), 1384 ( -1, -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"), 1385 ( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"), 1386} 1387for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF: 1388 opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem) 1389 1390 1391IMAGE = { 1392 (0x00, "image_load"), 1393 (0x01, "image_load_mip"), 1394 (0x02, "image_load_pck"), 1395 (0x03, "image_load_pck_sgn"), 1396 (0x04, "image_load_mip_pck"), 1397 (0x05, "image_load_mip_pck_sgn"), 1398 (0x08, "image_store"), 1399 (0x09, "image_store_mip"), 1400 (0x0a, "image_store_pck"), 1401 (0x0b, "image_store_mip_pck"), 1402 (0x0e, "image_get_resinfo"), 1403 (0x60, "image_get_lod"), 1404} 1405# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 1406for (code, name) in IMAGE: 1407 opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 1408 1409opcode("image_msaa_load", 
-1, -1, 0x80, Format.MIMG, InstrClass.VMem) #GFX10.3+ 1410 1411IMAGE_ATOMIC = { 1412 (0x0f, 0x0f, 0x10, "image_atomic_swap"), 1413 (0x10, 0x10, 0x11, "image_atomic_cmpswap"), 1414 (0x11, 0x11, 0x12, "image_atomic_add"), 1415 (0x12, 0x12, 0x13, "image_atomic_sub"), 1416 (0x13, -1, -1, "image_atomic_rsub"), 1417 (0x14, 0x14, 0x14, "image_atomic_smin"), 1418 (0x15, 0x15, 0x15, "image_atomic_umin"), 1419 (0x16, 0x16, 0x16, "image_atomic_smax"), 1420 (0x17, 0x17, 0x17, "image_atomic_umax"), 1421 (0x18, 0x18, 0x18, "image_atomic_and"), 1422 (0x19, 0x19, 0x19, "image_atomic_or"), 1423 (0x1a, 0x1a, 0x1a, "image_atomic_xor"), 1424 (0x1b, 0x1b, 0x1b, "image_atomic_inc"), 1425 (0x1c, 0x1c, 0x1c, "image_atomic_dec"), 1426 (0x1d, 0x1d, -1, "image_atomic_fcmpswap"), 1427 (0x1e, 0x1e, -1, "image_atomic_fmin"), 1428 (0x1f, 0x1f, -1, "image_atomic_fmax"), 1429} 1430# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name) 1431# gfx7 and gfx10 opcodes are the same here 1432for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC: 1433 opcode(name, gfx7, gfx89, gfx7, Format.MIMG, InstrClass.VMem, is_atomic = True) 1434 1435IMAGE_SAMPLE = { 1436 (0x20, "image_sample"), 1437 (0x21, "image_sample_cl"), 1438 (0x22, "image_sample_d"), 1439 (0x23, "image_sample_d_cl"), 1440 (0x24, "image_sample_l"), 1441 (0x25, "image_sample_b"), 1442 (0x26, "image_sample_b_cl"), 1443 (0x27, "image_sample_lz"), 1444 (0x28, "image_sample_c"), 1445 (0x29, "image_sample_c_cl"), 1446 (0x2a, "image_sample_c_d"), 1447 (0x2b, "image_sample_c_d_cl"), 1448 (0x2c, "image_sample_c_l"), 1449 (0x2d, "image_sample_c_b"), 1450 (0x2e, "image_sample_c_b_cl"), 1451 (0x2f, "image_sample_c_lz"), 1452 (0x30, "image_sample_o"), 1453 (0x31, "image_sample_cl_o"), 1454 (0x32, "image_sample_d_o"), 1455 (0x33, "image_sample_d_cl_o"), 1456 (0x34, "image_sample_l_o"), 1457 (0x35, "image_sample_b_o"), 1458 (0x36, "image_sample_b_cl_o"), 1459 (0x37, "image_sample_lz_o"), 1460 (0x38, "image_sample_c_o"), 1461 (0x39, 
"image_sample_c_cl_o"), 1462 (0x3a, "image_sample_c_d_o"), 1463 (0x3b, "image_sample_c_d_cl_o"), 1464 (0x3c, "image_sample_c_l_o"), 1465 (0x3d, "image_sample_c_b_o"), 1466 (0x3e, "image_sample_c_b_cl_o"), 1467 (0x3f, "image_sample_c_lz_o"), 1468 (0x68, "image_sample_cd"), 1469 (0x69, "image_sample_cd_cl"), 1470 (0x6a, "image_sample_c_cd"), 1471 (0x6b, "image_sample_c_cd_cl"), 1472 (0x6c, "image_sample_cd_o"), 1473 (0x6d, "image_sample_cd_cl_o"), 1474 (0x6e, "image_sample_c_cd_o"), 1475 (0x6f, "image_sample_c_cd_cl_o"), 1476} 1477# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 1478for (code, name) in IMAGE_SAMPLE: 1479 opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 1480 1481IMAGE_GATHER4 = { 1482 (0x40, "image_gather4"), 1483 (0x41, "image_gather4_cl"), 1484 #(0x42, "image_gather4h"), VEGA only? 1485 (0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet. 1486 (0x45, "image_gather4_b"), 1487 (0x46, "image_gather4_b_cl"), 1488 (0x47, "image_gather4_lz"), 1489 (0x48, "image_gather4_c"), 1490 (0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet. 1491 #(0x4a, "image_gather4h_pck"), VEGA only? 1492 #(0x4b, "image_gather8h_pck"), VGEA only? 
1493 (0x4c, "image_gather4_c_l"), 1494 (0x4d, "image_gather4_c_b"), 1495 (0x4e, "image_gather4_c_b_cl"), 1496 (0x4f, "image_gather4_c_lz"), 1497 (0x50, "image_gather4_o"), 1498 (0x51, "image_gather4_cl_o"), 1499 (0x54, "image_gather4_l_o"), 1500 (0x55, "image_gather4_b_o"), 1501 (0x56, "image_gather4_b_cl_o"), 1502 (0x57, "image_gather4_lz_o"), 1503 (0x58, "image_gather4_c_o"), 1504 (0x59, "image_gather4_c_cl_o"), 1505 (0x5c, "image_gather4_c_l_o"), 1506 (0x5d, "image_gather4_c_b_o"), 1507 (0x5e, "image_gather4_c_b_cl_o"), 1508 (0x5f, "image_gather4_c_lz_o"), 1509} 1510# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name) 1511for (code, name) in IMAGE_GATHER4: 1512 opcode(name, code, code, code, Format.MIMG, InstrClass.VMem) 1513 1514opcode("image_bvh64_intersect_ray", -1, -1, 231, Format.MIMG, InstrClass.VMem) 1515 1516FLAT = { 1517 #GFX7, GFX8_9, GFX10 1518 (0x08, 0x10, 0x08, "flat_load_ubyte"), 1519 (0x09, 0x11, 0x09, "flat_load_sbyte"), 1520 (0x0a, 0x12, 0x0a, "flat_load_ushort"), 1521 (0x0b, 0x13, 0x0b, "flat_load_sshort"), 1522 (0x0c, 0x14, 0x0c, "flat_load_dword"), 1523 (0x0d, 0x15, 0x0d, "flat_load_dwordx2"), 1524 (0x0f, 0x16, 0x0f, "flat_load_dwordx3"), 1525 (0x0e, 0x17, 0x0e, "flat_load_dwordx4"), 1526 (0x18, 0x18, 0x18, "flat_store_byte"), 1527 ( -1, 0x19, 0x19, "flat_store_byte_d16_hi"), 1528 (0x1a, 0x1a, 0x1a, "flat_store_short"), 1529 ( -1, 0x1b, 0x1b, "flat_store_short_d16_hi"), 1530 (0x1c, 0x1c, 0x1c, "flat_store_dword"), 1531 (0x1d, 0x1d, 0x1d, "flat_store_dwordx2"), 1532 (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"), 1533 (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"), 1534 ( -1, 0x20, 0x20, "flat_load_ubyte_d16"), 1535 ( -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"), 1536 ( -1, 0x22, 0x22, "flat_load_sbyte_d16"), 1537 ( -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"), 1538 ( -1, 0x24, 0x24, "flat_load_short_d16"), 1539 ( -1, 0x25, 0x25, "flat_load_short_d16_hi"), 1540 (0x30, 0x40, 0x30, "flat_atomic_swap"), 1541 (0x31, 0x41, 0x31, 
"flat_atomic_cmpswap"), 1542 (0x32, 0x42, 0x32, "flat_atomic_add"), 1543 (0x33, 0x43, 0x33, "flat_atomic_sub"), 1544 (0x35, 0x44, 0x35, "flat_atomic_smin"), 1545 (0x36, 0x45, 0x36, "flat_atomic_umin"), 1546 (0x37, 0x46, 0x37, "flat_atomic_smax"), 1547 (0x38, 0x47, 0x38, "flat_atomic_umax"), 1548 (0x39, 0x48, 0x39, "flat_atomic_and"), 1549 (0x3a, 0x49, 0x3a, "flat_atomic_or"), 1550 (0x3b, 0x4a, 0x3b, "flat_atomic_xor"), 1551 (0x3c, 0x4b, 0x3c, "flat_atomic_inc"), 1552 (0x3d, 0x4c, 0x3d, "flat_atomic_dec"), 1553 (0x3e, -1, 0x3e, "flat_atomic_fcmpswap"), 1554 (0x3f, -1, 0x3f, "flat_atomic_fmin"), 1555 (0x40, -1, 0x40, "flat_atomic_fmax"), 1556 (0x50, 0x60, 0x50, "flat_atomic_swap_x2"), 1557 (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"), 1558 (0x52, 0x62, 0x52, "flat_atomic_add_x2"), 1559 (0x53, 0x63, 0x53, "flat_atomic_sub_x2"), 1560 (0x55, 0x64, 0x55, "flat_atomic_smin_x2"), 1561 (0x56, 0x65, 0x56, "flat_atomic_umin_x2"), 1562 (0x57, 0x66, 0x57, "flat_atomic_smax_x2"), 1563 (0x58, 0x67, 0x58, "flat_atomic_umax_x2"), 1564 (0x59, 0x68, 0x59, "flat_atomic_and_x2"), 1565 (0x5a, 0x69, 0x5a, "flat_atomic_or_x2"), 1566 (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"), 1567 (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"), 1568 (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"), 1569 (0x5e, -1, 0x5e, "flat_atomic_fcmpswap_x2"), 1570 (0x5f, -1, 0x5f, "flat_atomic_fmin_x2"), 1571 (0x60, -1, 0x60, "flat_atomic_fmax_x2"), 1572} 1573for (gfx7, gfx8, gfx10, name) in FLAT: 1574 opcode(name, gfx7, gfx8, gfx10, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS? 
# GLOBAL instructions. Each row is (GFX8_9 opcode, GFX10 opcode, name).
# -1 stands for "no opcode number on that generation"; the duplicate check
# at the end of this file skips such entries.
GLOBAL = {
    #GFX8_9, GFX10
    (0x10, 0x08, "global_load_ubyte"),
    (0x11, 0x09, "global_load_sbyte"),
    (0x12, 0x0a, "global_load_ushort"),
    (0x13, 0x0b, "global_load_sshort"),
    (0x14, 0x0c, "global_load_dword"),
    (0x15, 0x0d, "global_load_dwordx2"),
    (0x16, 0x0f, "global_load_dwordx3"),
    (0x17, 0x0e, "global_load_dwordx4"),
    (0x18, 0x18, "global_store_byte"),
    (0x19, 0x19, "global_store_byte_d16_hi"),
    (0x1a, 0x1a, "global_store_short"),
    (0x1b, 0x1b, "global_store_short_d16_hi"),
    (0x1c, 0x1c, "global_store_dword"),
    (0x1d, 0x1d, "global_store_dwordx2"),
    (0x1e, 0x1f, "global_store_dwordx3"),
    (0x1f, 0x1e, "global_store_dwordx4"),
    (0x20, 0x20, "global_load_ubyte_d16"),
    (0x21, 0x21, "global_load_ubyte_d16_hi"),
    (0x22, 0x22, "global_load_sbyte_d16"),
    (0x23, 0x23, "global_load_sbyte_d16_hi"),
    (0x24, 0x24, "global_load_short_d16"),
    (0x25, 0x25, "global_load_short_d16_hi"),
    (0x40, 0x30, "global_atomic_swap"),
    (0x41, 0x31, "global_atomic_cmpswap"),
    (0x42, 0x32, "global_atomic_add"),
    (0x43, 0x33, "global_atomic_sub"),
    (0x44, 0x35, "global_atomic_smin"),
    (0x45, 0x36, "global_atomic_umin"),
    (0x46, 0x37, "global_atomic_smax"),
    (0x47, 0x38, "global_atomic_umax"),
    (0x48, 0x39, "global_atomic_and"),
    (0x49, 0x3a, "global_atomic_or"),
    (0x4a, 0x3b, "global_atomic_xor"),
    (0x4b, 0x3c, "global_atomic_inc"),
    (0x4c, 0x3d, "global_atomic_dec"),
    (  -1, 0x3e, "global_atomic_fcmpswap"),
    (  -1, 0x3f, "global_atomic_fmin"),
    (  -1, 0x40, "global_atomic_fmax"),
    (0x60, 0x50, "global_atomic_swap_x2"),
    (0x61, 0x51, "global_atomic_cmpswap_x2"),
    (0x62, 0x52, "global_atomic_add_x2"),
    (0x63, 0x53, "global_atomic_sub_x2"),
    (0x64, 0x55, "global_atomic_smin_x2"),
    (0x65, 0x56, "global_atomic_umin_x2"),
    (0x66, 0x57, "global_atomic_smax_x2"),
    (0x67, 0x58, "global_atomic_umax_x2"),
    (0x68, 0x59, "global_atomic_and_x2"),
    (0x69, 0x5a, "global_atomic_or_x2"),
    (0x6a, 0x5b, "global_atomic_xor_x2"),
    (0x6b, 0x5c, "global_atomic_inc_x2"),
    (0x6c, 0x5d, "global_atomic_dec_x2"),
    (  -1, 0x5e, "global_atomic_fcmpswap_x2"),
    (  -1, 0x5f, "global_atomic_fmin_x2"),
    (  -1, 0x60, "global_atomic_fmax_x2"),
    (  -1, 0x16, "global_load_dword_addtid"), #GFX10.3+
    (  -1, 0x17, "global_store_dword_addtid"), #GFX10.3+
    (  -1, 0x34, "global_atomic_csub"), #GFX10.3+. seems glc must be set
}
# Register every GLOBAL row. The first opcode argument is always -1 here,
# and a row is flagged atomic when its name contains "atomic".
for (gfx8, gfx10, name) in GLOBAL:
    opcode(name, -1, gfx8, gfx10, Format.GLOBAL, InstrClass.VMem, is_atomic="atomic" in name)

# SCRATCH instructions, same row layout as GLOBAL:
# (GFX8_9 opcode, GFX10 opcode, name).
SCRATCH = {
    #GFX8_9, GFX10
    (0x10, 0x08, "scratch_load_ubyte"),
    (0x11, 0x09, "scratch_load_sbyte"),
    (0x12, 0x0a, "scratch_load_ushort"),
    (0x13, 0x0b, "scratch_load_sshort"),
    (0x14, 0x0c, "scratch_load_dword"),
    (0x15, 0x0d, "scratch_load_dwordx2"),
    (0x16, 0x0f, "scratch_load_dwordx3"),
    (0x17, 0x0e, "scratch_load_dwordx4"),
    (0x18, 0x18, "scratch_store_byte"),
    (0x19, 0x19, "scratch_store_byte_d16_hi"),
    (0x1a, 0x1a, "scratch_store_short"),
    (0x1b, 0x1b, "scratch_store_short_d16_hi"),
    (0x1c, 0x1c, "scratch_store_dword"),
    (0x1d, 0x1d, "scratch_store_dwordx2"),
    (0x1e, 0x1f, "scratch_store_dwordx3"),
    (0x1f, 0x1e, "scratch_store_dwordx4"),
    (0x20, 0x20, "scratch_load_ubyte_d16"),
    (0x21, 0x21, "scratch_load_ubyte_d16_hi"),
    (0x22, 0x22, "scratch_load_sbyte_d16"),
    (0x23, 0x23, "scratch_load_sbyte_d16_hi"),
    (0x24, 0x24, "scratch_load_short_d16"),
    (0x25, 0x25, "scratch_load_short_d16_hi"),
}
# Register every SCRATCH row; none of them is flagged atomic.
for (gfx8, gfx10, name) in SCRATCH:
    opcode(name, -1, gfx8, gfx10, Format.SCRATCH, InstrClass.VMem)

# Check for duplicate opcode numbers: for each checked generation, no two
# non-pseudo instructions of the same format may carry the same number.
# The first clash that is not a listed exception aborts with exit status 1.
for ver in ['gfx9', 'gfx10']:
    # (format, opcode number) -> name of the first instruction seen with it
    op_to_name = {}
    for op in opcodes.values():
        # Pseudo formats are excluded from the check.
        if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH,
                         Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]:
            continue

        num = getattr(op, 'opcode_' + ver)
        if num == -1:
            # No opcode number on this generation.
            continue

        key = (op.format, num)
        if key not in op_to_name:
            op_to_name[key] = op.name
            continue

        # Exceptions: pairs that are allowed to share a number.
        names = {op_to_name[key], op.name}
        if ver in ('gfx8', 'gfx9') and names == {'v_mul_lo_i32', 'v_mul_lo_u32'}:
            continue
        # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
        if ver == 'gfx10' and names == {'v_mad_legacy_f32', 'v_fma_legacy_f32'}:
            continue

        # sys.exit(str) writes the message to stderr and exits with status 1,
        # so the diagnostic cannot get mixed into anything written to stdout.
        sys.exit('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))