/*
 * Copyright © 2008 Keith Packard
 * Copyright © 2014 Intel Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  The copyright holders make no representations
 * about the suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "elk_disasm.h"
#include "elk_disasm_info.h"
#include "elk_eu_defines.h"
#include "elk_eu.h"
#include "elk_inst.h"
#include "elk_isa_info.h"
#include "elk_reg.h"
#include "elk_shader.h"
#include "util/half_float.h"

39 bool
elk_has_jip(const struct intel_device_info * devinfo,enum elk_opcode opcode)40 elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode)
41 {
42    if (devinfo->ver < 6)
43       return false;
44 
45    return opcode == ELK_OPCODE_IF ||
46           opcode == ELK_OPCODE_ELSE ||
47           opcode == ELK_OPCODE_ENDIF ||
48           opcode == ELK_OPCODE_WHILE ||
49           opcode == ELK_OPCODE_BREAK ||
50           opcode == ELK_OPCODE_CONTINUE ||
51           opcode == ELK_OPCODE_HALT;
52 }
53 
54 bool
elk_has_uip(const struct intel_device_info * devinfo,enum elk_opcode opcode)55 elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode)
56 {
57    if (devinfo->ver < 6)
58       return false;
59 
60    return (devinfo->ver >= 7 && opcode == ELK_OPCODE_IF) ||
61           (devinfo->ver >= 8 && opcode == ELK_OPCODE_ELSE) ||
62           opcode == ELK_OPCODE_BREAK ||
63           opcode == ELK_OPCODE_CONTINUE ||
64           opcode == ELK_OPCODE_HALT;
65 }
66 
67 static bool
has_branch_ctrl(const struct intel_device_info * devinfo,enum elk_opcode opcode)68 has_branch_ctrl(const struct intel_device_info *devinfo, enum elk_opcode opcode)
69 {
70    if (devinfo->ver < 8)
71       return false;
72 
73    return opcode == ELK_OPCODE_IF ||
74           opcode == ELK_OPCODE_ELSE;
75           /* opcode == ELK_OPCODE_GOTO; */
76 }
77 
78 static bool
is_logic_instruction(unsigned opcode)79 is_logic_instruction(unsigned opcode)
80 {
81    return opcode == ELK_OPCODE_AND ||
82           opcode == ELK_OPCODE_NOT ||
83           opcode == ELK_OPCODE_OR ||
84           opcode == ELK_OPCODE_XOR;
85 }
86 
87 static bool
is_send(unsigned opcode)88 is_send(unsigned opcode)
89 {
90    return opcode == ELK_OPCODE_SEND ||
91           opcode == ELK_OPCODE_SENDC ||
92           opcode == ELK_OPCODE_SENDS ||
93           opcode == ELK_OPCODE_SENDSC;
94 }
95 
96 static bool
is_split_send(UNUSED const struct intel_device_info * devinfo,unsigned opcode)97 is_split_send(UNUSED const struct intel_device_info *devinfo, unsigned opcode)
98 {
99    if (devinfo->ver >= 12)
100       return is_send(opcode);
101    else
102       return opcode == ELK_OPCODE_SENDS ||
103              opcode == ELK_OPCODE_SENDSC;
104 }
105 
106 const char *const elk_conditional_modifier[16] = {
107    [ELK_CONDITIONAL_NONE] = "",
108    [ELK_CONDITIONAL_Z]    = ".z",
109    [ELK_CONDITIONAL_NZ]   = ".nz",
110    [ELK_CONDITIONAL_G]    = ".g",
111    [ELK_CONDITIONAL_GE]   = ".ge",
112    [ELK_CONDITIONAL_L]    = ".l",
113    [ELK_CONDITIONAL_LE]   = ".le",
114    [ELK_CONDITIONAL_R]    = ".r",
115    [ELK_CONDITIONAL_O]    = ".o",
116    [ELK_CONDITIONAL_U]    = ".u",
117 };
118 
/*
 * Lookup tables mapping a small integer index to the text printed for it.
 * Any index without an initializer is NULL; control() reports a NULL entry
 * as an invalid value.
 */
static const char *const m_negate[2] = {
   [0] = "",
   [1] = "-",
};

static const char *const _abs[2] = {
   [0] = "",
   [1] = "(abs)",
};

static const char *const m_bitnot[2] = { "", "~" };

static const char *const vert_stride[16] = {
   [0] = "0",
   [1] = "1",
   [2] = "2",
   [3] = "4",
   [4] = "8",
   [5] = "16",
   [6] = "32",
   [15] = "VxH",
};

static const char *const width[8] = {
   [0] = "1",
   [1] = "2",
   [2] = "4",
   [3] = "8",
   [4] = "16",
};

static const char *const horiz_stride[4] = {
   [0] = "0",
   [1] = "1",
   [2] = "2",
   [3] = "4"
};

static const char *const chan_sel[4] = {
   [0] = "x",
   [1] = "y",
   [2] = "z",
   [3] = "w",
};

static const char *const debug_ctrl[2] = {
   [0] = "",
   [1] = ".breakpoint"
};

static const char *const saturate[2] = {
   [0] = "",
   [1] = ".sat"
};

static const char *const cmpt_ctrl[2] = {
   [0] = "",
   [1] = "compacted"
};

static const char *const accwr[2] = {
   [0] = "",
   [1] = "AccWrEnable"
};

static const char *const branch_ctrl[2] = {
   [0] = "",
   [1] = "BranchCtrl"
};

static const char *const wectrl[2] = {
   [0] = "",
   [1] = "WE_all"
};

static const char *const exec_size[8] = {
   [0] = "1",
   [1] = "2",
   [2] = "4",
   [3] = "8",
   [4] = "16",
   [5] = "32"
};

static const char *const pred_inv[2] = {
   [0] = "+",
   [1] = "-"
};

/* Index 0 is deliberately left without an entry (NULL). */
const char *const elk_pred_ctrl_align16[16] = {
   [1] = "",
   [2] = ".x",
   [3] = ".y",
   [4] = ".z",
   [5] = ".w",
   [6] = ".any4h",
   [7] = ".all4h",
};

218 static const char *const pred_ctrl_align1[16] = {
219    [ELK_PREDICATE_NORMAL]        = "",
220    [ELK_PREDICATE_ALIGN1_ANYV]   = ".anyv",
221    [ELK_PREDICATE_ALIGN1_ALLV]   = ".allv",
222    [ELK_PREDICATE_ALIGN1_ANY2H]  = ".any2h",
223    [ELK_PREDICATE_ALIGN1_ALL2H]  = ".all2h",
224    [ELK_PREDICATE_ALIGN1_ANY4H]  = ".any4h",
225    [ELK_PREDICATE_ALIGN1_ALL4H]  = ".all4h",
226    [ELK_PREDICATE_ALIGN1_ANY8H]  = ".any8h",
227    [ELK_PREDICATE_ALIGN1_ALL8H]  = ".all8h",
228    [ELK_PREDICATE_ALIGN1_ANY16H] = ".any16h",
229    [ELK_PREDICATE_ALIGN1_ALL16H] = ".all16h",
230    [ELK_PREDICATE_ALIGN1_ANY32H] = ".any32h",
231    [ELK_PREDICATE_ALIGN1_ALL32H] = ".all32h",
232 };
233 
234 static const char *const xe2_pred_ctrl[4] = {
235    [ELK_PREDICATE_NORMAL]        = "",
236    [XE2_PREDICATE_ANY]           = ".any",
237    [XE2_PREDICATE_ALL]           = ".all",
238 };
239 
240 static const char *const thread_ctrl[4] = {
241    [ELK_THREAD_NORMAL] = "",
242    [ELK_THREAD_ATOMIC] = "atomic",
243    [ELK_THREAD_SWITCH] = "switch",
244 };
245 
/*
 * More index-to-text tables for instruction options, register files and
 * write masks.  Unlisted indices are NULL.
 */
static const char *const compr_ctrl[4] = {
   [0] = "",
   [1] = "sechalf",
   [2] = "compr",
   [3] = "compr4",
};

static const char *const dep_ctrl[4] = {
   [0] = "",
   [1] = "NoDDClr",
   [2] = "NoDDChk",
   [3] = "NoDDClr,NoDDChk",
};

static const char *const mask_ctrl[4] = {
   [0] = "",
   [1] = "nomask",
};

static const char *const access_mode[2] = {
   [0] = "align1",
   [1] = "align16",
};

static const char *const reg_file[4] = {
   [0] = "A",
   [1] = "g",
   [2] = "m",
   [3] = "imm",
};

/* Bit 0 selects x, bit 1 y, bit 2 z, bit 3 w; the full mask prints nothing. */
static const char *const writemask[16] = {
   [0x0] = ".",
   [0x1] = ".x",
   [0x2] = ".y",
   [0x3] = ".xy",
   [0x4] = ".z",
   [0x5] = ".xz",
   [0x6] = ".yz",
   [0x7] = ".xyz",
   [0x8] = ".w",
   [0x9] = ".xw",
   [0xa] = ".yw",
   [0xb] = ".xyw",
   [0xc] = ".zw",
   [0xd] = ".xzw",
   [0xe] = ".yzw",
   [0xf] = "",
};

static const char *const end_of_thread[2] = {
   [0] = "",
   [1] = "EOT"
};

301 /* SFIDs on Gfx4-5 */
302 static const char *const gfx4_sfid[16] = {
303    [ELK_SFID_NULL]            = "null",
304    [ELK_SFID_MATH]            = "math",
305    [ELK_SFID_SAMPLER]         = "sampler",
306    [ELK_SFID_MESSAGE_GATEWAY] = "gateway",
307    [ELK_SFID_DATAPORT_READ]   = "read",
308    [ELK_SFID_DATAPORT_WRITE]  = "write",
309    [ELK_SFID_URB]             = "urb",
310    [ELK_SFID_THREAD_SPAWNER]  = "thread_spawner",
311    [ELK_SFID_VME]             = "vme",
312 };
313 
314 static const char *const gfx6_sfid[16] = {
315    [ELK_SFID_NULL]                     = "null",
316    [ELK_SFID_MATH]                     = "math",
317    [ELK_SFID_SAMPLER]                  = "sampler",
318    [ELK_SFID_MESSAGE_GATEWAY]          = "gateway",
319    [ELK_SFID_URB]                      = "urb",
320    [ELK_SFID_THREAD_SPAWNER]           = "thread_spawner",
321    [GFX6_SFID_DATAPORT_SAMPLER_CACHE]  = "dp_sampler",
322    [GFX6_SFID_DATAPORT_RENDER_CACHE]   = "render",
323    [GFX6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
324    [GFX7_SFID_DATAPORT_DATA_CACHE]     = "data",
325    [GFX7_SFID_PIXEL_INTERPOLATOR]      = "pixel interp",
326    [HSW_SFID_DATAPORT_DATA_CACHE_1]    = "dp data 1",
327    [HSW_SFID_CRE]                      = "cre",
328    [GFX12_SFID_SLM]                    = "slm",
329    [GFX12_SFID_TGM]                    = "tgm",
330    [GFX12_SFID_UGM]                    = "ugm",
331 };
332 
333 static const char *const gfx7_gateway_subfuncid[8] = {
334    [ELK_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open",
335    [ELK_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close",
336    [ELK_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg",
337    [ELK_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp",
338    [ELK_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg",
339    [ELK_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state",
340    [ELK_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
341 };
342 
/*
 * Data-port message-type names, indexed by the binary values written in the
 * designators.  Unlisted indices are NULL.
 */
static const char *const gfx4_dp_read_port_msg_type[4] = {
   [0b00] = "OWord Block Read",
   [0b01] = "OWord Dual Block Read",
   [0b10] = "Media Block Read",
   [0b11] = "DWord Scattered Read",
};

static const char *const g45_dp_read_port_msg_type[8] = {
   [0b000] = "OWord Block Read",
   [0b010] = "OWord Dual Block Read",
   [0b100] = "Media Block Read",
   [0b110] = "DWord Scattered Read",
   [0b001] = "Render Target UNORM Read",
   [0b011] = "AVC Loop Filter Read",
};

static const char *const dp_write_port_msg_type[8] = {
   [0b000] = "OWord block write",
   [0b001] = "OWord dual block write",
   [0b010] = "media block write",
   [0b011] = "DWord scattered write",
   [0b100] = "RT write",
   [0b101] = "streamed VB write",
   [0b110] = "RT UNORM write", /* G45+ */
   [0b111] = "flush render cache",
};

370 static const char *const dp_rc_msg_type_gfx6[16] = {
371    [ELK_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
372    [GFX6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
373    [GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
374    [GFX6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
375    [GFX6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] =
376       "OWORD unaligned block read",
377    [GFX6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
378    [GFX6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
379    [GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
380    [GFX6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] =
381       "OWORD dual block write",
382    [GFX6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
383    [GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] =
384       "DWORD scattered write",
385    [GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
386    [GFX6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
387    [GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORM write",
388 };
389 
390 static const char *const dp_rc_msg_type_gfx7[16] = {
391    [GFX7_DATAPORT_RC_MEDIA_BLOCK_READ] = "media block read",
392    [GFX7_DATAPORT_RC_TYPED_SURFACE_READ] = "typed surface read",
393    [GFX7_DATAPORT_RC_TYPED_ATOMIC_OP] = "typed atomic op",
394    [GFX7_DATAPORT_RC_MEMORY_FENCE] = "memory fence",
395    [GFX7_DATAPORT_RC_MEDIA_BLOCK_WRITE] = "media block write",
396    [GFX7_DATAPORT_RC_RENDER_TARGET_WRITE] = "RT write",
397    [GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE] = "typed surface write"
398 };
399 
400 static const char *const dp_rc_msg_type_gfx9[16] = {
401    [GFX9_DATAPORT_RC_RENDER_TARGET_WRITE] = "RT write",
402    [GFX9_DATAPORT_RC_RENDER_TARGET_READ] = "RT read"
403 };
404 
405 static const char *const *
dp_rc_msg_type(const struct intel_device_info * devinfo)406 dp_rc_msg_type(const struct intel_device_info *devinfo)
407 {
408    return (devinfo->ver >= 9 ? dp_rc_msg_type_gfx9 :
409            devinfo->ver >= 7 ? dp_rc_msg_type_gfx7 :
410            devinfo->ver >= 6 ? dp_rc_msg_type_gfx6 :
411            dp_write_port_msg_type);
412 }
413 
/*
 * Subtype names indexed by the binary values in the designators.  The array
 * has 8 entries (highest designator is 0b111); index 0b110 is NULL.
 */
static const char *const m_rt_write_subtype[] = {
   [0b000] = "SIMD16",
   [0b001] = "SIMD16/RepData",
   [0b010] = "SIMD8/DualSrcLow",
   [0b011] = "SIMD8/DualSrcHigh",
   [0b100] = "SIMD8",
   [0b101] = "SIMD8/ImageWrite",   /* Gfx6+ */
   [0b111] = "SIMD16/RepData-111", /* no idea how this is different than 1 */
};

424 static const char *const dp_dc0_msg_type_gfx7[16] = {
425    [GFX7_DATAPORT_DC_OWORD_BLOCK_READ] = "DC OWORD block read",
426    [GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ] =
427       "DC unaligned OWORD block read",
428    [GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ] = "DC OWORD dual block read",
429    [GFX7_DATAPORT_DC_DWORD_SCATTERED_READ] = "DC DWORD scattered read",
430    [GFX7_DATAPORT_DC_BYTE_SCATTERED_READ] = "DC byte scattered read",
431    [GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ] = "DC untyped surface read",
432    [GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP] = "DC untyped atomic",
433    [GFX7_DATAPORT_DC_MEMORY_FENCE] = "DC mfence",
434    [GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE] = "DC OWORD block write",
435    [GFX7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE] = "DC OWORD dual block write",
436    [GFX7_DATAPORT_DC_DWORD_SCATTERED_WRITE] = "DC DWORD scatterd write",
437    [GFX7_DATAPORT_DC_BYTE_SCATTERED_WRITE] = "DC byte scattered write",
438    [GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE] = "DC untyped surface write",
439 };
440 
441 static const char *const dp_oword_block_rw[8] = {
442       [ELK_DATAPORT_OWORD_BLOCK_1_OWORDLOW]  = "1-low",
443       [ELK_DATAPORT_OWORD_BLOCK_1_OWORDHIGH] = "1-high",
444       [ELK_DATAPORT_OWORD_BLOCK_2_OWORDS]    = "2",
445       [ELK_DATAPORT_OWORD_BLOCK_4_OWORDS]    = "4",
446       [ELK_DATAPORT_OWORD_BLOCK_8_OWORDS]    = "8",
447 };
448 
449 static const char *const dp_dc1_msg_type_hsw[32] = {
450    [HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ] = "untyped surface read",
451    [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP] = "DC untyped atomic op",
452    [HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2] =
453       "DC untyped 4x2 atomic op",
454    [HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ] = "DC media block read",
455    [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ] = "DC typed surface read",
456    [HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP] = "DC typed atomic",
457    [HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2] = "DC typed 4x2 atomic op",
458    [HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE] = "DC untyped surface write",
459    [HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE] = "DC media block write",
460    [HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP] = "DC atomic counter op",
461    [HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] =
462       "DC 4x2 atomic counter op",
463    [HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
464    [GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ] = "DC A64 scattered read",
465    [GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ] = "DC A64 untyped surface read",
466    [GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP] = "DC A64 untyped atomic op",
467    [GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ] = "DC A64 oword block read",
468    [GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE] = "DC A64 oword block write",
469    [GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE] = "DC A64 untyped surface write",
470    [GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE] = "DC A64 scattered write",
471    [GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
472       "DC untyped atomic float op",
473    [GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
474       "DC A64 untyped atomic float op",
475    [GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP] =
476       "DC A64 untyped atomic half-integer op",
477    [GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP] =
478       "DC A64 untyped atomic half-float op",
479 };
480 
481 static const char *const aop[16] = {
482    [ELK_AOP_AND]    = "and",
483    [ELK_AOP_OR]     = "or",
484    [ELK_AOP_XOR]    = "xor",
485    [ELK_AOP_MOV]    = "mov",
486    [ELK_AOP_INC]    = "inc",
487    [ELK_AOP_DEC]    = "dec",
488    [ELK_AOP_ADD]    = "add",
489    [ELK_AOP_SUB]    = "sub",
490    [ELK_AOP_REVSUB] = "revsub",
491    [ELK_AOP_IMAX]   = "imax",
492    [ELK_AOP_IMIN]   = "imin",
493    [ELK_AOP_UMAX]   = "umax",
494    [ELK_AOP_UMIN]   = "umin",
495    [ELK_AOP_CMPWR]  = "cmpwr",
496    [ELK_AOP_PREDEC] = "predec",
497 };
498 
499 static const char *const aop_float[5] = {
500    [ELK_AOP_FMAX]   = "fmax",
501    [ELK_AOP_FMIN]   = "fmin",
502    [ELK_AOP_FCMPWR] = "fcmpwr",
503    [ELK_AOP_FADD]   = "fadd",
504 };
505 
506 static const char * const pixel_interpolator_msg_types[4] = {
507     [GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset",
508     [GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position",
509     [GFX7_PIXEL_INTERPOLATOR_LOC_CENTROID] = "centroid",
510     [GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET] = "per_slot_offset",
511 };
512 
513 static const char *const math_function[16] = {
514    [ELK_MATH_FUNCTION_INV]    = "inv",
515    [ELK_MATH_FUNCTION_LOG]    = "log",
516    [ELK_MATH_FUNCTION_EXP]    = "exp",
517    [ELK_MATH_FUNCTION_SQRT]   = "sqrt",
518    [ELK_MATH_FUNCTION_RSQ]    = "rsq",
519    [ELK_MATH_FUNCTION_SIN]    = "sin",
520    [ELK_MATH_FUNCTION_COS]    = "cos",
521    [ELK_MATH_FUNCTION_SINCOS] = "sincos",
522    [ELK_MATH_FUNCTION_FDIV]   = "fdiv",
523    [ELK_MATH_FUNCTION_POW]    = "pow",
524    [ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
525    [ELK_MATH_FUNCTION_INT_DIV_QUOTIENT]  = "intdiv",
526    [ELK_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
527    [GFX8_MATH_FUNCTION_INVM]  = "invm",
528    [GFX8_MATH_FUNCTION_RSQRTM] = "rsqrtm",
529 };
530 
531 static const char *const sync_function[16] = {
532    [TGL_SYNC_NOP] = "nop",
533    [TGL_SYNC_ALLRD] = "allrd",
534    [TGL_SYNC_ALLWR] = "allwr",
535    [TGL_SYNC_FENCE] = "fence",
536    [TGL_SYNC_BAR] = "bar",
537    [TGL_SYNC_HOST] = "host",
538 };
539 
/* One-bit math message flags: index 0 prints nothing, index 1 the keyword. */
static const char *const math_saturate[2] = {
   [0] = "",
   [1] = "sat"
};

static const char *const math_signed[2] = {
   [0] = "",
   [1] = "signed"
};

static const char *const math_scalar[2] = {
   [0] = "",
   [1] = "scalar"
};

static const char *const math_precision[2] = {
   [0] = "",
   [1] = "partial_precision"
};

/* Two-entry URB opcode table (size is deduced from the designators). */
static const char *const gfx5_urb_opcode[] = {
   [0] = "urb_write",
   [1] = "ff_sync",
};

565 static const char *const gfx7_urb_opcode[] = {
566    [ELK_URB_OPCODE_WRITE_HWORD] = "write HWord",
567    [ELK_URB_OPCODE_WRITE_OWORD] = "write OWord",
568    [ELK_URB_OPCODE_READ_HWORD] = "read HWord",
569    [ELK_URB_OPCODE_READ_OWORD] = "read OWord",
570    [GFX7_URB_OPCODE_ATOMIC_MOV] = "atomic mov",  /* Gfx7+ */
571    [GFX7_URB_OPCODE_ATOMIC_INC] = "atomic inc",  /* Gfx7+ */
572    [GFX8_URB_OPCODE_ATOMIC_ADD] = "atomic add",  /* Gfx8+ */
573    [GFX8_URB_OPCODE_SIMD8_WRITE] = "SIMD8 write", /* Gfx8+ */
574    [GFX8_URB_OPCODE_SIMD8_READ] = "SIMD8 read",  /* Gfx8+ */
575    [GFX125_URB_OPCODE_FENCE] = "fence",  /* Gfx12.5+ */
576    /* [10-15] - reserved */
577 };
578 
579 static const char *const urb_swizzle[4] = {
580    [ELK_URB_SWIZZLE_NONE]       = "",
581    [ELK_URB_SWIZZLE_INTERLEAVE] = "interleave",
582    [ELK_URB_SWIZZLE_TRANSPOSE]  = "transpose",
583 };
584 
/* One-bit URB message flags: index 0 prints nothing, index 1 the keyword. */
static const char *const urb_allocate[2] = {
   [0] = "",
   [1] = "allocate"
};

static const char *const urb_used[2] = {
   [0] = "",
   [1] = "used"
};

static const char *const urb_complete[2] = {
   [0] = "",
   [1] = "complete"
};

600 static const char *const gfx5_sampler_msg_type[] = {
601    [GFX5_SAMPLER_MESSAGE_SAMPLE]              = "sample",
602    [GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS]         = "sample_b",
603    [GFX5_SAMPLER_MESSAGE_SAMPLE_LOD]          = "sample_l",
604    [GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE]      = "sample_c",
605    [GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS]       = "sample_d",
606    [GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE] = "sample_b_c",
607    [GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE]  = "sample_l_c",
608    [GFX5_SAMPLER_MESSAGE_SAMPLE_LD]           = "ld",
609    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4]      = "gather4",
610    [GFX5_SAMPLER_MESSAGE_LOD]                 = "lod",
611    [GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO]      = "resinfo",
612    [GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO]   = "sampleinfo",
613    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C]    = "gather4_c",
614    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO]   = "gather4_po",
615    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
616    [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
617    [GFX9_SAMPLER_MESSAGE_SAMPLE_LZ]           = "sample_lz",
618    [GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ]         = "sample_c_lz",
619    [GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ]        = "ld_lz",
620    [GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W]     = "ld2dms_w",
621    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS]       = "ld_mcs",
622    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS]       = "ld2dms",
623    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS]       = "ld2dss",
624 };
625 
626 static const char *const xe2_sampler_msg_type[] = {
627    [GFX5_SAMPLER_MESSAGE_SAMPLE]              = "sample",
628    [GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS]         = "sample_b",
629    [GFX5_SAMPLER_MESSAGE_SAMPLE_LOD]          = "sample_l",
630    [GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE]      = "sample_c",
631    [GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS]       = "sample_d",
632    [GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE] = "sample_b_c",
633    [GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE]  = "sample_l_c",
634    [GFX5_SAMPLER_MESSAGE_SAMPLE_LD]           = "ld",
635    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4]      = "gather4",
636    [GFX5_SAMPLER_MESSAGE_LOD]                 = "lod",
637    [GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO]      = "resinfo",
638    [GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO]   = "sampleinfo",
639    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C]    = "gather4_c",
640    [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO]   = "gather4_po",
641    [XE2_SAMPLER_MESSAGE_SAMPLE_MLOD]          = "sample_mlod",
642    [XE2_SAMPLER_MESSAGE_SAMPLE_COMPARE_MLOD]  = "sample_c_mlod",
643    [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
644    [GFX9_SAMPLER_MESSAGE_SAMPLE_LZ]           = "sample_lz",
645    [GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ]         = "sample_c_lz",
646    [GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ]        = "ld_lz",
647    [GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W]     = "ld2dms_w",
648    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS]       = "ld_mcs",
649    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS]       = "ld2dms",
650    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS]       = "ld2dss",
651 };
652 
653 static const char *const gfx5_sampler_simd_mode[7] = {
654    [ELK_SAMPLER_SIMD_MODE_SIMD4X2]   = "SIMD4x2",
655    [ELK_SAMPLER_SIMD_MODE_SIMD8]     = "SIMD8",
656    [ELK_SAMPLER_SIMD_MODE_SIMD16]    = "SIMD16",
657    [ELK_SAMPLER_SIMD_MODE_SIMD32_64] = "SIMD32/64",
658    [GFX10_SAMPLER_SIMD_MODE_SIMD8H]  = "SIMD8H",
659    [GFX10_SAMPLER_SIMD_MODE_SIMD16H] = "SIMD16H",
660 };
661 
662 static const char *const xe2_sampler_simd_mode[7] = {
663    [XE2_SAMPLER_SIMD_MODE_SIMD16]  = "SIMD16",
664    [XE2_SAMPLER_SIMD_MODE_SIMD32]  = "SIMD32",
665    [XE2_SAMPLER_SIMD_MODE_SIMD16H] = "SIMD16H",
666    [XE2_SAMPLER_SIMD_MODE_SIMD32H] = "SIMD32H",
667 };
668 
/* Index 1 has no entry and is therefore NULL. */
static const char *const sampler_target_format[4] = {
   [0] = "F",
   [2] = "UD",
   [3] = "D"
};

675 static const char *const lsc_operation[] = {
676    [LSC_OP_LOAD]            = "load",
677    [LSC_OP_LOAD_CMASK]      = "load_cmask",
678    [LSC_OP_STORE]           = "store",
679    [LSC_OP_STORE_CMASK]     = "store_cmask",
680    [LSC_OP_FENCE]           = "fence",
681    [LSC_OP_ATOMIC_INC]      = "atomic_inc",
682    [LSC_OP_ATOMIC_DEC]      = "atomic_dec",
683    [LSC_OP_ATOMIC_LOAD]     = "atomic_load",
684    [LSC_OP_ATOMIC_STORE]    = "atomic_store",
685    [LSC_OP_ATOMIC_ADD]      = "atomic_add",
686    [LSC_OP_ATOMIC_SUB]      = "atomic_sub",
687    [LSC_OP_ATOMIC_MIN]      = "atomic_min",
688    [LSC_OP_ATOMIC_MAX]      = "atomic_max",
689    [LSC_OP_ATOMIC_UMIN]     = "atomic_umin",
690    [LSC_OP_ATOMIC_UMAX]     = "atomic_umax",
691    [LSC_OP_ATOMIC_CMPXCHG]  = "atomic_cmpxchg",
692    [LSC_OP_ATOMIC_FADD]     = "atomic_fadd",
693    [LSC_OP_ATOMIC_FSUB]     = "atomic_fsub",
694    [LSC_OP_ATOMIC_FMIN]     = "atomic_fmin",
695    [LSC_OP_ATOMIC_FMAX]     = "atomic_fmax",
696    [LSC_OP_ATOMIC_FCMPXCHG] = "atomic_fcmpxchg",
697    [LSC_OP_ATOMIC_AND]      = "atomic_and",
698    [LSC_OP_ATOMIC_OR]       = "atomic_or",
699    [LSC_OP_ATOMIC_XOR]      = "atomic_xor",
700 };
701 
702 static const char *const lsc_addr_surface_type[] = {
703    [LSC_ADDR_SURFTYPE_FLAT] = "flat",
704    [LSC_ADDR_SURFTYPE_BSS]  = "bss",
705    [LSC_ADDR_SURFTYPE_SS]   = "ss",
706    [LSC_ADDR_SURFTYPE_BTI]  = "bti",
707 };
708 
709 static const char* const lsc_fence_scope[] = {
710    [LSC_FENCE_THREADGROUP]     = "threadgroup",
711    [LSC_FENCE_LOCAL]           = "local",
712    [LSC_FENCE_TILE]            = "tile",
713    [LSC_FENCE_GPU]             = "gpu",
714    [LSC_FENCE_ALL_GPU]         = "all_gpu",
715    [LSC_FENCE_SYSTEM_RELEASE]  = "system_release",
716    [LSC_FENCE_SYSTEM_ACQUIRE]  = "system_acquire",
717 };
718 
719 static const char* const lsc_flush_type[] = {
720    [LSC_FLUSH_TYPE_NONE]       = "none",
721    [LSC_FLUSH_TYPE_EVICT]      = "evict",
722    [LSC_FLUSH_TYPE_INVALIDATE] = "invalidate",
723    [LSC_FLUSH_TYPE_DISCARD]    = "discard",
724    [LSC_FLUSH_TYPE_CLEAN]      = "clean",
725    [LSC_FLUSH_TYPE_L3ONLY]     = "l3only",
726    [LSC_FLUSH_TYPE_NONE_6]     = "none_6",
727 };
728 
729 static const char* const lsc_addr_size[] = {
730    [LSC_ADDR_SIZE_A16] = "a16",
731    [LSC_ADDR_SIZE_A32] = "a32",
732    [LSC_ADDR_SIZE_A64] = "a64",
733 };
734 
735 static const char* const lsc_backup_fence_routing[] = {
736    [LSC_NORMAL_ROUTING]  = "normal_routing",
737    [LSC_ROUTE_TO_LSC]    = "route_to_lsc",
738 };
739 
740 static const char* const lsc_data_size[] = {
741    [LSC_DATA_SIZE_D8]      = "d8",
742    [LSC_DATA_SIZE_D16]     = "d16",
743    [LSC_DATA_SIZE_D32]     = "d32",
744    [LSC_DATA_SIZE_D64]     = "d64",
745    [LSC_DATA_SIZE_D8U32]   = "d8u32",
746    [LSC_DATA_SIZE_D16U32]  = "d16u32",
747    [LSC_DATA_SIZE_D16BF32] = "d16bf32",
748 };
749 
750 static const char* const lsc_vect_size_str[] = {
751    [LSC_VECT_SIZE_V1] = "V1",
752    [LSC_VECT_SIZE_V2] = "V2",
753    [LSC_VECT_SIZE_V3] = "V3",
754    [LSC_VECT_SIZE_V4] = "V4",
755    [LSC_VECT_SIZE_V8] = "V8",
756    [LSC_VECT_SIZE_V16] = "V16",
757    [LSC_VECT_SIZE_V32] = "V32",
758    [LSC_VECT_SIZE_V64] = "V64",
759 };
760 
761 static const char* const lsc_cmask_str[] = {
762    [LSC_CMASK_X]      = "x",
763    [LSC_CMASK_Y]      = "y",
764    [LSC_CMASK_XY]     = "xy",
765    [LSC_CMASK_Z]      = "z",
766    [LSC_CMASK_XZ]     = "xz",
767    [LSC_CMASK_YZ]     = "yz",
768    [LSC_CMASK_XYZ]    = "xyz",
769    [LSC_CMASK_W]      = "w",
770    [LSC_CMASK_XW]     = "xw",
771    [LSC_CMASK_YW]     = "yw",
772    [LSC_CMASK_XYW]    = "xyw",
773    [LSC_CMASK_ZW]     = "zw",
774    [LSC_CMASK_XZW]    = "xzw",
775    [LSC_CMASK_YZW]    = "yzw",
776    [LSC_CMASK_XYZW]   = "xyzw",
777 };
778 
779 static const char* const lsc_cache_load[] = {
780    [LSC_CACHE_LOAD_L1STATE_L3MOCS]   = "L1STATE_L3MOCS",
781    [LSC_CACHE_LOAD_L1UC_L3UC]        = "L1UC_L3UC",
782    [LSC_CACHE_LOAD_L1UC_L3C]         = "L1UC_L3C",
783    [LSC_CACHE_LOAD_L1C_L3UC]         = "L1C_L3UC",
784    [LSC_CACHE_LOAD_L1C_L3C]          = "L1C_L3C",
785    [LSC_CACHE_LOAD_L1S_L3UC]         = "L1S_L3UC",
786    [LSC_CACHE_LOAD_L1S_L3C]          = "L1S_L3C",
787    [LSC_CACHE_LOAD_L1IAR_L3C]        = "L1IAR_L3C",
788 };
789 
790 static const char* const lsc_cache_store[] = {
791    [LSC_CACHE_STORE_L1STATE_L3MOCS]  = "L1STATE_L3MOCS",
792    [LSC_CACHE_STORE_L1UC_L3UC]       = "L1UC_L3UC",
793    [LSC_CACHE_STORE_L1UC_L3WB]       = "L1UC_L3WB",
794    [LSC_CACHE_STORE_L1WT_L3UC]       = "L1WT_L3UC",
795    [LSC_CACHE_STORE_L1WT_L3WB]       = "L1WT_L3WB",
796    [LSC_CACHE_STORE_L1S_L3UC]        = "L1S_L3UC",
797    [LSC_CACHE_STORE_L1S_L3WB]        = "L1S_L3WB",
798    [LSC_CACHE_STORE_L1WB_L3WB]       = "L1WB_L3WB",
799 };
800 
801 static const char* const xe2_lsc_cache_load[] = {
802    [XE2_LSC_CACHE_LOAD_L1STATE_L3MOCS]   = "L1STATE_L3MOCS",
803    [XE2_LSC_CACHE_LOAD_L1UC_L3UC]        = "L1UC_L3UC",
804    [XE2_LSC_CACHE_LOAD_L1UC_L3C]         = "L1UC_L3C",
805    [XE2_LSC_CACHE_LOAD_L1UC_L3CC]        = "L1UC_L3CC",
806    [XE2_LSC_CACHE_LOAD_L1C_L3UC]         = "L1C_L3UC",
807    [XE2_LSC_CACHE_LOAD_L1C_L3C]          = "L1C_L3C",
808    [XE2_LSC_CACHE_LOAD_L1C_L3CC]         = "L1C_L3CC",
809    [XE2_LSC_CACHE_LOAD_L1S_L3UC]         = "L1S_L3UC",
810    [XE2_LSC_CACHE_LOAD_L1S_L3C]          = "L1S_L3C",
811    [XE2_LSC_CACHE_LOAD_L1IAR_L3IAR]      = "L1IAR_L3IAR",
812 };
813 
814 static const char* const xe2_lsc_cache_store[] = {
815    [XE2_LSC_CACHE_STORE_L1STATE_L3MOCS]  = "L1STATE_L3MOCS",
816    [XE2_LSC_CACHE_STORE_L1UC_L3UC]       = "L1UC_L3UC",
817    [XE2_LSC_CACHE_STORE_L1UC_L3WB]       = "L1UC_L3WB",
818    [XE2_LSC_CACHE_STORE_L1WT_L3UC]       = "L1WT_L3UC",
819    [XE2_LSC_CACHE_STORE_L1WT_L3WB]       = "L1WT_L3WB",
820    [XE2_LSC_CACHE_STORE_L1S_L3UC]        = "L1S_L3UC",
821    [XE2_LSC_CACHE_STORE_L1S_L3WB]        = "L1S_L3WB",
822    [XE2_LSC_CACHE_STORE_L1WB_L3WB]       = "L1WB_L3WB",
823 };
824 
/* Note the ordering: index 0 prints "16", indices 1..3 print 2, 4 and 8. */
static const char* const dpas_systolic_depth[4] = {
   [0] = "16",
   [1] = "2",
   [2] = "4",
   [3] = "8"
};

/*
 * Number of characters written on the current output line by the helpers
 * below; reset by newline() and consulted by pad().
 */
static int column;

/**
 * Write a NUL-terminated string to the stream and advance the column
 * counter by its length.  The result of fputs() is not checked.
 *
 * Always returns 0.
 */
static int
string(FILE *file, const char *string)
{
   fputs(string, file);
   column += (int)strlen(string);
   return 0;
}

842 static int
843 format(FILE *f, const char *format, ...) PRINTFLIKE(2, 3);
844 
845 static int
format(FILE * f,const char * format,...)846 format(FILE *f, const char *format, ...)
847 {
848    char buf[1024];
849    va_list args;
850    va_start(args, format);
851 
852    vsnprintf(buf, sizeof(buf) - 1, format, args);
853    va_end(args);
854    string(f, buf);
855    return 0;
856 }
857 
858 static int
newline(FILE * f)859 newline(FILE *f)
860 {
861    putc('\n', f);
862    column = 0;
863    return 0;
864 }
865 
866 static int
pad(FILE * f,int c)867 pad(FILE *f, int c)
868 {
869    do
870       string(f, " ");
871    while (column < c);
872    return 0;
873 }
874 
/**
 * Print the symbolic name of an instruction control field.
 *
 * \param file   output stream
 * \param name   human-readable field name, used only in the error message
 * \param ctrl   lookup table of names indexed by the encoded value; a NULL
 *               entry marks an invalid encoding and an empty string marks a
 *               value that prints nothing
 * \param id     encoded field value; the caller must guarantee it lies
 *               within \p ctrl, no bounds check is possible here
 * \param space  optional in/out flag: when non-NULL and non-zero a single
 *               space is printed before the name, and it is set to 1 once a
 *               name has been printed
 *
 * \return 0 on success, 1 if \p id has no entry in \p ctrl.
 */
static int
control(FILE *file, const char *name, const char *const ctrl[],
        unsigned id, int *space)
{
   if (!ctrl[id]) {
      /* Go through format() rather than a bare fprintf() so the column
       * counter used by pad() stays in sync with what was really written.
       */
      format(file, "*** invalid %s value %u ", name, id);
      return 1;
   }
   if (ctrl[id][0]) {
      if (space && *space)
         string(file, " ");
      string(file, ctrl[id]);
      if (space)
         *space = 1;
   }
   return 0;
}
892 
893 static int
print_opcode(FILE * file,const struct elk_isa_info * isa,enum elk_opcode id)894 print_opcode(FILE *file, const struct elk_isa_info *isa,
895              enum elk_opcode id)
896 {
897    const struct elk_opcode_desc *desc = elk_opcode_desc(isa, id);
898    if (!desc) {
899       format(file, "*** invalid opcode value %d ", id);
900       return 1;
901    }
902    string(file, desc->name);
903    return 0;
904 }
905 
906 static int
reg(FILE * file,unsigned _reg_file,unsigned _reg_nr)907 reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
908 {
909    int err = 0;
910 
911    /* Clear the Compr4 instruction compression bit. */
912    if (_reg_file == ELK_MESSAGE_REGISTER_FILE)
913       _reg_nr &= ~ELK_MRF_COMPR4;
914 
915    if (_reg_file == ELK_ARCHITECTURE_REGISTER_FILE) {
916       switch (_reg_nr & 0xf0) {
917       case ELK_ARF_NULL:
918          string(file, "null");
919          break;
920       case ELK_ARF_ADDRESS:
921          format(file, "a%d", _reg_nr & 0x0f);
922          break;
923       case ELK_ARF_ACCUMULATOR:
924          format(file, "acc%d", _reg_nr & 0x0f);
925          break;
926       case ELK_ARF_FLAG:
927          format(file, "f%d", _reg_nr & 0x0f);
928          break;
929       case ELK_ARF_MASK:
930          format(file, "mask%d", _reg_nr & 0x0f);
931          break;
932       case ELK_ARF_MASK_STACK:
933          format(file, "ms%d", _reg_nr & 0x0f);
934          break;
935       case ELK_ARF_MASK_STACK_DEPTH:
936          format(file, "msd%d", _reg_nr & 0x0f);
937          break;
938       case ELK_ARF_STATE:
939          format(file, "sr%d", _reg_nr & 0x0f);
940          break;
941       case ELK_ARF_CONTROL:
942          format(file, "cr%d", _reg_nr & 0x0f);
943          break;
944       case ELK_ARF_NOTIFICATION_COUNT:
945          format(file, "n%d", _reg_nr & 0x0f);
946          break;
947       case ELK_ARF_IP:
948          string(file, "ip");
949          return -1;
950          break;
951       case ELK_ARF_TDR:
952          format(file, "tdr0");
953          return -1;
954       case ELK_ARF_TIMESTAMP:
955          format(file, "tm%d", _reg_nr & 0x0f);
956          break;
957       default:
958          format(file, "ARF%d", _reg_nr);
959          break;
960       }
961    } else {
962       err |= control(file, "src reg file", reg_file, _reg_file, NULL);
963       format(file, "%d", _reg_nr);
964    }
965    return err;
966 }
967 
968 static int
dest(FILE * file,const struct elk_isa_info * isa,const elk_inst * inst)969 dest(FILE *file, const struct elk_isa_info *isa, const elk_inst *inst)
970 {
971    const struct intel_device_info *devinfo = isa->devinfo;
972    enum elk_reg_type type = elk_inst_dst_type(devinfo, inst);
973    unsigned elem_size = elk_reg_type_to_size(type);
974    int err = 0;
975 
976    if (is_split_send(devinfo, elk_inst_opcode(isa, inst))) {
977       /* These are fixed for split sends */
978       type = ELK_REGISTER_TYPE_UD;
979       elem_size = 4;
980       if (devinfo->ver >= 12) {
981          err |= reg(file, elk_inst_send_dst_reg_file(devinfo, inst),
982                     elk_inst_dst_da_reg_nr(devinfo, inst));
983          string(file, elk_reg_type_to_letters(type));
984       } else if (elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
985          err |= reg(file, elk_inst_send_dst_reg_file(devinfo, inst),
986                     elk_inst_dst_da_reg_nr(devinfo, inst));
987          unsigned subreg_nr = elk_inst_dst_da16_subreg_nr(devinfo, inst);
988          if (subreg_nr)
989             format(file, ".%u", subreg_nr);
990          string(file, elk_reg_type_to_letters(type));
991       } else {
992          string(file, "g[a0");
993          if (elk_inst_dst_ia_subreg_nr(devinfo, inst))
994             format(file, ".%"PRIu64, elk_inst_dst_ia_subreg_nr(devinfo, inst) /
995                    elem_size);
996          if (elk_inst_send_dst_ia16_addr_imm(devinfo, inst))
997             format(file, " %d", elk_inst_send_dst_ia16_addr_imm(devinfo, inst));
998          string(file, "]<");
999          string(file, elk_reg_type_to_letters(type));
1000       }
1001    } else if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
1002       if (elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1003          err |= reg(file, elk_inst_dst_reg_file(devinfo, inst),
1004                     elk_inst_dst_da_reg_nr(devinfo, inst));
1005          if (err == -1)
1006             return 0;
1007          if (elk_inst_dst_da1_subreg_nr(devinfo, inst))
1008             format(file, ".%"PRIu64, elk_inst_dst_da1_subreg_nr(devinfo, inst) /
1009                    elem_size);
1010          string(file, "<");
1011          err |= control(file, "horiz stride", horiz_stride,
1012                         elk_inst_dst_hstride(devinfo, inst), NULL);
1013          string(file, ">");
1014          string(file, elk_reg_type_to_letters(type));
1015       } else {
1016          string(file, "g[a0");
1017          if (elk_inst_dst_ia_subreg_nr(devinfo, inst))
1018             format(file, ".%"PRIu64, elk_inst_dst_ia_subreg_nr(devinfo, inst) /
1019                    elem_size);
1020          if (elk_inst_dst_ia1_addr_imm(devinfo, inst))
1021             format(file, " %d", elk_inst_dst_ia1_addr_imm(devinfo, inst));
1022          string(file, "]<");
1023          err |= control(file, "horiz stride", horiz_stride,
1024                         elk_inst_dst_hstride(devinfo, inst), NULL);
1025          string(file, ">");
1026          string(file, elk_reg_type_to_letters(type));
1027       }
1028    } else {
1029       if (elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1030          err |= reg(file, elk_inst_dst_reg_file(devinfo, inst),
1031                     elk_inst_dst_da_reg_nr(devinfo, inst));
1032          if (err == -1)
1033             return 0;
1034          if (elk_inst_dst_da16_subreg_nr(devinfo, inst))
1035             format(file, ".%u", 16 / elem_size);
1036          string(file, "<1>");
1037          err |= control(file, "writemask", writemask,
1038                         elk_inst_da16_writemask(devinfo, inst), NULL);
1039          string(file, elk_reg_type_to_letters(type));
1040       } else {
1041          err = 1;
1042          string(file, "Indirect align16 address mode not supported");
1043       }
1044    }
1045 
1046    return 0;
1047 }
1048 
1049 static int
dest_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1050 dest_3src(FILE *file, const struct intel_device_info *devinfo,
1051           const elk_inst *inst)
1052 {
1053    bool is_align1 = elk_inst_3src_access_mode(devinfo, inst) == ELK_ALIGN_1;
1054    int err = 0;
1055    uint32_t reg_file;
1056    unsigned subreg_nr;
1057    enum elk_reg_type type;
1058 
1059    if (devinfo->ver < 10 && is_align1)
1060       return 0;
1061 
1062    if (devinfo->ver == 6 && elk_inst_3src_a16_dst_reg_file(devinfo, inst))
1063       reg_file = ELK_MESSAGE_REGISTER_FILE;
1064    else if (devinfo->ver >= 12)
1065       reg_file = elk_inst_3src_a1_dst_reg_file(devinfo, inst);
1066    else if (is_align1 && elk_inst_3src_a1_dst_reg_file(devinfo, inst))
1067       reg_file = ELK_ARCHITECTURE_REGISTER_FILE;
1068    else
1069       reg_file = ELK_GENERAL_REGISTER_FILE;
1070 
1071    err |= reg(file, reg_file, elk_inst_3src_dst_reg_nr(devinfo, inst));
1072    if (err == -1)
1073       return 0;
1074 
1075    if (is_align1) {
1076       type = elk_inst_3src_a1_dst_type(devinfo, inst);
1077       subreg_nr = elk_inst_3src_a1_dst_subreg_nr(devinfo, inst);
1078    } else {
1079       type = elk_inst_3src_a16_dst_type(devinfo, inst);
1080       subreg_nr = elk_inst_3src_a16_dst_subreg_nr(devinfo, inst) * 4;
1081    }
1082    subreg_nr /= elk_reg_type_to_size(type);
1083 
1084    if (subreg_nr)
1085       format(file, ".%u", subreg_nr);
1086    string(file, "<1>");
1087 
1088    if (!is_align1) {
1089       err |= control(file, "writemask", writemask,
1090                      elk_inst_3src_a16_dst_writemask(devinfo, inst), NULL);
1091    }
1092    string(file, elk_reg_type_to_letters(type));
1093 
1094    return 0;
1095 }
1096 
1097 static int
dest_dpas_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1098 dest_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
1099                const elk_inst *inst)
1100 {
1101    uint32_t reg_file = elk_inst_dpas_3src_dst_reg_file(devinfo, inst);
1102 
1103    if (reg(file, reg_file, elk_inst_dpas_3src_dst_reg_nr(devinfo, inst)) == -1)
1104       return 0;
1105 
1106    enum elk_reg_type type = elk_inst_dpas_3src_dst_type(devinfo, inst);
1107    unsigned subreg_nr = elk_inst_dpas_3src_dst_subreg_nr(devinfo, inst);
1108 
1109    if (subreg_nr)
1110       format(file, ".%u", subreg_nr);
1111    string(file, "<1>");
1112 
1113    string(file, elk_reg_type_to_letters(type));
1114 
1115    return 0;
1116 }
1117 
1118 static int
src_align1_region(FILE * file,unsigned _vert_stride,unsigned _width,unsigned _horiz_stride)1119 src_align1_region(FILE *file,
1120                   unsigned _vert_stride, unsigned _width,
1121                   unsigned _horiz_stride)
1122 {
1123    int err = 0;
1124    string(file, "<");
1125    err |= control(file, "vert stride", vert_stride, _vert_stride, NULL);
1126    string(file, ",");
1127    err |= control(file, "width", width, _width, NULL);
1128    string(file, ",");
1129    err |= control(file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
1130    string(file, ">");
1131    return err;
1132 }
1133 
1134 static int
src_da1(FILE * file,const struct intel_device_info * devinfo,unsigned opcode,enum elk_reg_type type,unsigned _reg_file,unsigned _vert_stride,unsigned _width,unsigned _horiz_stride,unsigned reg_num,unsigned sub_reg_num,unsigned __abs,unsigned _negate)1135 src_da1(FILE *file,
1136         const struct intel_device_info *devinfo,
1137         unsigned opcode,
1138         enum elk_reg_type type, unsigned _reg_file,
1139         unsigned _vert_stride, unsigned _width, unsigned _horiz_stride,
1140         unsigned reg_num, unsigned sub_reg_num, unsigned __abs,
1141         unsigned _negate)
1142 {
1143    int err = 0;
1144 
1145    if (devinfo->ver >= 8 && is_logic_instruction(opcode))
1146       err |= control(file, "bitnot", m_bitnot, _negate, NULL);
1147    else
1148       err |= control(file, "negate", m_negate, _negate, NULL);
1149 
1150    err |= control(file, "abs", _abs, __abs, NULL);
1151 
1152    err |= reg(file, _reg_file, reg_num);
1153    if (err == -1)
1154       return 0;
1155    if (sub_reg_num) {
1156       unsigned elem_size = elk_reg_type_to_size(type);
1157       format(file, ".%d", sub_reg_num / elem_size);   /* use formal style like spec */
1158    }
1159    src_align1_region(file, _vert_stride, _width, _horiz_stride);
1160    string(file, elk_reg_type_to_letters(type));
1161    return err;
1162 }
1163 
1164 static int
src_ia1(FILE * file,const struct intel_device_info * devinfo,unsigned opcode,enum elk_reg_type type,int _addr_imm,unsigned _addr_subreg_nr,unsigned _negate,unsigned __abs,unsigned _horiz_stride,unsigned _width,unsigned _vert_stride)1165 src_ia1(FILE *file,
1166         const struct intel_device_info *devinfo,
1167         unsigned opcode,
1168         enum elk_reg_type type,
1169         int _addr_imm,
1170         unsigned _addr_subreg_nr,
1171         unsigned _negate,
1172         unsigned __abs,
1173         unsigned _horiz_stride, unsigned _width, unsigned _vert_stride)
1174 {
1175    int err = 0;
1176 
1177    if (devinfo->ver >= 8 && is_logic_instruction(opcode))
1178       err |= control(file, "bitnot", m_bitnot, _negate, NULL);
1179    else
1180       err |= control(file, "negate", m_negate, _negate, NULL);
1181 
1182    err |= control(file, "abs", _abs, __abs, NULL);
1183 
1184    string(file, "g[a0");
1185    if (_addr_subreg_nr)
1186       format(file, ".%d", _addr_subreg_nr);
1187    if (_addr_imm)
1188       format(file, " %d", _addr_imm);
1189    string(file, "]");
1190    src_align1_region(file, _vert_stride, _width, _horiz_stride);
1191    string(file, elk_reg_type_to_letters(type));
1192    return err;
1193 }
1194 
1195 static int
src_swizzle(FILE * file,unsigned swiz)1196 src_swizzle(FILE *file, unsigned swiz)
1197 {
1198    unsigned x = ELK_GET_SWZ(swiz, ELK_CHANNEL_X);
1199    unsigned y = ELK_GET_SWZ(swiz, ELK_CHANNEL_Y);
1200    unsigned z = ELK_GET_SWZ(swiz, ELK_CHANNEL_Z);
1201    unsigned w = ELK_GET_SWZ(swiz, ELK_CHANNEL_W);
1202    int err = 0;
1203 
1204    if (x == y && x == z && x == w) {
1205       string(file, ".");
1206       err |= control(file, "channel select", chan_sel, x, NULL);
1207    } else if (swiz != ELK_SWIZZLE_XYZW) {
1208       string(file, ".");
1209       err |= control(file, "channel select", chan_sel, x, NULL);
1210       err |= control(file, "channel select", chan_sel, y, NULL);
1211       err |= control(file, "channel select", chan_sel, z, NULL);
1212       err |= control(file, "channel select", chan_sel, w, NULL);
1213    }
1214    return err;
1215 }
1216 
1217 static int
src_da16(FILE * file,const struct intel_device_info * devinfo,unsigned opcode,enum elk_reg_type type,unsigned _reg_file,unsigned _vert_stride,unsigned _reg_nr,unsigned _subreg_nr,unsigned __abs,unsigned _negate,unsigned swz_x,unsigned swz_y,unsigned swz_z,unsigned swz_w)1218 src_da16(FILE *file,
1219          const struct intel_device_info *devinfo,
1220          unsigned opcode,
1221          enum elk_reg_type type,
1222          unsigned _reg_file,
1223          unsigned _vert_stride,
1224          unsigned _reg_nr,
1225          unsigned _subreg_nr,
1226          unsigned __abs,
1227          unsigned _negate,
1228          unsigned swz_x, unsigned swz_y, unsigned swz_z, unsigned swz_w)
1229 {
1230    int err = 0;
1231 
1232    if (devinfo->ver >= 8 && is_logic_instruction(opcode))
1233       err |= control(file, "bitnot", m_bitnot, _negate, NULL);
1234    else
1235       err |= control(file, "negate", m_negate, _negate, NULL);
1236 
1237    err |= control(file, "abs", _abs, __abs, NULL);
1238 
1239    err |= reg(file, _reg_file, _reg_nr);
1240    if (err == -1)
1241       return 0;
1242    if (_subreg_nr) {
1243       unsigned elem_size = elk_reg_type_to_size(type);
1244 
1245       /* bit4 for subreg number byte addressing. Make this same meaning as
1246          in da1 case, so output looks consistent. */
1247       format(file, ".%d", 16 / elem_size);
1248    }
1249    string(file, "<");
1250    err |= control(file, "vert stride", vert_stride, _vert_stride, NULL);
1251    string(file, ">");
1252    err |= src_swizzle(file, ELK_SWIZZLE4(swz_x, swz_y, swz_z, swz_w));
1253    string(file, elk_reg_type_to_letters(type));
1254    return err;
1255 }
1256 
1257 static enum elk_vertical_stride
vstride_from_align1_3src_vstride(const struct intel_device_info * devinfo,enum gfx10_align1_3src_vertical_stride vstride)1258 vstride_from_align1_3src_vstride(const struct intel_device_info *devinfo,
1259                                  enum gfx10_align1_3src_vertical_stride vstride)
1260 {
1261    switch (vstride) {
1262    case ELK_ALIGN1_3SRC_VERTICAL_STRIDE_0: return ELK_VERTICAL_STRIDE_0;
1263    case ELK_ALIGN1_3SRC_VERTICAL_STRIDE_2:
1264       if (devinfo->ver >= 12)
1265          return ELK_VERTICAL_STRIDE_1;
1266       else
1267          return ELK_VERTICAL_STRIDE_2;
1268    case ELK_ALIGN1_3SRC_VERTICAL_STRIDE_4: return ELK_VERTICAL_STRIDE_4;
1269    case ELK_ALIGN1_3SRC_VERTICAL_STRIDE_8: return ELK_VERTICAL_STRIDE_8;
1270    default:
1271       unreachable("not reached");
1272    }
1273 }
1274 
1275 static enum elk_horizontal_stride
hstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)1276 hstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)
1277 {
1278    switch (hstride) {
1279    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return ELK_HORIZONTAL_STRIDE_0;
1280    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1: return ELK_HORIZONTAL_STRIDE_1;
1281    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2: return ELK_HORIZONTAL_STRIDE_2;
1282    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4: return ELK_HORIZONTAL_STRIDE_4;
1283    default:
1284       unreachable("not reached");
1285    }
1286 }
1287 
1288 static enum elk_vertical_stride
vstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)1289 vstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)
1290 {
1291    switch (hstride) {
1292    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return ELK_VERTICAL_STRIDE_0;
1293    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1: return ELK_VERTICAL_STRIDE_1;
1294    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2: return ELK_VERTICAL_STRIDE_2;
1295    case ELK_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4: return ELK_VERTICAL_STRIDE_4;
1296    default:
1297       unreachable("not reached");
1298    }
1299 }
1300 
1301 /* From "GFX10 Regioning Rules for Align1 Ternary Operations" in the
1302  * "Register Region Restrictions" documentation
1303  */
1304 static enum elk_width
implied_width(enum elk_vertical_stride _vert_stride,enum elk_horizontal_stride _horiz_stride)1305 implied_width(enum elk_vertical_stride _vert_stride,
1306               enum elk_horizontal_stride _horiz_stride)
1307 {
1308    /* "1. Width is 1 when Vertical and Horizontal Strides are both zero." */
1309    if (_vert_stride == ELK_VERTICAL_STRIDE_0 &&
1310        _horiz_stride == ELK_HORIZONTAL_STRIDE_0) {
1311       return ELK_WIDTH_1;
1312 
1313    /* "2. Width is equal to vertical stride when Horizontal Stride is zero." */
1314    } else if (_horiz_stride == ELK_HORIZONTAL_STRIDE_0) {
1315       switch (_vert_stride) {
1316       case ELK_VERTICAL_STRIDE_1: return ELK_WIDTH_1;
1317       case ELK_VERTICAL_STRIDE_2: return ELK_WIDTH_2;
1318       case ELK_VERTICAL_STRIDE_4: return ELK_WIDTH_4;
1319       case ELK_VERTICAL_STRIDE_8: return ELK_WIDTH_8;
1320       case ELK_VERTICAL_STRIDE_0:
1321       default:
1322          unreachable("not reached");
1323       }
1324 
1325    } else {
1326       /* FINISHME: Implement these: */
1327 
1328       /* "3. Width is equal to Vertical Stride/Horizontal Stride when both
1329        *     Strides are non-zero.
1330        *
1331        *  4. Vertical Stride must not be zero if Horizontal Stride is non-zero.
1332        *     This implies Vertical Stride is always greater than Horizontal
1333        *     Stride."
1334        *
1335        * Given these statements and the knowledge that the stride and width
1336        * values are encoded in logarithmic form, we can perform the division
1337        * by just subtracting.
1338        */
1339       return _vert_stride - _horiz_stride;
1340    }
1341 }
1342 
1343 static int
src0_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1344 src0_3src(FILE *file, const struct intel_device_info *devinfo,
1345           const elk_inst *inst)
1346 {
1347    int err = 0;
1348    unsigned reg_nr, subreg_nr;
1349    enum elk_reg_file _file;
1350    enum elk_reg_type type;
1351    enum elk_vertical_stride _vert_stride;
1352    enum elk_width _width;
1353    enum elk_horizontal_stride _horiz_stride;
1354    bool is_scalar_region;
1355    bool is_align1 = elk_inst_3src_access_mode(devinfo, inst) == ELK_ALIGN_1;
1356 
1357    if (devinfo->ver < 10 && is_align1)
1358       return 0;
1359 
1360    if (is_align1) {
1361       if (devinfo->ver >= 12 && !elk_inst_3src_a1_src0_is_imm(devinfo, inst)) {
1362          _file = elk_inst_3src_a1_src0_reg_file(devinfo, inst);
1363       } else if (elk_inst_3src_a1_src0_reg_file(devinfo, inst) ==
1364                  ELK_ALIGN1_3SRC_GENERAL_REGISTER_FILE) {
1365          _file = ELK_GENERAL_REGISTER_FILE;
1366       } else if (elk_inst_3src_a1_src0_type(devinfo, inst) ==
1367                  ELK_REGISTER_TYPE_NF) {
1368          _file = ELK_ARCHITECTURE_REGISTER_FILE;
1369       } else {
1370          _file = ELK_IMMEDIATE_VALUE;
1371          uint16_t imm_val = elk_inst_3src_a1_src0_imm(devinfo, inst);
1372          enum elk_reg_type type = elk_inst_3src_a1_src0_type(devinfo, inst);
1373 
1374          if (type == ELK_REGISTER_TYPE_W) {
1375             format(file, "%dW", imm_val);
1376          } else if (type == ELK_REGISTER_TYPE_UW) {
1377             format(file, "0x%04xUW", imm_val);
1378          } else if (type == ELK_REGISTER_TYPE_HF) {
1379             format(file, "0x%04xHF", imm_val);
1380          }
1381          return 0;
1382       }
1383 
1384       reg_nr = elk_inst_3src_src0_reg_nr(devinfo, inst);
1385       subreg_nr = elk_inst_3src_a1_src0_subreg_nr(devinfo, inst);
1386       type = elk_inst_3src_a1_src0_type(devinfo, inst);
1387       _vert_stride = vstride_from_align1_3src_vstride(
1388          devinfo, elk_inst_3src_a1_src0_vstride(devinfo, inst));
1389       _horiz_stride = hstride_from_align1_3src_hstride(
1390                          elk_inst_3src_a1_src0_hstride(devinfo, inst));
1391       _width = implied_width(_vert_stride, _horiz_stride);
1392    } else {
1393       _file = ELK_GENERAL_REGISTER_FILE;
1394       reg_nr = elk_inst_3src_src0_reg_nr(devinfo, inst);
1395       subreg_nr = elk_inst_3src_a16_src0_subreg_nr(devinfo, inst) * 4;
1396       type = elk_inst_3src_a16_src_type(devinfo, inst);
1397 
1398       if (elk_inst_3src_a16_src0_rep_ctrl(devinfo, inst)) {
1399          _vert_stride = ELK_VERTICAL_STRIDE_0;
1400          _width = ELK_WIDTH_1;
1401          _horiz_stride = ELK_HORIZONTAL_STRIDE_0;
1402       } else {
1403          _vert_stride = ELK_VERTICAL_STRIDE_4;
1404          _width = ELK_WIDTH_4;
1405          _horiz_stride = ELK_HORIZONTAL_STRIDE_1;
1406       }
1407    }
1408    is_scalar_region = _vert_stride == ELK_VERTICAL_STRIDE_0 &&
1409                       _width == ELK_WIDTH_1 &&
1410                       _horiz_stride == ELK_HORIZONTAL_STRIDE_0;
1411 
1412    subreg_nr /= elk_reg_type_to_size(type);
1413 
1414    err |= control(file, "negate", m_negate,
1415                   elk_inst_3src_src0_negate(devinfo, inst), NULL);
1416    err |= control(file, "abs", _abs, elk_inst_3src_src0_abs(devinfo, inst), NULL);
1417 
1418    err |= reg(file, _file, reg_nr);
1419    if (err == -1)
1420       return 0;
1421    if (subreg_nr || is_scalar_region)
1422       format(file, ".%d", subreg_nr);
1423    src_align1_region(file, _vert_stride, _width, _horiz_stride);
1424    if (!is_scalar_region && !is_align1)
1425       err |= src_swizzle(file, elk_inst_3src_a16_src0_swizzle(devinfo, inst));
1426    string(file, elk_reg_type_to_letters(type));
1427    return err;
1428 }
1429 
1430 static int
src1_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1431 src1_3src(FILE *file, const struct intel_device_info *devinfo,
1432           const elk_inst *inst)
1433 {
1434    int err = 0;
1435    unsigned reg_nr, subreg_nr;
1436    enum elk_reg_file _file;
1437    enum elk_reg_type type;
1438    enum elk_vertical_stride _vert_stride;
1439    enum elk_width _width;
1440    enum elk_horizontal_stride _horiz_stride;
1441    bool is_scalar_region;
1442    bool is_align1 = elk_inst_3src_access_mode(devinfo, inst) == ELK_ALIGN_1;
1443 
1444    if (devinfo->ver < 10 && is_align1)
1445       return 0;
1446 
1447    if (is_align1) {
1448       if (devinfo->ver >= 12) {
1449          _file = elk_inst_3src_a1_src1_reg_file(devinfo, inst);
1450       } else if (elk_inst_3src_a1_src1_reg_file(devinfo, inst) ==
1451                  ELK_ALIGN1_3SRC_GENERAL_REGISTER_FILE) {
1452          _file = ELK_GENERAL_REGISTER_FILE;
1453       } else {
1454          _file = ELK_ARCHITECTURE_REGISTER_FILE;
1455       }
1456 
1457       reg_nr = elk_inst_3src_src1_reg_nr(devinfo, inst);
1458       subreg_nr = elk_inst_3src_a1_src1_subreg_nr(devinfo, inst);
1459       type = elk_inst_3src_a1_src1_type(devinfo, inst);
1460 
1461       _vert_stride = vstride_from_align1_3src_vstride(
1462          devinfo, elk_inst_3src_a1_src1_vstride(devinfo, inst));
1463       _horiz_stride = hstride_from_align1_3src_hstride(
1464                          elk_inst_3src_a1_src1_hstride(devinfo, inst));
1465       _width = implied_width(_vert_stride, _horiz_stride);
1466    } else {
1467       _file = ELK_GENERAL_REGISTER_FILE;
1468       reg_nr = elk_inst_3src_src1_reg_nr(devinfo, inst);
1469       subreg_nr = elk_inst_3src_a16_src1_subreg_nr(devinfo, inst) * 4;
1470       type = elk_inst_3src_a16_src_type(devinfo, inst);
1471 
1472       if (elk_inst_3src_a16_src1_rep_ctrl(devinfo, inst)) {
1473          _vert_stride = ELK_VERTICAL_STRIDE_0;
1474          _width = ELK_WIDTH_1;
1475          _horiz_stride = ELK_HORIZONTAL_STRIDE_0;
1476       } else {
1477          _vert_stride = ELK_VERTICAL_STRIDE_4;
1478          _width = ELK_WIDTH_4;
1479          _horiz_stride = ELK_HORIZONTAL_STRIDE_1;
1480       }
1481    }
1482    is_scalar_region = _vert_stride == ELK_VERTICAL_STRIDE_0 &&
1483                       _width == ELK_WIDTH_1 &&
1484                       _horiz_stride == ELK_HORIZONTAL_STRIDE_0;
1485 
1486    subreg_nr /= elk_reg_type_to_size(type);
1487 
1488    err |= control(file, "negate", m_negate,
1489                   elk_inst_3src_src1_negate(devinfo, inst), NULL);
1490    err |= control(file, "abs", _abs, elk_inst_3src_src1_abs(devinfo, inst), NULL);
1491 
1492    err |= reg(file, _file, reg_nr);
1493    if (err == -1)
1494       return 0;
1495    if (subreg_nr || is_scalar_region)
1496       format(file, ".%d", subreg_nr);
1497    src_align1_region(file, _vert_stride, _width, _horiz_stride);
1498    if (!is_scalar_region && !is_align1)
1499       err |= src_swizzle(file, elk_inst_3src_a16_src1_swizzle(devinfo, inst));
1500    string(file, elk_reg_type_to_letters(type));
1501    return err;
1502 }
1503 
1504 static int
src2_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1505 src2_3src(FILE *file, const struct intel_device_info *devinfo,
1506           const elk_inst *inst)
1507 {
1508    int err = 0;
1509    unsigned reg_nr, subreg_nr;
1510    enum elk_reg_file _file;
1511    enum elk_reg_type type;
1512    enum elk_vertical_stride _vert_stride;
1513    enum elk_width _width;
1514    enum elk_horizontal_stride _horiz_stride;
1515    bool is_scalar_region;
1516    bool is_align1 = elk_inst_3src_access_mode(devinfo, inst) == ELK_ALIGN_1;
1517 
1518    if (devinfo->ver < 10 && is_align1)
1519       return 0;
1520 
1521    if (is_align1) {
1522       if (devinfo->ver >= 12 && !elk_inst_3src_a1_src2_is_imm(devinfo, inst)) {
1523          _file = elk_inst_3src_a1_src2_reg_file(devinfo, inst);
1524       } else if (elk_inst_3src_a1_src2_reg_file(devinfo, inst) ==
1525                  ELK_ALIGN1_3SRC_GENERAL_REGISTER_FILE) {
1526          _file = ELK_GENERAL_REGISTER_FILE;
1527       } else {
1528          _file = ELK_IMMEDIATE_VALUE;
1529          uint16_t imm_val = elk_inst_3src_a1_src2_imm(devinfo, inst);
1530          enum elk_reg_type type = elk_inst_3src_a1_src2_type(devinfo, inst);
1531 
1532          if (type == ELK_REGISTER_TYPE_W) {
1533             format(file, "%dW", imm_val);
1534          } else if (type == ELK_REGISTER_TYPE_UW) {
1535             format(file, "0x%04xUW", imm_val);
1536          } else if (type == ELK_REGISTER_TYPE_HF) {
1537             format(file, "0x%04xHF", imm_val);
1538          }
1539          return 0;
1540       }
1541 
1542       reg_nr = elk_inst_3src_src2_reg_nr(devinfo, inst);
1543       subreg_nr = elk_inst_3src_a1_src2_subreg_nr(devinfo, inst);
1544       type = elk_inst_3src_a1_src2_type(devinfo, inst);
1545       /* FINISHME: No vertical stride on src2. Is using the hstride in place
1546        *           correct? Doesn't seem like it, since there's hstride=1 but
1547        *           no vstride=1.
1548        */
1549       _vert_stride = vstride_from_align1_3src_hstride(
1550                         elk_inst_3src_a1_src2_hstride(devinfo, inst));
1551       _horiz_stride = hstride_from_align1_3src_hstride(
1552                          elk_inst_3src_a1_src2_hstride(devinfo, inst));
1553       _width = implied_width(_vert_stride, _horiz_stride);
1554    } else {
1555       _file = ELK_GENERAL_REGISTER_FILE;
1556       reg_nr = elk_inst_3src_src2_reg_nr(devinfo, inst);
1557       subreg_nr = elk_inst_3src_a16_src2_subreg_nr(devinfo, inst) * 4;
1558       type = elk_inst_3src_a16_src_type(devinfo, inst);
1559 
1560       if (elk_inst_3src_a16_src2_rep_ctrl(devinfo, inst)) {
1561          _vert_stride = ELK_VERTICAL_STRIDE_0;
1562          _width = ELK_WIDTH_1;
1563          _horiz_stride = ELK_HORIZONTAL_STRIDE_0;
1564       } else {
1565          _vert_stride = ELK_VERTICAL_STRIDE_4;
1566          _width = ELK_WIDTH_4;
1567          _horiz_stride = ELK_HORIZONTAL_STRIDE_1;
1568       }
1569    }
1570    is_scalar_region = _vert_stride == ELK_VERTICAL_STRIDE_0 &&
1571                       _width == ELK_WIDTH_1 &&
1572                       _horiz_stride == ELK_HORIZONTAL_STRIDE_0;
1573 
1574    subreg_nr /= elk_reg_type_to_size(type);
1575 
1576    err |= control(file, "negate", m_negate,
1577                   elk_inst_3src_src2_negate(devinfo, inst), NULL);
1578    err |= control(file, "abs", _abs, elk_inst_3src_src2_abs(devinfo, inst), NULL);
1579 
1580    err |= reg(file, _file, reg_nr);
1581    if (err == -1)
1582       return 0;
1583    if (subreg_nr || is_scalar_region)
1584       format(file, ".%d", subreg_nr);
1585    src_align1_region(file, _vert_stride, _width, _horiz_stride);
1586    if (!is_scalar_region && !is_align1)
1587       err |= src_swizzle(file, elk_inst_3src_a16_src2_swizzle(devinfo, inst));
1588    string(file, elk_reg_type_to_letters(type));
1589    return err;
1590 }
1591 
1592 static int
src0_dpas_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1593 src0_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
1594                const elk_inst *inst)
1595 {
1596    uint32_t reg_file = elk_inst_dpas_3src_src0_reg_file(devinfo, inst);
1597 
1598    if (reg(file, reg_file, elk_inst_dpas_3src_src0_reg_nr(devinfo, inst)) == -1)
1599       return 0;
1600 
1601    unsigned subreg_nr = elk_inst_dpas_3src_src0_subreg_nr(devinfo, inst);
1602    enum elk_reg_type type = elk_inst_dpas_3src_src0_type(devinfo, inst);
1603 
1604    if (subreg_nr)
1605       format(file, ".%d", subreg_nr);
1606    src_align1_region(file, 1, 1, 0);
1607 
1608    string(file, elk_reg_type_to_letters(type));
1609 
1610    return 0;
1611 }
1612 
1613 static int
src1_dpas_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1614 src1_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
1615                const elk_inst *inst)
1616 {
1617    uint32_t reg_file = elk_inst_dpas_3src_src1_reg_file(devinfo, inst);
1618 
1619    if (reg(file, reg_file, elk_inst_dpas_3src_src1_reg_nr(devinfo, inst)) == -1)
1620       return 0;
1621 
1622    unsigned subreg_nr = elk_inst_dpas_3src_src1_subreg_nr(devinfo, inst);
1623    enum elk_reg_type type = elk_inst_dpas_3src_src1_type(devinfo, inst);
1624 
1625    if (subreg_nr)
1626       format(file, ".%d", subreg_nr);
1627    src_align1_region(file, 1, 1, 0);
1628 
1629    string(file, elk_reg_type_to_letters(type));
1630 
1631    return 0;
1632 }
1633 
1634 static int
src2_dpas_3src(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1635 src2_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
1636                const elk_inst *inst)
1637 {
1638    uint32_t reg_file = elk_inst_dpas_3src_src2_reg_file(devinfo, inst);
1639 
1640    if (reg(file, reg_file, elk_inst_dpas_3src_src2_reg_nr(devinfo, inst)) == -1)
1641       return 0;
1642 
1643    unsigned subreg_nr = elk_inst_dpas_3src_src2_subreg_nr(devinfo, inst);
1644    enum elk_reg_type type = elk_inst_dpas_3src_src2_type(devinfo, inst);
1645 
1646    if (subreg_nr)
1647       format(file, ".%d", subreg_nr);
1648    src_align1_region(file, 1, 1, 0);
1649 
1650    string(file, elk_reg_type_to_letters(type));
1651 
1652    return 0;
1653 }
1654 
1655 static int
imm(FILE * file,const struct elk_isa_info * isa,enum elk_reg_type type,const elk_inst * inst)1656 imm(FILE *file, const struct elk_isa_info *isa, enum elk_reg_type type,
1657     const elk_inst *inst)
1658 {
1659    const struct intel_device_info *devinfo = isa->devinfo;
1660 
1661    switch (type) {
1662    case ELK_REGISTER_TYPE_UQ:
1663       format(file, "0x%016"PRIx64"UQ", elk_inst_imm_uq(devinfo, inst));
1664       break;
1665    case ELK_REGISTER_TYPE_Q:
1666       format(file, "0x%016"PRIx64"Q", elk_inst_imm_uq(devinfo, inst));
1667       break;
1668    case ELK_REGISTER_TYPE_UD:
1669       format(file, "0x%08xUD", elk_inst_imm_ud(devinfo, inst));
1670       break;
1671    case ELK_REGISTER_TYPE_D:
1672       format(file, "%dD", elk_inst_imm_d(devinfo, inst));
1673       break;
1674    case ELK_REGISTER_TYPE_UW:
1675       format(file, "0x%04xUW", (uint16_t) elk_inst_imm_ud(devinfo, inst));
1676       break;
1677    case ELK_REGISTER_TYPE_W:
1678       format(file, "%dW", (int16_t) elk_inst_imm_d(devinfo, inst));
1679       break;
1680    case ELK_REGISTER_TYPE_UV:
1681       format(file, "0x%08xUV", elk_inst_imm_ud(devinfo, inst));
1682       break;
1683    case ELK_REGISTER_TYPE_VF:
1684       format(file, "0x%"PRIx64"VF", elk_inst_bits(inst, 127, 96));
1685       pad(file, 48);
1686       format(file, "/* [%-gF, %-gF, %-gF, %-gF]VF */",
1687              elk_vf_to_float(elk_inst_imm_ud(devinfo, inst)),
1688              elk_vf_to_float(elk_inst_imm_ud(devinfo, inst) >> 8),
1689              elk_vf_to_float(elk_inst_imm_ud(devinfo, inst) >> 16),
1690              elk_vf_to_float(elk_inst_imm_ud(devinfo, inst) >> 24));
1691       break;
1692    case ELK_REGISTER_TYPE_V:
1693       format(file, "0x%08xV", elk_inst_imm_ud(devinfo, inst));
1694       break;
1695    case ELK_REGISTER_TYPE_F:
1696       /* The DIM instruction's src0 uses an F type but contains a
1697        * 64-bit immediate
1698        */
1699       if (elk_inst_opcode(isa, inst) == ELK_OPCODE_DIM) {
1700          format(file, "0x%"PRIx64"F", elk_inst_bits(inst, 127, 64));
1701          pad(file, 48);
1702          format(file, "/* %-gF */", elk_inst_imm_df(devinfo, inst));
1703       } else {
1704          format(file, "0x%"PRIx64"F", elk_inst_bits(inst, 127, 96));
1705          pad(file, 48);
1706          format(file, " /* %-gF */", elk_inst_imm_f(devinfo, inst));
1707       }
1708       break;
1709    case ELK_REGISTER_TYPE_DF:
1710       format(file, "0x%016"PRIx64"DF", elk_inst_imm_uq(devinfo, inst));
1711       pad(file, 48);
1712       format(file, "/* %-gDF */", elk_inst_imm_df(devinfo, inst));
1713       break;
1714    case ELK_REGISTER_TYPE_HF:
1715       format(file, "0x%04xHF",
1716              (uint16_t) elk_inst_imm_ud(devinfo, inst));
1717       pad(file, 48);
1718       format(file, "/* %-gHF */",
1719              _mesa_half_to_float((uint16_t) elk_inst_imm_ud(devinfo, inst)));
1720       break;
1721    case ELK_REGISTER_TYPE_NF:
1722    case ELK_REGISTER_TYPE_UB:
1723    case ELK_REGISTER_TYPE_B:
1724       format(file, "*** invalid immediate type %d ", type);
1725    }
1726    return 0;
1727 }
1728 
1729 static int
src_sends_da(FILE * file,const struct intel_device_info * devinfo,enum elk_reg_type type,enum elk_reg_file _reg_file,unsigned _reg_nr,unsigned _reg_subnr)1730 src_sends_da(FILE *file,
1731              const struct intel_device_info *devinfo,
1732              enum elk_reg_type type,
1733              enum elk_reg_file _reg_file,
1734              unsigned _reg_nr,
1735              unsigned _reg_subnr)
1736 {
1737    int err = 0;
1738 
1739    err |= reg(file, _reg_file, _reg_nr);
1740    if (err == -1)
1741       return 0;
1742    if (_reg_subnr)
1743       format(file, ".1");
1744    string(file, elk_reg_type_to_letters(type));
1745 
1746    return err;
1747 }
1748 
1749 static int
src_sends_ia(FILE * file,const struct intel_device_info * devinfo,enum elk_reg_type type,int _addr_imm,unsigned _addr_subreg_nr)1750 src_sends_ia(FILE *file,
1751              const struct intel_device_info *devinfo,
1752              enum elk_reg_type type,
1753              int _addr_imm,
1754              unsigned _addr_subreg_nr)
1755 {
1756    string(file, "g[a0");
1757    if (_addr_subreg_nr)
1758       format(file, ".1");
1759    if (_addr_imm)
1760       format(file, " %d", _addr_imm);
1761    string(file, "]");
1762    string(file, elk_reg_type_to_letters(type));
1763 
1764    return 0;
1765 }
1766 
/**
 * Print the address-register source used for an indirect send descriptor.
 *
 * Writes "a0", then ".<n>" when \p _addr_subreg_nr is non-zero, then the
 * fixed "<0>UD" region/type suffix.  \p devinfo is not used.
 *
 * Always returns 0.
 */
static int
src_send_desc_ia(FILE *file,
                 const struct intel_device_info *devinfo,
                 unsigned _addr_subreg_nr)
{
   const unsigned subreg = _addr_subreg_nr;

   string(file, "a0");

   if (subreg != 0)
      format(file, ".%d", subreg);

   format(file, "<0>UD");
   return 0;
}
1779 
1780 static int
src0(FILE * file,const struct elk_isa_info * isa,const elk_inst * inst)1781 src0(FILE *file, const struct elk_isa_info *isa, const elk_inst *inst)
1782 {
1783    const struct intel_device_info *devinfo = isa->devinfo;
1784 
1785    if (is_split_send(devinfo, elk_inst_opcode(isa, inst))) {
1786       if (devinfo->ver >= 12) {
1787          return src_sends_da(file,
1788                              devinfo,
1789                              ELK_REGISTER_TYPE_UD,
1790                              elk_inst_send_src0_reg_file(devinfo, inst),
1791                              elk_inst_src0_da_reg_nr(devinfo, inst),
1792                              0);
1793       } else if (elk_inst_send_src0_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1794          return src_sends_da(file,
1795                              devinfo,
1796                              ELK_REGISTER_TYPE_UD,
1797                              ELK_GENERAL_REGISTER_FILE,
1798                              elk_inst_src0_da_reg_nr(devinfo, inst),
1799                              elk_inst_src0_da16_subreg_nr(devinfo, inst));
1800       } else {
1801          return src_sends_ia(file,
1802                              devinfo,
1803                              ELK_REGISTER_TYPE_UD,
1804                              elk_inst_send_src0_ia16_addr_imm(devinfo, inst),
1805                              elk_inst_src0_ia_subreg_nr(devinfo, inst));
1806       }
1807    } else if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1808       return imm(file, isa, elk_inst_src0_type(devinfo, inst), inst);
1809    } else if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
1810       if (elk_inst_src0_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1811          return src_da1(file,
1812                         devinfo,
1813                         elk_inst_opcode(isa, inst),
1814                         elk_inst_src0_type(devinfo, inst),
1815                         elk_inst_src0_reg_file(devinfo, inst),
1816                         elk_inst_src0_vstride(devinfo, inst),
1817                         elk_inst_src0_width(devinfo, inst),
1818                         elk_inst_src0_hstride(devinfo, inst),
1819                         elk_inst_src0_da_reg_nr(devinfo, inst),
1820                         elk_inst_src0_da1_subreg_nr(devinfo, inst),
1821                         elk_inst_src0_abs(devinfo, inst),
1822                         elk_inst_src0_negate(devinfo, inst));
1823       } else {
1824          return src_ia1(file,
1825                         devinfo,
1826                         elk_inst_opcode(isa, inst),
1827                         elk_inst_src0_type(devinfo, inst),
1828                         elk_inst_src0_ia1_addr_imm(devinfo, inst),
1829                         elk_inst_src0_ia_subreg_nr(devinfo, inst),
1830                         elk_inst_src0_negate(devinfo, inst),
1831                         elk_inst_src0_abs(devinfo, inst),
1832                         elk_inst_src0_hstride(devinfo, inst),
1833                         elk_inst_src0_width(devinfo, inst),
1834                         elk_inst_src0_vstride(devinfo, inst));
1835       }
1836    } else {
1837       if (elk_inst_src0_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1838          return src_da16(file,
1839                          devinfo,
1840                          elk_inst_opcode(isa, inst),
1841                          elk_inst_src0_type(devinfo, inst),
1842                          elk_inst_src0_reg_file(devinfo, inst),
1843                          elk_inst_src0_vstride(devinfo, inst),
1844                          elk_inst_src0_da_reg_nr(devinfo, inst),
1845                          elk_inst_src0_da16_subreg_nr(devinfo, inst),
1846                          elk_inst_src0_abs(devinfo, inst),
1847                          elk_inst_src0_negate(devinfo, inst),
1848                          elk_inst_src0_da16_swiz_x(devinfo, inst),
1849                          elk_inst_src0_da16_swiz_y(devinfo, inst),
1850                          elk_inst_src0_da16_swiz_z(devinfo, inst),
1851                          elk_inst_src0_da16_swiz_w(devinfo, inst));
1852       } else {
1853          string(file, "Indirect align16 address mode not supported");
1854          return 1;
1855       }
1856    }
1857 }
1858 
1859 static int
src1(FILE * file,const struct elk_isa_info * isa,const elk_inst * inst)1860 src1(FILE *file, const struct elk_isa_info *isa, const elk_inst *inst)
1861 {
1862    const struct intel_device_info *devinfo = isa->devinfo;
1863 
1864    if (is_split_send(devinfo, elk_inst_opcode(isa, inst))) {
1865       return src_sends_da(file,
1866                           devinfo,
1867                           ELK_REGISTER_TYPE_UD,
1868                           elk_inst_send_src1_reg_file(devinfo, inst),
1869                           elk_inst_send_src1_reg_nr(devinfo, inst),
1870                           0 /* subreg_nr */);
1871    } else if (elk_inst_src1_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
1872       return imm(file, isa, elk_inst_src1_type(devinfo, inst), inst);
1873    } else if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
1874       if (elk_inst_src1_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1875          return src_da1(file,
1876                         devinfo,
1877                         elk_inst_opcode(isa, inst),
1878                         elk_inst_src1_type(devinfo, inst),
1879                         elk_inst_src1_reg_file(devinfo, inst),
1880                         elk_inst_src1_vstride(devinfo, inst),
1881                         elk_inst_src1_width(devinfo, inst),
1882                         elk_inst_src1_hstride(devinfo, inst),
1883                         elk_inst_src1_da_reg_nr(devinfo, inst),
1884                         elk_inst_src1_da1_subreg_nr(devinfo, inst),
1885                         elk_inst_src1_abs(devinfo, inst),
1886                         elk_inst_src1_negate(devinfo, inst));
1887       } else {
1888          return src_ia1(file,
1889                         devinfo,
1890                         elk_inst_opcode(isa, inst),
1891                         elk_inst_src1_type(devinfo, inst),
1892                         elk_inst_src1_ia1_addr_imm(devinfo, inst),
1893                         elk_inst_src1_ia_subreg_nr(devinfo, inst),
1894                         elk_inst_src1_negate(devinfo, inst),
1895                         elk_inst_src1_abs(devinfo, inst),
1896                         elk_inst_src1_hstride(devinfo, inst),
1897                         elk_inst_src1_width(devinfo, inst),
1898                         elk_inst_src1_vstride(devinfo, inst));
1899       }
1900    } else {
1901       if (elk_inst_src1_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
1902          return src_da16(file,
1903                          devinfo,
1904                          elk_inst_opcode(isa, inst),
1905                          elk_inst_src1_type(devinfo, inst),
1906                          elk_inst_src1_reg_file(devinfo, inst),
1907                          elk_inst_src1_vstride(devinfo, inst),
1908                          elk_inst_src1_da_reg_nr(devinfo, inst),
1909                          elk_inst_src1_da16_subreg_nr(devinfo, inst),
1910                          elk_inst_src1_abs(devinfo, inst),
1911                          elk_inst_src1_negate(devinfo, inst),
1912                          elk_inst_src1_da16_swiz_x(devinfo, inst),
1913                          elk_inst_src1_da16_swiz_y(devinfo, inst),
1914                          elk_inst_src1_da16_swiz_z(devinfo, inst),
1915                          elk_inst_src1_da16_swiz_w(devinfo, inst));
1916       } else {
1917          string(file, "Indirect align16 address mode not supported");
1918          return 1;
1919       }
1920    }
1921 }
1922 
1923 static int
qtr_ctrl(FILE * file,const struct intel_device_info * devinfo,const elk_inst * inst)1924 qtr_ctrl(FILE *file, const struct intel_device_info *devinfo,
1925          const elk_inst *inst)
1926 {
1927    int qtr_ctl = elk_inst_qtr_control(devinfo, inst);
1928    int exec_size = 1 << elk_inst_exec_size(devinfo, inst);
1929    const unsigned nib_ctl = devinfo->ver < 7 || devinfo->ver >= 20 ? 0 :
1930                             elk_inst_nib_control(devinfo, inst);
1931 
1932    if (exec_size < 8 || nib_ctl) {
1933       format(file, " %dN", qtr_ctl * 2 + nib_ctl + 1);
1934    } else if (exec_size == 8) {
1935       switch (qtr_ctl) {
1936       case 0:
1937          string(file, " 1Q");
1938          break;
1939       case 1:
1940          string(file, " 2Q");
1941          break;
1942       case 2:
1943          string(file, " 3Q");
1944          break;
1945       case 3:
1946          string(file, " 4Q");
1947          break;
1948       }
1949    } else if (exec_size == 16) {
1950       if (qtr_ctl < 2)
1951          string(file, " 1H");
1952       else
1953          string(file, " 2H");
1954    }
1955    return 0;
1956 }
1957 
1958 static bool
inst_has_type(const struct elk_isa_info * isa,const elk_inst * inst,enum elk_reg_type type)1959 inst_has_type(const struct elk_isa_info *isa,
1960               const elk_inst *inst,
1961               enum elk_reg_type type)
1962 {
1963    const struct intel_device_info *devinfo = isa->devinfo;
1964    const unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1965 
1966    if (elk_inst_dst_type(devinfo, inst) == type)
1967       return true;
1968 
1969    if (num_sources >= 3) {
1970       if (elk_inst_3src_access_mode(devinfo, inst) == ELK_ALIGN_1)
1971          return elk_inst_3src_a1_src0_type(devinfo, inst) == type ||
1972                 elk_inst_3src_a1_src1_type(devinfo, inst) == type ||
1973                 elk_inst_3src_a1_src2_type(devinfo, inst) == type;
1974       else
1975          return elk_inst_3src_a16_src_type(devinfo, inst) == type;
1976    } else if (num_sources == 2) {
1977       return elk_inst_src0_type(devinfo, inst) == type ||
1978              elk_inst_src1_type(devinfo, inst) == type;
1979    } else {
1980       return elk_inst_src0_type(devinfo, inst) == type;
1981    }
1982 }
1983 
1984 static int
swsb(FILE * file,const struct elk_isa_info * isa,const elk_inst * inst)1985 swsb(FILE *file, const struct elk_isa_info *isa, const elk_inst *inst)
1986 {
1987    const struct intel_device_info *devinfo = isa->devinfo;
1988    const enum elk_opcode opcode = elk_inst_opcode(isa, inst);
1989    const uint32_t x = elk_inst_swsb(devinfo, inst);
1990    const bool is_unordered =
1991       opcode == ELK_OPCODE_SEND || opcode == ELK_OPCODE_SENDC ||
1992       opcode == ELK_OPCODE_MATH || opcode == ELK_OPCODE_DPAS ||
1993       (devinfo->has_64bit_float_via_math_pipe &&
1994        inst_has_type(isa, inst, ELK_REGISTER_TYPE_DF));
1995    const struct tgl_swsb swsb = tgl_swsb_decode(devinfo, is_unordered, x);
1996    if (swsb.regdist)
1997       format(file, " %s@%d",
1998              (swsb.pipe == TGL_PIPE_FLOAT ? "F" :
1999               swsb.pipe == TGL_PIPE_INT ? "I" :
2000               swsb.pipe == TGL_PIPE_LONG ? "L" :
2001               swsb.pipe == TGL_PIPE_ALL ? "A"  : "" ),
2002              swsb.regdist);
2003    if (swsb.mode)
2004       format(file, " $%d%s", swsb.sbid,
2005              (swsb.mode & TGL_SBID_SET ? "" :
2006               swsb.mode & TGL_SBID_DST ? ".dst" : ".src"));
2007    return 0;
2008 }
2009 
#ifdef DEBUG
/**
 * Debug-only helper: disassemble an instruction given as four 32-bit dwords.
 *
 * dw0/dw1 form the low and high halves of inst.data[0], dw2/dw3 those of
 * inst.data[1].  The result is written to stderr as an uncompacted
 * instruction at offset 0 with no label list.
 *
 * Returns the value of elk_disassemble_inst().
 */
static __attribute__((__unused__)) int
elk_disassemble_imm(const struct elk_isa_info *isa,
                    uint32_t dw3, uint32_t dw2, uint32_t dw1, uint32_t dw0)
{
   const uint64_t low_qword = ((uint64_t) dw1 << 32) | (uint64_t) dw0;
   const uint64_t high_qword = ((uint64_t) dw3 << 32) | (uint64_t) dw2;
   elk_inst inst;

   inst.data[0] = low_qword;
   inst.data[1] = high_qword;

   return elk_disassemble_inst(stderr, isa, &inst, false, 0, NULL);
}
#endif
2021 
2022 static void
write_label(FILE * file,const struct intel_device_info * devinfo,const struct elk_label * root_label,int offset,int jump)2023 write_label(FILE *file, const struct intel_device_info *devinfo,
2024             const struct elk_label *root_label,
2025             int offset, int jump)
2026 {
2027    if (root_label != NULL) {
2028       int to_bytes_scale = sizeof(elk_inst) / elk_jump_scale(devinfo);
2029       const struct elk_label *label =
2030          elk_find_label(root_label, offset + jump * to_bytes_scale);
2031       if (label != NULL) {
2032          format(file, " LABEL%d", label->number);
2033       }
2034    }
2035 }
2036 
2037 static void
lsc_disassemble_ex_desc(const struct intel_device_info * devinfo,uint32_t imm_desc,uint32_t imm_ex_desc,FILE * file)2038 lsc_disassemble_ex_desc(const struct intel_device_info *devinfo,
2039                         uint32_t imm_desc,
2040                         uint32_t imm_ex_desc,
2041                         FILE *file)
2042 {
2043    const unsigned addr_type = lsc_msg_desc_addr_type(devinfo, imm_desc);
2044    switch (addr_type) {
2045    case LSC_ADDR_SURFTYPE_FLAT:
2046       format(file, " base_offset %u ",
2047              lsc_flat_ex_desc_base_offset(devinfo, imm_ex_desc));
2048       break;
2049    case LSC_ADDR_SURFTYPE_BSS:
2050    case LSC_ADDR_SURFTYPE_SS:
2051       format(file, " surface_state_index %u ",
2052              lsc_bss_ex_desc_index(devinfo, imm_ex_desc));
2053       break;
2054    case LSC_ADDR_SURFTYPE_BTI:
2055       format(file, " BTI %u ",
2056              lsc_bti_ex_desc_index(devinfo, imm_ex_desc));
2057       format(file, " base_offset %u ",
2058              lsc_bti_ex_desc_base_offset(devinfo, imm_ex_desc));
2059       break;
2060    default:
2061       format(file, "unsupported address surface type %d", addr_type);
2062       break;
2063    }
2064 }
2065 
2066 static inline bool
elk_sfid_is_lsc(unsigned sfid)2067 elk_sfid_is_lsc(unsigned sfid)
2068 {
2069    switch (sfid) {
2070    case GFX12_SFID_UGM:
2071    case GFX12_SFID_SLM:
2072    case GFX12_SFID_TGM:
2073       return true;
2074    default:
2075       break;
2076    }
2077 
2078    return false;
2079 }
2080 
2081 int
elk_disassemble_inst(FILE * file,const struct elk_isa_info * isa,const elk_inst * inst,bool is_compacted,int offset,const struct elk_label * root_label)2082 elk_disassemble_inst(FILE *file, const struct elk_isa_info *isa,
2083                      const elk_inst *inst, bool is_compacted,
2084                      int offset, const struct elk_label *root_label)
2085 {
2086    const struct intel_device_info *devinfo = isa->devinfo;
2087 
2088    int err = 0;
2089    int space = 0;
2090 
2091    const enum elk_opcode opcode = elk_inst_opcode(isa, inst);
2092    const struct elk_opcode_desc *desc = elk_opcode_desc(isa, opcode);
2093 
2094    if (elk_inst_pred_control(devinfo, inst)) {
2095       string(file, "(");
2096       err |= control(file, "predicate inverse", pred_inv,
2097                      elk_inst_pred_inv(devinfo, inst), NULL);
2098       format(file, "f%"PRIu64".%"PRIu64,
2099              devinfo->ver >= 7 ? elk_inst_flag_reg_nr(devinfo, inst) : 0,
2100              elk_inst_flag_subreg_nr(devinfo, inst));
2101       if (devinfo->ver >= 20) {
2102          err |= control(file, "predicate control", xe2_pred_ctrl,
2103                         elk_inst_pred_control(devinfo, inst), NULL);
2104       } else if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
2105          err |= control(file, "predicate control align1", pred_ctrl_align1,
2106                         elk_inst_pred_control(devinfo, inst), NULL);
2107       } else {
2108          err |= control(file, "predicate control align16", elk_pred_ctrl_align16,
2109                         elk_inst_pred_control(devinfo, inst), NULL);
2110       }
2111       string(file, ") ");
2112    }
2113 
2114    err |= print_opcode(file, isa, opcode);
2115 
2116    if (!is_send(opcode))
2117       err |= control(file, "saturate", saturate, elk_inst_saturate(devinfo, inst),
2118                      NULL);
2119 
2120    err |= control(file, "debug control", debug_ctrl,
2121                   elk_inst_debug_control(devinfo, inst), NULL);
2122 
2123    if (opcode == ELK_OPCODE_MATH) {
2124       string(file, " ");
2125       err |= control(file, "function", math_function,
2126                      elk_inst_math_function(devinfo, inst), NULL);
2127 
2128    } else if (opcode == ELK_OPCODE_SYNC) {
2129       string(file, " ");
2130       err |= control(file, "function", sync_function,
2131                      elk_inst_cond_modifier(devinfo, inst), NULL);
2132 
2133    } else if (opcode == ELK_OPCODE_DPAS) {
2134       string(file, ".");
2135 
2136       err |= control(file, "systolic depth", dpas_systolic_depth,
2137                      elk_inst_dpas_3src_sdepth(devinfo, inst), NULL);
2138 
2139       const unsigned rcount = elk_inst_dpas_3src_rcount(devinfo, inst) + 1;
2140 
2141       format(file, "x%d", rcount);
2142    } else if (!is_send(opcode) &&
2143               (devinfo->ver < 12 ||
2144                elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE ||
2145                type_sz(elk_inst_src0_type(devinfo, inst)) < 8)) {
2146       err |= control(file, "conditional modifier", elk_conditional_modifier,
2147                      elk_inst_cond_modifier(devinfo, inst), NULL);
2148 
2149       /* If we're using the conditional modifier, print which flags reg is
2150        * used for it.  Note that on gfx6+, the embedded-condition SEL and
2151        * control flow doesn't update flags.
2152        */
2153       if (elk_inst_cond_modifier(devinfo, inst) &&
2154           (devinfo->ver < 6 || (opcode != ELK_OPCODE_SEL &&
2155                                 opcode != ELK_OPCODE_CSEL &&
2156                                 opcode != ELK_OPCODE_IF &&
2157                                 opcode != ELK_OPCODE_WHILE))) {
2158          format(file, ".f%"PRIu64".%"PRIu64,
2159                 devinfo->ver >= 7 ? elk_inst_flag_reg_nr(devinfo, inst) : 0,
2160                 elk_inst_flag_subreg_nr(devinfo, inst));
2161       }
2162    }
2163 
2164    if (opcode != ELK_OPCODE_NOP && opcode != ELK_OPCODE_NENOP) {
2165       string(file, "(");
2166       err |= control(file, "execution size", exec_size,
2167                      elk_inst_exec_size(devinfo, inst), NULL);
2168       string(file, ")");
2169    }
2170 
2171    if (opcode == ELK_OPCODE_SEND && devinfo->ver < 6)
2172       format(file, " %"PRIu64, elk_inst_base_mrf(devinfo, inst));
2173 
2174    if (elk_has_uip(devinfo, opcode)) {
2175       /* Instructions that have UIP also have JIP. */
2176       pad(file, 16);
2177       string(file, "JIP: ");
2178       write_label(file, devinfo, root_label, offset, elk_inst_jip(devinfo, inst));
2179 
2180       pad(file, 38);
2181       string(file, "UIP: ");
2182       write_label(file, devinfo, root_label, offset, elk_inst_uip(devinfo, inst));
2183    } else if (elk_has_jip(devinfo, opcode)) {
2184       int jip;
2185       if (devinfo->ver >= 7) {
2186          jip = elk_inst_jip(devinfo, inst);
2187       } else {
2188          jip = elk_inst_gfx6_jump_count(devinfo, inst);
2189       }
2190 
2191       pad(file, 16);
2192       string(file, "JIP: ");
2193       write_label(file, devinfo, root_label, offset, jip);
2194    } else if (devinfo->ver < 6 && (opcode == ELK_OPCODE_BREAK ||
2195                                    opcode == ELK_OPCODE_CONTINUE ||
2196                                    opcode == ELK_OPCODE_ELSE)) {
2197       pad(file, 16);
2198       format(file, "Jump: %d", elk_inst_gfx4_jump_count(devinfo, inst));
2199       pad(file, 32);
2200       format(file, "Pop: %"PRIu64, elk_inst_gfx4_pop_count(devinfo, inst));
2201    } else if (devinfo->ver < 6 && (opcode == ELK_OPCODE_IF ||
2202                                    opcode == ELK_OPCODE_IFF ||
2203                                    opcode == ELK_OPCODE_HALT ||
2204                                    opcode == ELK_OPCODE_WHILE)) {
2205       pad(file, 16);
2206       format(file, "Jump: %d", elk_inst_gfx4_jump_count(devinfo, inst));
2207    } else if (devinfo->ver < 6 && opcode == ELK_OPCODE_ENDIF) {
2208       pad(file, 16);
2209       format(file, "Pop: %"PRIu64, elk_inst_gfx4_pop_count(devinfo, inst));
2210    } else if (opcode == ELK_OPCODE_JMPI) {
2211       pad(file, 16);
2212       err |= src1(file, isa, inst);
2213    } else if (opcode == ELK_OPCODE_DPAS) {
2214       pad(file, 16);
2215       err |= dest_dpas_3src(file, devinfo, inst);
2216 
2217       pad(file, 32);
2218       err |= src0_dpas_3src(file, devinfo, inst);
2219 
2220       pad(file, 48);
2221       err |= src1_dpas_3src(file, devinfo, inst);
2222 
2223       pad(file, 64);
2224       err |= src2_dpas_3src(file, devinfo, inst);
2225 
2226    } else if (desc && desc->nsrc == 3) {
2227       pad(file, 16);
2228       err |= dest_3src(file, devinfo, inst);
2229 
2230       pad(file, 32);
2231       err |= src0_3src(file, devinfo, inst);
2232 
2233       pad(file, 48);
2234       err |= src1_3src(file, devinfo, inst);
2235 
2236       pad(file, 64);
2237       err |= src2_3src(file, devinfo, inst);
2238    } else if (desc) {
2239       if (desc->ndst > 0) {
2240          pad(file, 16);
2241          err |= dest(file, isa, inst);
2242       }
2243 
2244       if (desc->nsrc > 0) {
2245          pad(file, 32);
2246          err |= src0(file, isa, inst);
2247       }
2248 
2249       if (desc->nsrc > 1) {
2250          pad(file, 48);
2251          err |= src1(file, isa, inst);
2252       }
2253    }
2254 
2255    if (is_send(opcode)) {
2256       enum elk_message_target sfid = elk_inst_sfid(devinfo, inst);
2257 
2258       bool has_imm_desc = false, has_imm_ex_desc = false;
2259       uint32_t imm_desc = 0, imm_ex_desc = 0;
2260       if (is_split_send(devinfo, opcode)) {
2261          pad(file, 64);
2262          if (elk_inst_send_sel_reg32_desc(devinfo, inst)) {
2263             /* show the indirect descriptor source */
2264             err |= src_send_desc_ia(file, devinfo, 0);
2265          } else {
2266             has_imm_desc = true;
2267             imm_desc = elk_inst_send_desc(devinfo, inst);
2268             fprintf(file, "0x%08"PRIx32, imm_desc);
2269          }
2270 
2271          pad(file, 80);
2272          if (elk_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
2273             /* show the indirect descriptor source */
2274             err |= src_send_desc_ia(file, devinfo,
2275                                     elk_inst_send_ex_desc_ia_subreg_nr(devinfo, inst));
2276          } else {
2277             has_imm_ex_desc = true;
2278             imm_ex_desc = elk_inst_sends_ex_desc(devinfo, inst);
2279             fprintf(file, "0x%08"PRIx32, imm_ex_desc);
2280          }
2281       } else {
2282          if (elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE) {
2283             /* show the indirect descriptor source */
2284             pad(file, 48);
2285             err |= src1(file, isa, inst);
2286             pad(file, 64);
2287          } else {
2288             has_imm_desc = true;
2289             imm_desc = elk_inst_send_desc(devinfo, inst);
2290             pad(file, 48);
2291          }
2292 
2293          /* Print message descriptor as immediate source */
2294          fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32);
2295       }
2296 
2297       newline(file);
2298       pad(file, 16);
2299       space = 0;
2300 
2301       fprintf(file, "            ");
2302       err |= control(file, "SFID", devinfo->ver >= 6 ? gfx6_sfid : gfx4_sfid,
2303                      sfid, &space);
2304       string(file, " MsgDesc:");
2305 
2306       if (!has_imm_desc) {
2307          format(file, " indirect");
2308       } else {
2309          bool unsupported = false;
2310          switch (sfid) {
2311          case ELK_SFID_MATH:
2312             err |= control(file, "math function", math_function,
2313                            elk_inst_math_msg_function(devinfo, inst), &space);
2314             err |= control(file, "math saturate", math_saturate,
2315                            elk_inst_math_msg_saturate(devinfo, inst), &space);
2316             err |= control(file, "math signed", math_signed,
2317                            elk_inst_math_msg_signed_int(devinfo, inst), &space);
2318             err |= control(file, "math scalar", math_scalar,
2319                            elk_inst_math_msg_data_type(devinfo, inst), &space);
2320             err |= control(file, "math precision", math_precision,
2321                            elk_inst_math_msg_precision(devinfo, inst), &space);
2322             break;
2323          case ELK_SFID_SAMPLER:
2324             if (devinfo->ver >= 20) {
2325                err |= control(file, "sampler message", xe2_sampler_msg_type,
2326                               elk_sampler_desc_msg_type(devinfo, imm_desc),
2327                               &space);
2328                err |= control(file, "sampler simd mode", xe2_sampler_simd_mode,
2329                               elk_sampler_desc_simd_mode(devinfo, imm_desc),
2330                               &space);
2331                if (elk_sampler_desc_return_format(devinfo, imm_desc)) {
2332                   string(file, " HP");
2333                }
2334                format(file, " Surface = %u Sampler = %u",
2335                       elk_sampler_desc_binding_table_index(devinfo, imm_desc),
2336                       elk_sampler_desc_sampler(devinfo, imm_desc));
2337             } else if (devinfo->ver >= 5) {
2338                err |= control(file, "sampler message", gfx5_sampler_msg_type,
2339                               elk_sampler_desc_msg_type(devinfo, imm_desc),
2340                               &space);
2341                err |= control(file, "sampler simd mode", gfx5_sampler_simd_mode,
2342                               elk_sampler_desc_simd_mode(devinfo, imm_desc),
2343                               &space);
2344                if (devinfo->ver >= 8 &&
2345                    elk_sampler_desc_return_format(devinfo, imm_desc)) {
2346                   string(file, " HP");
2347                }
2348                format(file, " Surface = %u Sampler = %u",
2349                       elk_sampler_desc_binding_table_index(devinfo, imm_desc),
2350                       elk_sampler_desc_sampler(devinfo, imm_desc));
2351             } else {
2352                format(file, " (bti %u, sampler %u, msg_type %u, ",
2353                       elk_sampler_desc_binding_table_index(devinfo, imm_desc),
2354                       elk_sampler_desc_sampler(devinfo, imm_desc),
2355                       elk_sampler_desc_msg_type(devinfo, imm_desc));
2356                if (devinfo->verx10 != 45) {
2357                   err |= control(file, "sampler target format",
2358                                  sampler_target_format,
2359                                  elk_sampler_desc_return_format(devinfo, imm_desc),
2360                                  NULL);
2361                }
2362                string(file, ")");
2363             }
2364             break;
2365          case GFX6_SFID_DATAPORT_SAMPLER_CACHE:
2366          case GFX6_SFID_DATAPORT_CONSTANT_CACHE:
2367             /* aka ELK_SFID_DATAPORT_READ on Gfx4-5 */
2368             if (devinfo->ver >= 6) {
2369                format(file, " (bti %u, msg_ctrl %u, msg_type %u, write_commit %u)",
2370                       elk_dp_desc_binding_table_index(devinfo, imm_desc),
2371                       elk_dp_desc_msg_control(devinfo, imm_desc),
2372                       elk_dp_desc_msg_type(devinfo, imm_desc),
2373                       devinfo->ver >= 7 ? 0u :
2374                       elk_dp_write_desc_write_commit(devinfo, imm_desc));
2375             } else {
2376                bool is_965 = devinfo->verx10 == 40;
2377                err |= control(file, "DP read message type",
2378                               is_965 ? gfx4_dp_read_port_msg_type :
2379                                        g45_dp_read_port_msg_type,
2380                               elk_dp_read_desc_msg_type(devinfo, imm_desc),
2381                               &space);
2382 
2383                format(file, " MsgCtrl = 0x%u",
2384                       elk_dp_read_desc_msg_control(devinfo, imm_desc));
2385 
2386                format(file, " Surface = %u",
2387                       elk_dp_desc_binding_table_index(devinfo, imm_desc));
2388             }
2389             break;
2390 
2391          case GFX6_SFID_DATAPORT_RENDER_CACHE: {
2392             /* aka ELK_SFID_DATAPORT_WRITE on Gfx4-5 */
2393             unsigned msg_type = elk_fb_write_desc_msg_type(devinfo, imm_desc);
2394 
2395             err |= control(file, "DP rc message type",
2396                            dp_rc_msg_type(devinfo), msg_type, &space);
2397 
2398             bool is_rt_write = msg_type ==
2399                (devinfo->ver >= 6 ? GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
2400                                   : ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE);
2401 
2402             if (is_rt_write) {
2403                err |= control(file, "RT message type", m_rt_write_subtype,
2404                               elk_inst_rt_message_type(devinfo, inst), &space);
2405                if (devinfo->ver >= 6 && elk_inst_rt_slot_group(devinfo, inst))
2406                   string(file, " Hi");
2407                if (elk_fb_write_desc_last_render_target(devinfo, imm_desc))
2408                   string(file, " LastRT");
2409                if (devinfo->ver >= 10 &&
2410                    elk_fb_write_desc_coarse_write(devinfo, imm_desc))
2411                   string(file, " CoarseWrite");
2412                if (devinfo->ver < 7 &&
2413                    elk_fb_write_desc_write_commit(devinfo, imm_desc))
2414                   string(file, " WriteCommit");
2415             } else {
2416                format(file, " MsgCtrl = 0x%u",
2417                       elk_fb_write_desc_msg_control(devinfo, imm_desc));
2418             }
2419 
2420             format(file, " Surface = %u",
2421                    elk_fb_desc_binding_table_index(devinfo, imm_desc));
2422             break;
2423          }
2424 
2425          case ELK_SFID_URB: {
2426             if (devinfo->ver >= 20) {
2427                format(file, " (");
2428                const enum elk_lsc_opcode op = lsc_msg_desc_opcode(devinfo, imm_desc);
2429                err |= control(file, "operation", lsc_operation,
2430                               op, &space);
2431                format(file, ",");
2432                err |= control(file, "addr_size", lsc_addr_size,
2433                               lsc_msg_desc_addr_size(devinfo, imm_desc),
2434                               &space);
2435 
2436                format(file, ",");
2437                err |= control(file, "data_size", lsc_data_size,
2438                               lsc_msg_desc_data_size(devinfo, imm_desc),
2439                               &space);
2440                format(file, ",");
2441                if (elk_lsc_opcode_has_cmask(op)) {
2442                   err |= control(file, "component_mask",
2443                                  lsc_cmask_str,
2444                                  lsc_msg_desc_cmask(devinfo, imm_desc),
2445                                  &space);
2446                } else {
2447                   err |= control(file, "vector_size",
2448                                  lsc_vect_size_str,
2449                                  lsc_msg_desc_vect_size(devinfo, imm_desc),
2450                                  &space);
2451                   if (lsc_msg_desc_transpose(devinfo, imm_desc))
2452                      format(file, ", transpose");
2453                }
2454                switch(op) {
2455                case LSC_OP_LOAD_CMASK:
2456                case LSC_OP_LOAD:
2457                   format(file, ",");
2458                   err |= control(file, "cache_load",
2459                                  lsc_cache_load,
2460                                  lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
2461                                  &space);
2462                   break;
2463                default:
2464                   format(file, ",");
2465                   err |= control(file, "cache_store",
2466                                  lsc_cache_store,
2467                                  lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
2468                                  &space);
2469                   break;
2470                }
2471 
2472                format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
2473                format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
2474                format(file, " src1_len = %d", elk_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
2475                err |= control(file, "address_type", lsc_addr_surface_type,
2476                               lsc_msg_desc_addr_type(devinfo, imm_desc), &space);
2477                format(file, " )");
2478             } else {
2479                unsigned urb_opcode = elk_inst_urb_opcode(devinfo, inst);
2480 
2481                format(file, " offset %"PRIu64, elk_inst_urb_global_offset(devinfo, inst));
2482 
2483                space = 1;
2484 
2485                err |= control(file, "urb opcode",
2486                               devinfo->ver >= 7 ? gfx7_urb_opcode
2487                               : gfx5_urb_opcode,
2488                               urb_opcode, &space);
2489 
2490                if (devinfo->ver >= 7 &&
2491                    elk_inst_urb_per_slot_offset(devinfo, inst)) {
2492                   string(file, " per-slot");
2493                }
2494 
2495                if (urb_opcode == GFX8_URB_OPCODE_SIMD8_WRITE ||
2496                    urb_opcode == GFX8_URB_OPCODE_SIMD8_READ) {
2497                   if (elk_inst_urb_channel_mask_present(devinfo, inst))
2498                      string(file, " masked");
2499                } else if (urb_opcode != GFX125_URB_OPCODE_FENCE) {
2500                   err |= control(file, "urb swizzle", urb_swizzle,
2501                                  elk_inst_urb_swizzle_control(devinfo, inst),
2502                                  &space);
2503                }
2504 
2505                if (devinfo->ver < 7) {
2506                   err |= control(file, "urb allocate", urb_allocate,
2507                                  elk_inst_urb_allocate(devinfo, inst), &space);
2508                   err |= control(file, "urb used", urb_used,
2509                                  elk_inst_urb_used(devinfo, inst), &space);
2510                }
2511                if (devinfo->ver < 8) {
2512                   err |= control(file, "urb complete", urb_complete,
2513                                  elk_inst_urb_complete(devinfo, inst), &space);
2514                }
2515             }
2516             break;
2517          }
2518          case ELK_SFID_THREAD_SPAWNER:
2519             break;
2520 
2521          case ELK_SFID_MESSAGE_GATEWAY:
2522             format(file, " (%s)",
2523                    gfx7_gateway_subfuncid[elk_inst_gateway_subfuncid(devinfo, inst)]);
2524             break;
2525 
2526          case GFX12_SFID_SLM:
2527          case GFX12_SFID_TGM:
2528          case GFX12_SFID_UGM: {
2529             assert(devinfo->has_lsc);
2530             format(file, " (");
2531             const enum elk_lsc_opcode op = lsc_msg_desc_opcode(devinfo, imm_desc);
2532             err |= control(file, "operation", lsc_operation,
2533                            op, &space);
2534             format(file, ",");
2535             err |= control(file, "addr_size", lsc_addr_size,
2536                            lsc_msg_desc_addr_size(devinfo, imm_desc),
2537                            &space);
2538 
2539             if (op == LSC_OP_FENCE) {
2540                format(file, ",");
2541                err |= control(file, "scope", lsc_fence_scope,
2542                               lsc_fence_msg_desc_scope(devinfo, imm_desc),
2543                               &space);
2544                format(file, ",");
2545                err |= control(file, "flush_type", lsc_flush_type,
2546                               lsc_fence_msg_desc_flush_type(devinfo, imm_desc),
2547                               &space);
2548                format(file, ",");
2549                err |= control(file, "backup_mode_fence_routing",
2550                               lsc_backup_fence_routing,
2551                               lsc_fence_msg_desc_backup_routing(devinfo, imm_desc),
2552                               &space);
2553             } else {
2554                format(file, ",");
2555                err |= control(file, "data_size", lsc_data_size,
2556                               lsc_msg_desc_data_size(devinfo, imm_desc),
2557                               &space);
2558                format(file, ",");
2559                if (elk_lsc_opcode_has_cmask(op)) {
2560                   err |= control(file, "component_mask",
2561                                  lsc_cmask_str,
2562                                  lsc_msg_desc_cmask(devinfo, imm_desc),
2563                                  &space);
2564                } else {
2565                   err |= control(file, "vector_size",
2566                                  lsc_vect_size_str,
2567                                  lsc_msg_desc_vect_size(devinfo, imm_desc),
2568                                  &space);
2569                   if (lsc_msg_desc_transpose(devinfo, imm_desc))
2570                      format(file, ", transpose");
2571                }
2572                switch(op) {
2573                case LSC_OP_LOAD_CMASK:
2574                case LSC_OP_LOAD:
2575                   format(file, ",");
2576                   err |= control(file, "cache_load",
2577                                  devinfo->ver >= 20 ?
2578                                  xe2_lsc_cache_load :
2579                                  lsc_cache_load,
2580                                  lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
2581                                  &space);
2582                   break;
2583                default:
2584                   format(file, ",");
2585                   err |= control(file, "cache_store",
2586                                  devinfo->ver >= 20 ?
2587                                  xe2_lsc_cache_store :
2588                                  lsc_cache_store,
2589                                  lsc_msg_desc_cache_ctrl(devinfo, imm_desc),
2590                                  &space);
2591                   break;
2592                }
2593             }
2594             format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
2595             format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
2596 
2597             if (!elk_inst_send_sel_reg32_ex_desc(devinfo, inst))
2598                format(file, " src1_len = %d",
2599                       elk_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
2600 
2601             err |= control(file, "address_type", lsc_addr_surface_type,
2602                            lsc_msg_desc_addr_type(devinfo, imm_desc), &space);
2603             format(file, " )");
2604             break;
2605          }
2606 
2607          case GFX7_SFID_DATAPORT_DATA_CACHE:
2608             if (devinfo->ver >= 7) {
2609                format(file, " (");
2610                space = 0;
2611 
2612                err |= control(file, "DP DC0 message type",
2613                               dp_dc0_msg_type_gfx7,
2614                               elk_dp_desc_msg_type(devinfo, imm_desc), &space);
2615 
2616                format(file, ", bti %u, ",
2617                       elk_dp_desc_binding_table_index(devinfo, imm_desc));
2618 
2619                switch (elk_inst_dp_msg_type(devinfo, inst)) {
2620                case GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
2621                   control(file, "atomic op", aop,
2622                           elk_dp_desc_msg_control(devinfo, imm_desc) & 0xf,
2623                           &space);
2624                   break;
2625                case GFX7_DATAPORT_DC_OWORD_BLOCK_READ:
2626                case GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE: {
2627                   unsigned msg_ctrl = elk_dp_desc_msg_control(devinfo, imm_desc);
2628                   assert(dp_oword_block_rw[msg_ctrl & 7]);
2629                   format(file, "owords = %s, aligned = %d",
2630                         dp_oword_block_rw[msg_ctrl & 7], (msg_ctrl >> 3) & 3);
2631                   break;
2632                }
2633                default:
2634                   format(file, "%u",
2635                          elk_dp_desc_msg_control(devinfo, imm_desc));
2636                }
2637                format(file, ")");
2638             } else {
2639                unsupported = true;
2640             }
2641             break;
2642 
2643          case HSW_SFID_DATAPORT_DATA_CACHE_1: {
2644             if (devinfo->ver >= 7) {
2645                format(file, " (");
2646                space = 0;
2647 
2648                unsigned msg_ctrl = elk_dp_desc_msg_control(devinfo, imm_desc);
2649 
2650                err |= control(file, "DP DC1 message type",
2651                               dp_dc1_msg_type_hsw,
2652                               elk_dp_desc_msg_type(devinfo, imm_desc), &space);
2653 
2654                format(file, ", Surface = %u, ",
2655                       elk_dp_desc_binding_table_index(devinfo, imm_desc));
2656 
2657                switch (elk_inst_dp_msg_type(devinfo, inst)) {
2658                case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP:
2659                case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
2660                case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP:
2661                   format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
2662                   FALLTHROUGH;
2663                case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
2664                case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
2665                case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
2666                case GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
2667                case GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP:
2668                   control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
2669                   break;
2670                case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
2671                case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
2672                case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
2673                case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE:
2674                case GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE:
2675                case GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ: {
2676                   static const char *simd_modes[] = { "4x2", "16", "8" };
2677                   format(file, "SIMD%s, Mask = 0x%x",
2678                          simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf);
2679                   break;
2680                }
2681                case GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
2682                case GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
2683                case GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP:
2684                   format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
2685                   control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
2686                           &space);
2687                   break;
2688                case GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE:
2689                case GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ:
2690                   assert(dp_oword_block_rw[msg_ctrl & 7]);
2691                   format(file, "owords = %s, aligned = %d",
2692                         dp_oword_block_rw[msg_ctrl & 7], (msg_ctrl >> 3) & 3);
2693                   break;
2694                default:
2695                   format(file, "0x%x", msg_ctrl);
2696                }
2697                format(file, ")");
2698             } else {
2699                unsupported = true;
2700             }
2701             break;
2702          }
2703 
2704          case GFX7_SFID_PIXEL_INTERPOLATOR:
2705             if (devinfo->ver >= 7) {
2706                format(file, " (%s, %s, 0x%02"PRIx64")",
2707                       elk_inst_pi_nopersp(devinfo, inst) ? "linear" : "persp",
2708                       pixel_interpolator_msg_types[elk_inst_pi_message_type(devinfo, inst)],
2709                       elk_inst_pi_message_data(devinfo, inst));
2710             } else {
2711                unsupported = true;
2712             }
2713             break;
2714 
2715          default:
2716             unsupported = true;
2717             break;
2718          }
2719 
2720          if (unsupported)
2721             format(file, "unsupported shared function ID %d", sfid);
2722 
2723          if (space)
2724             string(file, " ");
2725       }
2726       if (devinfo->verx10 >= 125 &&
2727           elk_inst_send_sel_reg32_ex_desc(devinfo, inst) &&
2728           elk_inst_send_ex_bso(devinfo, inst)) {
2729          format(file, " src1_len = %u",
2730                 (unsigned) elk_inst_send_src1_len(devinfo, inst));
2731 
2732          format(file, " ex_bso");
2733       }
2734       if (elk_sfid_is_lsc(sfid) ||
2735           (sfid == ELK_SFID_URB && devinfo->ver >= 20)) {
2736             lsc_disassemble_ex_desc(devinfo, imm_desc, imm_ex_desc, file);
2737       } else {
2738          if (has_imm_desc)
2739             format(file, " mlen %u", elk_message_desc_mlen(devinfo, imm_desc));
2740          if (has_imm_ex_desc) {
2741             format(file, " ex_mlen %u",
2742                    elk_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
2743          }
2744          if (has_imm_desc)
2745             format(file, " rlen %u", elk_message_desc_rlen(devinfo, imm_desc));
2746       }
2747    }
2748    pad(file, 64);
2749    if (opcode != ELK_OPCODE_NOP && opcode != ELK_OPCODE_NENOP) {
2750       string(file, "{");
2751       space = 1;
2752       err |= control(file, "access mode", access_mode,
2753                      elk_inst_access_mode(devinfo, inst), &space);
2754       if (devinfo->ver >= 6) {
2755          err |= control(file, "write enable control", wectrl,
2756                         elk_inst_mask_control(devinfo, inst), &space);
2757       } else {
2758          err |= control(file, "mask control", mask_ctrl,
2759                         elk_inst_mask_control(devinfo, inst), &space);
2760       }
2761 
2762       if (devinfo->ver < 12) {
2763          err |= control(file, "dependency control", dep_ctrl,
2764                         ((elk_inst_no_dd_check(devinfo, inst) << 1) |
2765                          elk_inst_no_dd_clear(devinfo, inst)), &space);
2766       }
2767 
2768       if (devinfo->ver >= 6)
2769          err |= qtr_ctrl(file, devinfo, inst);
2770       else {
2771          if (elk_inst_qtr_control(devinfo, inst) == ELK_COMPRESSION_COMPRESSED &&
2772              desc && desc->ndst > 0 &&
2773              elk_inst_dst_reg_file(devinfo, inst) == ELK_MESSAGE_REGISTER_FILE &&
2774              elk_inst_dst_da_reg_nr(devinfo, inst) & ELK_MRF_COMPR4) {
2775             format(file, " compr4");
2776          } else {
2777             err |= control(file, "compression control", compr_ctrl,
2778                            elk_inst_qtr_control(devinfo, inst), &space);
2779          }
2780       }
2781 
2782       if (devinfo->ver >= 12)
2783          err |= swsb(file, isa, inst);
2784 
2785       err |= control(file, "compaction", cmpt_ctrl, is_compacted, &space);
2786       err |= control(file, "thread control", thread_ctrl,
2787                      (devinfo->ver >= 12 ? elk_inst_atomic_control(devinfo, inst) :
2788                                            elk_inst_thread_control(devinfo, inst)),
2789                      &space);
2790       if (has_branch_ctrl(devinfo, opcode)) {
2791          err |= control(file, "branch ctrl", branch_ctrl,
2792                         elk_inst_branch_control(devinfo, inst), &space);
2793       } else if (devinfo->ver >= 6 && devinfo->ver < 20) {
2794          err |= control(file, "acc write control", accwr,
2795                         elk_inst_acc_wr_control(devinfo, inst), &space);
2796       }
2797       if (is_send(opcode))
2798          err |= control(file, "end of thread", end_of_thread,
2799                         elk_inst_eot(devinfo, inst), &space);
2800       if (space)
2801          string(file, " ");
2802       string(file, "}");
2803    }
2804    string(file, ";");
2805    newline(file);
2806    return err;
2807 }
2808 
2809 int
elk_disassemble_find_end(const struct elk_isa_info * isa,const void * assembly,int start)2810 elk_disassemble_find_end(const struct elk_isa_info *isa,
2811                          const void *assembly, int start)
2812 {
2813    const struct intel_device_info *devinfo = isa->devinfo;
2814    int offset = start;
2815 
2816    /* This loop exits when send-with-EOT or when opcode is 0 */
2817    while (true) {
2818       const elk_inst *insn = assembly + offset;
2819 
2820       if (elk_inst_cmpt_control(devinfo, insn)) {
2821          offset += 8;
2822       } else {
2823          offset += 16;
2824       }
2825 
2826       /* Simplistic, but efficient way to terminate disasm */
2827       uint32_t opcode = elk_inst_opcode(isa, insn);
2828       if (opcode == 0 || (is_send(opcode) && elk_inst_eot(devinfo, insn))) {
2829          break;
2830       }
2831    }
2832 
2833    return offset;
2834 }
2835 
2836 void
elk_disassemble_with_errors(const struct elk_isa_info * isa,const void * assembly,int start,FILE * out)2837 elk_disassemble_with_errors(const struct elk_isa_info *isa,
2838                             const void *assembly, int start, FILE *out)
2839 {
2840    int end = elk_disassemble_find_end(isa, assembly, start);
2841 
2842    /* Make a dummy disasm structure that elk_validate_instructions
2843     * can work from.
2844     */
2845    struct elk_disasm_info *elk_disasm_info = elk_disasm_initialize(isa, NULL);
2846    elk_disasm_new_inst_group(elk_disasm_info, start);
2847    elk_disasm_new_inst_group(elk_disasm_info, end);
2848 
2849    elk_validate_instructions(isa, assembly, start, end, elk_disasm_info);
2850 
2851    void *mem_ctx = ralloc_context(NULL);
2852    const struct elk_label *root_label =
2853       elk_label_assembly(isa, assembly, start, end, mem_ctx);
2854 
2855    foreach_list_typed(struct inst_group, group, link,
2856                       &elk_disasm_info->group_list) {
2857       struct exec_node *next_node = exec_node_get_next(&group->link);
2858       if (exec_node_is_tail_sentinel(next_node))
2859          break;
2860 
2861       struct inst_group *next =
2862          exec_node_data(struct inst_group, next_node, link);
2863 
2864       int start_offset = group->offset;
2865       int end_offset = next->offset;
2866 
2867       elk_disassemble(isa, assembly, start_offset, end_offset,
2868                       root_label, out);
2869 
2870       if (group->error) {
2871          fputs(group->error, out);
2872       }
2873    }
2874 
2875    ralloc_free(mem_ctx);
2876    ralloc_free(elk_disasm_info);
2877 }
2878