1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include "brw_disasm.h"
#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "brw_shader.h"
#include "intel_gfx_ver_enum.h"
#include "dev/intel_debug.h"

#include "util/u_debug.h"
#include "util/ralloc.h"
44
45 /* Returns a conditional modifier that negates the condition. */
46 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)47 brw_negate_cmod(enum brw_conditional_mod cmod)
48 {
49 switch (cmod) {
50 case BRW_CONDITIONAL_Z:
51 return BRW_CONDITIONAL_NZ;
52 case BRW_CONDITIONAL_NZ:
53 return BRW_CONDITIONAL_Z;
54 case BRW_CONDITIONAL_G:
55 return BRW_CONDITIONAL_LE;
56 case BRW_CONDITIONAL_GE:
57 return BRW_CONDITIONAL_L;
58 case BRW_CONDITIONAL_L:
59 return BRW_CONDITIONAL_GE;
60 case BRW_CONDITIONAL_LE:
61 return BRW_CONDITIONAL_G;
62 default:
63 unreachable("Can't negate this cmod");
64 }
65 }
66
67 /* Returns the corresponding conditional mod for swapping src0 and
68 * src1 in e.g. CMP.
69 */
70 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)71 brw_swap_cmod(enum brw_conditional_mod cmod)
72 {
73 switch (cmod) {
74 case BRW_CONDITIONAL_Z:
75 case BRW_CONDITIONAL_NZ:
76 return cmod;
77 case BRW_CONDITIONAL_G:
78 return BRW_CONDITIONAL_L;
79 case BRW_CONDITIONAL_GE:
80 return BRW_CONDITIONAL_LE;
81 case BRW_CONDITIONAL_L:
82 return BRW_CONDITIONAL_G;
83 case BRW_CONDITIONAL_LE:
84 return BRW_CONDITIONAL_GE;
85 default:
86 return BRW_CONDITIONAL_NONE;
87 }
88 }
89
90 /**
91 * Get the least significant bit offset of the i+1-th component of immediate
92 * type \p type. For \p i equal to the two's complement of j, return the
93 * offset of the j-th component starting from the end of the vector. For
94 * scalar register types return zero.
95 */
96 static unsigned
imm_shift(enum brw_reg_type type,unsigned i)97 imm_shift(enum brw_reg_type type, unsigned i)
98 {
99 assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V &&
100 "Not implemented.");
101
102 if (type == BRW_REGISTER_TYPE_VF)
103 return 8 * (i & 3);
104 else
105 return 0;
106 }
107
108 /**
109 * Swizzle an arbitrary immediate \p x of the given type according to the
110 * permutation specified as \p swz.
111 */
112 uint32_t
brw_swizzle_immediate(enum brw_reg_type type,uint32_t x,unsigned swz)113 brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
114 {
115 if (imm_shift(type, 1)) {
116 const unsigned n = 32 / imm_shift(type, 1);
117 uint32_t y = 0;
118
119 for (unsigned i = 0; i < n; i++) {
120 /* Shift the specified component all the way to the right and left to
121 * discard any undesired L/MSBs, then shift it right into component i.
122 */
123 y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
124 << imm_shift(type, ~0u)
125 >> imm_shift(type, ~0u - i);
126 }
127
128 return y;
129 } else {
130 return x;
131 }
132 }
133
134 unsigned
brw_get_default_exec_size(struct brw_codegen * p)135 brw_get_default_exec_size(struct brw_codegen *p)
136 {
137 return p->current->exec_size;
138 }
139
140 unsigned
brw_get_default_group(struct brw_codegen * p)141 brw_get_default_group(struct brw_codegen *p)
142 {
143 return p->current->group;
144 }
145
146 unsigned
brw_get_default_access_mode(struct brw_codegen * p)147 brw_get_default_access_mode(struct brw_codegen *p)
148 {
149 return p->current->access_mode;
150 }
151
152 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen * p)153 brw_get_default_swsb(struct brw_codegen *p)
154 {
155 return p->current->swsb;
156 }
157
158 void
brw_set_default_exec_size(struct brw_codegen * p,unsigned value)159 brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
160 {
161 p->current->exec_size = value;
162 }
163
brw_set_default_predicate_control(struct brw_codegen * p,enum brw_predicate pc)164 void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
165 {
166 p->current->predicate = pc;
167 }
168
brw_set_default_predicate_inverse(struct brw_codegen * p,bool predicate_inverse)169 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
170 {
171 p->current->pred_inv = predicate_inverse;
172 }
173
brw_set_default_flag_reg(struct brw_codegen * p,int reg,int subreg)174 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
175 {
176 assert(subreg < 2);
177 p->current->flag_subreg = reg * 2 + subreg;
178 }
179
brw_set_default_access_mode(struct brw_codegen * p,unsigned access_mode)180 void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
181 {
182 p->current->access_mode = access_mode;
183 }
184
185 void
brw_set_default_compression_control(struct brw_codegen * p,enum brw_compression compression_control)186 brw_set_default_compression_control(struct brw_codegen *p,
187 enum brw_compression compression_control)
188 {
189 switch (compression_control) {
190 case BRW_COMPRESSION_NONE:
191 /* This is the "use the first set of bits of dmask/vmask/arf
192 * according to execsize" option.
193 */
194 p->current->group = 0;
195 break;
196 case BRW_COMPRESSION_2NDHALF:
197 /* For SIMD8, this is "use the second set of 8 bits." */
198 p->current->group = 8;
199 break;
200 case BRW_COMPRESSION_COMPRESSED:
201 /* For SIMD16 instruction compression, use the first set of 16 bits
202 * since we don't do SIMD32 dispatch.
203 */
204 p->current->group = 0;
205 break;
206 default:
207 unreachable("not reached");
208 }
209 }
210
211 /**
212 * Apply the range of channel enable signals given by
213 * [group, group + exec_size) to the instruction passed as argument.
214 */
215 void
brw_inst_set_group(const struct intel_device_info * devinfo,brw_inst * inst,unsigned group)216 brw_inst_set_group(const struct intel_device_info *devinfo,
217 brw_inst *inst, unsigned group)
218 {
219 if (devinfo->ver >= 20) {
220 assert(group % 8 == 0 && group < 32);
221 brw_inst_set_qtr_control(devinfo, inst, group / 8);
222
223 } else {
224 assert(group % 4 == 0 && group < 32);
225 brw_inst_set_qtr_control(devinfo, inst, group / 8);
226 brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
227
228 }
229 }
230
231 void
brw_set_default_group(struct brw_codegen * p,unsigned group)232 brw_set_default_group(struct brw_codegen *p, unsigned group)
233 {
234 p->current->group = group;
235 }
236
brw_set_default_mask_control(struct brw_codegen * p,unsigned value)237 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
238 {
239 p->current->mask_control = value;
240 }
241
brw_set_default_saturate(struct brw_codegen * p,bool enable)242 void brw_set_default_saturate( struct brw_codegen *p, bool enable )
243 {
244 p->current->saturate = enable;
245 }
246
brw_set_default_acc_write_control(struct brw_codegen * p,unsigned value)247 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
248 {
249 p->current->acc_wr_control = value;
250 }
251
brw_set_default_swsb(struct brw_codegen * p,struct tgl_swsb value)252 void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value)
253 {
254 p->current->swsb = value;
255 }
256
brw_push_insn_state(struct brw_codegen * p)257 void brw_push_insn_state( struct brw_codegen *p )
258 {
259 assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
260 *(p->current + 1) = *p->current;
261 p->current++;
262 }
263
brw_pop_insn_state(struct brw_codegen * p)264 void brw_pop_insn_state( struct brw_codegen *p )
265 {
266 assert(p->current != p->stack);
267 p->current--;
268 }
269
270
271 /***********************************************************************
272 */
273 void
brw_init_codegen(const struct brw_isa_info * isa,struct brw_codegen * p,void * mem_ctx)274 brw_init_codegen(const struct brw_isa_info *isa,
275 struct brw_codegen *p, void *mem_ctx)
276 {
277 memset(p, 0, sizeof(*p));
278
279 p->isa = isa;
280 p->devinfo = isa->devinfo;
281 /*
282 * Set the initial instruction store array size to 1024, if found that
283 * isn't enough, then it will double the store size at brw_next_insn()
284 * until out of memory.
285 */
286 p->store_size = 1024;
287 p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
288 p->nr_insn = 0;
289 p->current = p->stack;
290 memset(p->current, 0, sizeof(p->current[0]));
291
292 p->mem_ctx = mem_ctx;
293
294 /* Some defaults?
295 */
296 brw_set_default_exec_size(p, BRW_EXECUTE_8);
297 brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
298 brw_set_default_saturate(p, 0);
299 brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
300
301 /* Set up control flow stack */
302 p->if_stack_depth = 0;
303 p->if_stack_array_size = 16;
304 p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
305
306 p->loop_stack_depth = 0;
307 p->loop_stack_array_size = 16;
308 p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
309 p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
310 }
311
312
brw_get_program(struct brw_codegen * p,unsigned * sz)313 const unsigned *brw_get_program( struct brw_codegen *p,
314 unsigned *sz )
315 {
316 *sz = p->next_insn_offset;
317 return (const unsigned *)p->store;
318 }
319
320 const struct brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen * p,unsigned * num_relocs)321 brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
322 {
323 *num_relocs = p->num_relocs;
324 return p->relocs;
325 }
326
/* Provides debug_get_option_shader_bin_dump_path(), which the shader
 * binary dump helpers below call.  The option is named
 * INTEL_SHADER_BIN_DUMP_PATH and NULL is passed as its default.
 * NOTE(review): presumably read from the environment once and cached, as
 * the macro name suggests -- confirm against util/u_debug.h.
 */
DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
328
/** True when the shader binary dump path option has a non-NULL value. */
bool
brw_should_dump_shader_bin(void)
{
   const char *dump_path = debug_get_option_shader_bin_dump_path();

   return dump_path != NULL;
}
333
/**
 * Write the bytes [start_offset, end_offset) of \p assembly to the file
 * "<dump path>/<identifier>.bin", where the dump path is the value of the
 * shader binary dump path option.
 *
 * The dump is best effort: when the option is unset, the range is
 * inverted, the file cannot be opened, it is not a regular file, or a
 * write fails, the function returns without reporting an error.
 *
 * \param assembly      Base address of the assembly buffer.
 * \param start_offset  Byte offset of the first byte to dump.
 * \param end_offset    Byte offset one past the last byte to dump.
 * \param identifier    File name stem used for the dump file.
 */
void
brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                    const char *identifier)
{
   const char *dump_path = debug_get_option_shader_bin_dump_path();

   /* Without a destination there is nothing to do, and an inverted range
    * would wrap around to an enormous size_t below.
    */
   if (dump_path == NULL || end_offset < start_offset)
      return;

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", dump_path, identifier);
   if (name == NULL)
      return;

   /* O_TRUNC discards the contents of an existing dump, so a shorter
    * shader written under the same identifier leaves no stale tail.
    */
   int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0777);
   ralloc_free(name);

   if (fd < 0)
      return;

   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return;
   }

   size_t to_write = (size_t)(end_offset - start_offset);
   /* Byte pointer: arithmetic on void * is a compiler extension. */
   const char *write_ptr = (const char *)assembly + start_offset;

   /* write() may accept fewer bytes than requested; keep going until
    * everything is out or an error / zero-length write stops us.
    */
   while (to_write) {
      ssize_t ret = write(fd, write_ptr, to_write);

      if (ret <= 0)
         break;

      to_write -= ret;
      write_ptr += ret;
   }

   close(fd);
}
370
brw_try_override_assembly(struct brw_codegen * p,int start_offset,const char * identifier)371 bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
372 const char *identifier)
373 {
374 const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
375 if (!read_path) {
376 return false;
377 }
378
379 char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
380
381 int fd = open(name, O_RDONLY);
382 ralloc_free(name);
383
384 if (fd == -1) {
385 return false;
386 }
387
388 struct stat sb;
389 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
390 close(fd);
391 return false;
392 }
393
394 p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
395 p->nr_insn += sb.st_size / sizeof(brw_inst);
396
397 p->next_insn_offset = start_offset + sb.st_size;
398 p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
399 p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
400 assert(p->store);
401
402 ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
403 close(fd);
404 if (ret != sb.st_size) {
405 return false;
406 }
407
408 ASSERTED bool valid =
409 brw_validate_instructions(p->isa, p->store,
410 start_offset, p->next_insn_offset,
411 NULL);
412 assert(valid);
413
414 return true;
415 }
416
417 const struct brw_label *
brw_find_label(const struct brw_label * root,int offset)418 brw_find_label(const struct brw_label *root, int offset)
419 {
420 const struct brw_label *curr = root;
421
422 if (curr != NULL)
423 {
424 do {
425 if (curr->offset == offset)
426 return curr;
427
428 curr = curr->next;
429 } while (curr != NULL);
430 }
431
432 return curr;
433 }
434
435 void
brw_create_label(struct brw_label ** labels,int offset,void * mem_ctx)436 brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
437 {
438 if (*labels != NULL) {
439 struct brw_label *curr = *labels;
440 struct brw_label *prev;
441
442 do {
443 prev = curr;
444
445 if (curr->offset == offset)
446 return;
447
448 curr = curr->next;
449 } while (curr != NULL);
450
451 curr = ralloc(mem_ctx, struct brw_label);
452 curr->offset = offset;
453 curr->number = prev->number + 1;
454 curr->next = NULL;
455 prev->next = curr;
456 } else {
457 struct brw_label *root = ralloc(mem_ctx, struct brw_label);
458 root->number = 0;
459 root->offset = offset;
460 root->next = NULL;
461 *labels = root;
462 }
463 }
464
465 const struct brw_label *
brw_label_assembly(const struct brw_isa_info * isa,const void * assembly,int start,int end,void * mem_ctx)466 brw_label_assembly(const struct brw_isa_info *isa,
467 const void *assembly, int start, int end, void *mem_ctx)
468 {
469 const struct intel_device_info *const devinfo = isa->devinfo;
470
471 struct brw_label *root_label = NULL;
472
473 int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);
474
475 for (int offset = start; offset < end;) {
476 const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
477 brw_inst uncompacted;
478
479 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
480
481 if (is_compact) {
482 brw_compact_inst *compacted = (brw_compact_inst *)inst;
483 brw_uncompact_instruction(isa, &uncompacted, compacted);
484 inst = &uncompacted;
485 }
486
487 if (brw_has_uip(devinfo, brw_inst_opcode(isa, inst))) {
488 /* Instructions that have UIP also have JIP. */
489 brw_create_label(&root_label,
490 offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
491 brw_create_label(&root_label,
492 offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
493 } else if (brw_has_jip(devinfo, brw_inst_opcode(isa, inst))) {
494 int jip = brw_inst_jip(devinfo, inst);
495
496 brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
497 }
498
499 if (is_compact) {
500 offset += sizeof(brw_compact_inst);
501 } else {
502 offset += sizeof(brw_inst);
503 }
504 }
505
506 return root_label;
507 }
508
/**
 * Disassemble [start, end) of \p assembly to \p out, printing labels at
 * jump targets.
 *
 * The labels are built in a temporary ralloc context that is released
 * before returning.
 */
void
brw_disassemble_with_labels(const struct brw_isa_info *isa,
                            const void *assembly, int start, int end,
                            FILE *out)
{
   void *label_ctx = ralloc_context(NULL);

   const struct brw_label *labels =
      brw_label_assembly(isa, assembly, start, end, label_ctx);
   brw_disassemble(isa, assembly, start, end, labels, out);

   ralloc_free(label_ctx);
}
521
522 void
brw_disassemble(const struct brw_isa_info * isa,const void * assembly,int start,int end,const struct brw_label * root_label,FILE * out)523 brw_disassemble(const struct brw_isa_info *isa,
524 const void *assembly, int start, int end,
525 const struct brw_label *root_label, FILE *out)
526 {
527 const struct intel_device_info *devinfo = isa->devinfo;
528
529 bool dump_hex = INTEL_DEBUG(DEBUG_HEX);
530
531 for (int offset = start; offset < end;) {
532 const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
533 brw_inst uncompacted;
534
535 if (root_label != NULL) {
536 const struct brw_label *label = brw_find_label(root_label, offset);
537 if (label != NULL) {
538 fprintf(out, "\nLABEL%d:\n", label->number);
539 }
540 }
541
542 bool compacted = brw_inst_cmpt_control(devinfo, insn);
543 if (0)
544 fprintf(out, "0x%08x: ", offset);
545
546 if (compacted) {
547 brw_compact_inst *compacted = (brw_compact_inst *)insn;
548 if (dump_hex) {
549 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
550 const unsigned int blank_spaces = 24;
551 for (int i = 0 ; i < 8; i = i + 4) {
552 fprintf(out, "%02x %02x %02x %02x ",
553 insn_ptr[i],
554 insn_ptr[i + 1],
555 insn_ptr[i + 2],
556 insn_ptr[i + 3]);
557 }
558 /* Make compacted instructions hex value output vertically aligned
559 * with uncompacted instructions hex value
560 */
561 fprintf(out, "%*c", blank_spaces, ' ');
562 }
563
564 brw_uncompact_instruction(isa, &uncompacted, compacted);
565 insn = &uncompacted;
566 } else {
567 if (dump_hex) {
568 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
569 for (int i = 0 ; i < 16; i = i + 4) {
570 fprintf(out, "%02x %02x %02x %02x ",
571 insn_ptr[i],
572 insn_ptr[i + 1],
573 insn_ptr[i + 2],
574 insn_ptr[i + 3]);
575 }
576 }
577 }
578
579 brw_disassemble_inst(out, isa, insn, compacted, offset, root_label);
580
581 if (compacted) {
582 offset += sizeof(brw_compact_inst);
583 } else {
584 offset += sizeof(brw_inst);
585 }
586 }
587 }
588
/**
 * Table of opcode descriptions.
 *
 * Each row gives, in order: the IR opcode, the hardware opcode value, the
 * mnemonic, the number of sources, the number of destinations, and a mask
 * of the hardware generations the row applies to.
 *
 * brw_init_isa_info() picks the rows whose generation mask matches the
 * device and indexes them by IR opcode and by hardware opcode; it asserts
 * that no two rows selected for the same device share an IR opcode or a
 * hardware opcode.  Rows with an empty mask (the DO pseudo opcode) are
 * never selected.
 */
static const struct opcode_desc opcode_descs[] = {
   /* IR, HW, name, nsrc, ndst, gfx_vers assuming Gfx9+ */
   { BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
   { BRW_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
   { BRW_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_AND, 5, "and", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_AND, 101, "and", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_OR, 6, "or", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_OR, 102, "or", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_XOR, 7, "xor", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_XOR, 103, "xor", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SHR, 8, "shr", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_LT(GFX12) },
   { BRW_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) },
   { BRW_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_ROR, 14, "ror", 2, 1, GFX11 },
   { BRW_OPCODE_ROR, 110, "ror", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_ROL, 15, "rol", 2, 1, GFX11 },
   { BRW_OPCODE_ROL, 111, "rol", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CMP, 16, "cmp", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL },
   { BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_ALL },
   { BRW_OPCODE_IF, 34, "if", 0, 0, GFX_ALL },
   { BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_ALL },
   { BRW_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL },
   { BRW_OPCODE_ENDIF, 37, "endif", 0, 0, GFX_ALL },
   { BRW_OPCODE_DO, 38, "do", 0, 0, 0 }, /* Pseudo opcode. */
   { BRW_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL },
   { BRW_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL },
   { BRW_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL },
   { BRW_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL },
   { BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_ALL },
   { BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_ALL },
   { BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_ALL },
   { BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_ALL },
   { BRW_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) },
   /* SEND and SENDC keep their hardware values across generations; only
    * the source count differs between the two generation ranges.
    */
   { BRW_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_ALL },
   { BRW_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL },
   { BRW_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL },
   { BRW_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL },
   { BRW_OPCODE_FRC, 67, "frc", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDU, 68, "rndu", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDD, 69, "rndd", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDE, 70, "rnde", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDZ, 71, "rndz", 1, 1, GFX_ALL },
   { BRW_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL },
   { BRW_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL },
   { BRW_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL },
   { BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_ALL },
   { BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_ALL },
   { BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_ALL },
   { BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_ALL },
   { BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_ALL },
   { BRW_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL },
   { BRW_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL },
   { BRW_OPCODE_ADD3, 82, "add3", 3, 1, GFX_GE(GFX125) },
   { BRW_OPCODE_DP4, 84, "dp4", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DPH, 85, "dph", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) },
   /* Hardware value 89 is shared by LINE and DPAS, in different rows with
    * different generation masks.
    */
   { BRW_OPCODE_LINE, 89, "line", 2, 1, GFX9 },
   { BRW_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) },
   { BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX9 },
   { BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_ALL },
   { BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX9 },
   { BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_ALL },
   { BRW_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) },
   { BRW_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) }
};
689
690 void
brw_init_isa_info(struct brw_isa_info * isa,const struct intel_device_info * devinfo)691 brw_init_isa_info(struct brw_isa_info *isa,
692 const struct intel_device_info *devinfo)
693 {
694 assert(devinfo->ver >= 9);
695
696 isa->devinfo = devinfo;
697
698 enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
699
700 memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
701 memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
702
703 for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
704 if (opcode_descs[i].gfx_vers & ver) {
705 const unsigned e = opcode_descs[i].ir;
706 const unsigned h = opcode_descs[i].hw;
707 assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
708 assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
709 isa->ir_to_descs[e] = &opcode_descs[i];
710 isa->hw_to_descs[h] = &opcode_descs[i];
711 }
712 }
713 }
714
715 /**
716 * Return the matching opcode_desc for the specified IR opcode and hardware
717 * generation, or NULL if the opcode is not supported by the device.
718 */
719 const struct opcode_desc *
brw_opcode_desc(const struct brw_isa_info * isa,enum opcode op)720 brw_opcode_desc(const struct brw_isa_info *isa, enum opcode op)
721 {
722 return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
723 }
724
725 /**
726 * Return the matching opcode_desc for the specified HW opcode and hardware
727 * generation, or NULL if the opcode is not supported by the device.
728 */
729 const struct opcode_desc *
brw_opcode_desc_from_hw(const struct brw_isa_info * isa,unsigned hw)730 brw_opcode_desc_from_hw(const struct brw_isa_info *isa, unsigned hw)
731 {
732 return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
733 }
734
735 unsigned
brw_num_sources_from_inst(const struct brw_isa_info * isa,const brw_inst * inst)736 brw_num_sources_from_inst(const struct brw_isa_info *isa,
737 const brw_inst *inst)
738 {
739 const struct intel_device_info *devinfo = isa->devinfo;
740 const struct opcode_desc *desc =
741 brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
742 unsigned math_function;
743
744 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
745 math_function = brw_inst_math_function(devinfo, inst);
746 } else {
747 assert(desc->nsrc < 4);
748 return desc->nsrc;
749 }
750
751 switch (math_function) {
752 case BRW_MATH_FUNCTION_INV:
753 case BRW_MATH_FUNCTION_LOG:
754 case BRW_MATH_FUNCTION_EXP:
755 case BRW_MATH_FUNCTION_SQRT:
756 case BRW_MATH_FUNCTION_RSQ:
757 case BRW_MATH_FUNCTION_SIN:
758 case BRW_MATH_FUNCTION_COS:
759 case BRW_MATH_FUNCTION_SINCOS:
760 case GFX8_MATH_FUNCTION_INVM:
761 case GFX8_MATH_FUNCTION_RSQRTM:
762 return 1;
763 case BRW_MATH_FUNCTION_FDIV:
764 case BRW_MATH_FUNCTION_POW:
765 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
766 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
767 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
768 return 2;
769 default:
770 unreachable("not reached");
771 }
772 }
773