1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 #include <sys/stat.h>
33 #include <fcntl.h>
34
35 #include "elk_disasm.h"
36 #include "elk_eu_defines.h"
37 #include "elk_eu.h"
38 #include "elk_shader.h"
39 #include "../intel_gfx_ver_enum.h"
40 #include "dev/intel_debug.h"
41
42 #include "util/u_debug.h"
43 #include "util/ralloc.h"
44
45 /* Returns a conditional modifier that negates the condition. */
46 enum elk_conditional_mod
elk_negate_cmod(enum elk_conditional_mod cmod)47 elk_negate_cmod(enum elk_conditional_mod cmod)
48 {
49 switch (cmod) {
50 case ELK_CONDITIONAL_Z:
51 return ELK_CONDITIONAL_NZ;
52 case ELK_CONDITIONAL_NZ:
53 return ELK_CONDITIONAL_Z;
54 case ELK_CONDITIONAL_G:
55 return ELK_CONDITIONAL_LE;
56 case ELK_CONDITIONAL_GE:
57 return ELK_CONDITIONAL_L;
58 case ELK_CONDITIONAL_L:
59 return ELK_CONDITIONAL_GE;
60 case ELK_CONDITIONAL_LE:
61 return ELK_CONDITIONAL_G;
62 default:
63 unreachable("Can't negate this cmod");
64 }
65 }
66
67 /* Returns the corresponding conditional mod for swapping src0 and
68 * src1 in e.g. CMP.
69 */
70 enum elk_conditional_mod
elk_swap_cmod(enum elk_conditional_mod cmod)71 elk_swap_cmod(enum elk_conditional_mod cmod)
72 {
73 switch (cmod) {
74 case ELK_CONDITIONAL_Z:
75 case ELK_CONDITIONAL_NZ:
76 return cmod;
77 case ELK_CONDITIONAL_G:
78 return ELK_CONDITIONAL_L;
79 case ELK_CONDITIONAL_GE:
80 return ELK_CONDITIONAL_LE;
81 case ELK_CONDITIONAL_L:
82 return ELK_CONDITIONAL_G;
83 case ELK_CONDITIONAL_LE:
84 return ELK_CONDITIONAL_GE;
85 default:
86 return ELK_CONDITIONAL_NONE;
87 }
88 }
89
90 /**
91 * Get the least significant bit offset of the i+1-th component of immediate
92 * type \p type. For \p i equal to the two's complement of j, return the
93 * offset of the j-th component starting from the end of the vector. For
94 * scalar register types return zero.
95 */
96 static unsigned
imm_shift(enum elk_reg_type type,unsigned i)97 imm_shift(enum elk_reg_type type, unsigned i)
98 {
99 assert(type != ELK_REGISTER_TYPE_UV && type != ELK_REGISTER_TYPE_V &&
100 "Not implemented.");
101
102 if (type == ELK_REGISTER_TYPE_VF)
103 return 8 * (i & 3);
104 else
105 return 0;
106 }
107
108 /**
109 * Swizzle an arbitrary immediate \p x of the given type according to the
110 * permutation specified as \p swz.
111 */
/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 *
 * Only vector immediates need any work: imm_shift(type, 1) is non-zero
 * exactly for the packed-component types it supports (currently VF).
 * Scalar immediates are returned unchanged.
 */
uint32_t
elk_swizzle_immediate(enum elk_reg_type type, uint32_t x, unsigned swz)
{
   if (imm_shift(type, 1)) {
      /* Number of packed components in the 32-bit immediate (4 for VF). */
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          *
          * imm_shift(type, ~0u) is the offset of the last component (24 for
          * VF), so "<< imm_shift(type, ~0u)" pins the selected component in
          * the top byte, and ">> imm_shift(type, ~0u - i)" then drops it into
          * slot i counted from the end.
          */
         y |= x >> imm_shift(type, (i & ~3) + ELK_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      /* Scalar immediate: swizzling is a no-op. */
      return x;
   }
}
133
/* Return the execution size from the current default instruction state
 * (top of the codegen state stack).
 */
unsigned
elk_get_default_exec_size(struct elk_codegen *p)
{
   return p->current->exec_size;
}
139
/* Return the default channel enable group (quarter/nibble offset) from the
 * current instruction state.
 */
unsigned
elk_get_default_group(struct elk_codegen *p)
{
   return p->current->group;
}
145
/* Return the default register access mode (align mode) from the current
 * instruction state.
 */
unsigned
elk_get_default_access_mode(struct elk_codegen *p)
{
   return p->current->access_mode;
}
151
/* Return the default software scoreboard (SWSB) annotation from the current
 * instruction state.  Returned by value.
 */
struct tgl_swsb
elk_get_default_swsb(struct elk_codegen *p)
{
   return p->current->swsb;
}
157
/* Set the default execution size for subsequently emitted instructions. */
void
elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
{
   p->current->exec_size = value;
}
163
elk_set_default_predicate_control(struct elk_codegen * p,enum elk_predicate pc)164 void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc)
165 {
166 p->current->predicate = pc;
167 }
168
elk_set_default_predicate_inverse(struct elk_codegen * p,bool predicate_inverse)169 void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse)
170 {
171 p->current->pred_inv = predicate_inverse;
172 }
173
elk_set_default_flag_reg(struct elk_codegen * p,int reg,int subreg)174 void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg)
175 {
176 assert(subreg < 2);
177 p->current->flag_subreg = reg * 2 + subreg;
178 }
179
elk_set_default_access_mode(struct elk_codegen * p,unsigned access_mode)180 void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode )
181 {
182 p->current->access_mode = access_mode;
183 }
184
185 void
elk_set_default_compression_control(struct elk_codegen * p,enum elk_compression compression_control)186 elk_set_default_compression_control(struct elk_codegen *p,
187 enum elk_compression compression_control)
188 {
189 switch (compression_control) {
190 case ELK_COMPRESSION_NONE:
191 /* This is the "use the first set of bits of dmask/vmask/arf
192 * according to execsize" option.
193 */
194 p->current->group = 0;
195 break;
196 case ELK_COMPRESSION_2NDHALF:
197 /* For SIMD8, this is "use the second set of 8 bits." */
198 p->current->group = 8;
199 break;
200 case ELK_COMPRESSION_COMPRESSED:
201 /* For SIMD16 instruction compression, use the first set of 16 bits
202 * since we don't do SIMD32 dispatch.
203 */
204 p->current->group = 0;
205 break;
206 default:
207 unreachable("not reached");
208 }
209
210 if (p->devinfo->ver <= 6) {
211 p->current->compressed =
212 (compression_control == ELK_COMPRESSION_COMPRESSED);
213 }
214 }
215
216 /**
217 * Enable or disable instruction compression on the given instruction leaving
218 * the currently selected channel enable group untouched.
219 */
220 void
elk_inst_set_compression(const struct intel_device_info * devinfo,elk_inst * inst,bool on)221 elk_inst_set_compression(const struct intel_device_info *devinfo,
222 elk_inst *inst, bool on)
223 {
224 if (devinfo->ver >= 6) {
225 /* No-op, the EU will figure out for us whether the instruction needs to
226 * be compressed.
227 */
228 } else {
229 /* The channel group and compression controls are non-orthogonal, there
230 * are two possible representations for uncompressed instructions and we
231 * may need to preserve the current one to avoid changing the selected
232 * channel group inadvertently.
233 */
234 if (on)
235 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_COMPRESSED);
236 else if (elk_inst_qtr_control(devinfo, inst)
237 == ELK_COMPRESSION_COMPRESSED)
238 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
239 }
240 }
241
/* Set the default compressed flag for subsequently emitted instructions. */
void
elk_set_default_compression(struct elk_codegen *p, bool on)
{
   p->current->compressed = on;
}
247
248 /**
249 * Apply the range of channel enable signals given by
250 * [group, group + exec_size) to the instruction passed as argument.
251 */
/**
 * Apply the range of channel enable signals given by
 * [group, group + exec_size) to the instruction passed as argument.
 *
 * The encoding of the group offset differs per hardware generation:
 * quarter control only (Gfx20+, Gfx6), quarter plus nibble control
 * (Gfx7-12.x), or the legacy qtr_control field shared with the
 * compression control (pre-Gfx6).
 */
void
elk_inst_set_group(const struct intel_device_info *devinfo,
                   elk_inst *inst, unsigned group)
{
   if (devinfo->ver >= 20) {
      /* Gfx20+: only 8-channel granularity, encoded in quarter control. */
      assert(group % 8 == 0 && group < 32);
      elk_inst_set_qtr_control(devinfo, inst, group / 8);

   } else if (devinfo->ver >= 7) {
      /* Gfx7+: 4-channel granularity via the additional nibble control. */
      assert(group % 4 == 0 && group < 32);
      elk_inst_set_qtr_control(devinfo, inst, group / 8);
      elk_inst_set_nib_control(devinfo, inst, (group / 4) % 2);

   } else if (devinfo->ver == 6) {
      assert(group % 8 == 0 && group < 32);
      elk_inst_set_qtr_control(devinfo, inst, group / 8);

   } else {
      assert(group % 8 == 0 && group < 16);
      /* The channel group and compression controls are non-orthogonal, there
       * are two possible representations for group zero and we may need to
       * preserve the current one to avoid changing the selected compression
       * enable inadvertently.
       */
      if (group == 8)
         elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_2NDHALF);
      else if (elk_inst_qtr_control(devinfo, inst) == ELK_COMPRESSION_2NDHALF)
         elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
   }
}
282
/* Set the default channel enable group for subsequently emitted
 * instructions.
 */
void
elk_set_default_group(struct elk_codegen *p, unsigned group)
{
   p->current->group = group;
}
288
elk_set_default_mask_control(struct elk_codegen * p,unsigned value)289 void elk_set_default_mask_control( struct elk_codegen *p, unsigned value )
290 {
291 p->current->mask_control = value;
292 }
293
elk_set_default_saturate(struct elk_codegen * p,bool enable)294 void elk_set_default_saturate( struct elk_codegen *p, bool enable )
295 {
296 p->current->saturate = enable;
297 }
298
elk_set_default_acc_write_control(struct elk_codegen * p,unsigned value)299 void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
300 {
301 p->current->acc_wr_control = value;
302 }
303
elk_set_default_swsb(struct elk_codegen * p,struct tgl_swsb value)304 void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value)
305 {
306 p->current->swsb = value;
307 }
308
elk_push_insn_state(struct elk_codegen * p)309 void elk_push_insn_state( struct elk_codegen *p )
310 {
311 assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
312 *(p->current + 1) = *p->current;
313 p->current++;
314 }
315
elk_pop_insn_state(struct elk_codegen * p)316 void elk_pop_insn_state( struct elk_codegen *p )
317 {
318 assert(p->current != p->stack);
319 p->current--;
320 }
321
322
323 /***********************************************************************
324 */
/***********************************************************************
 */
/* Initialize a codegen context: zero the structure, allocate the initial
 * instruction store and control-flow stacks out of \p mem_ctx (ralloc), and
 * establish default instruction state.
 */
void
elk_init_codegen(const struct elk_isa_info *isa,
                 struct elk_codegen *p, void *mem_ctx)
{
   memset(p, 0, sizeof(*p));

   p->isa = isa;
   p->devinfo = isa->devinfo;
   p->automatic_exec_sizes = true;
   /*
    * Set the initial instruction store array size to 1024, if found that
    * isn't enough, then it will double the store size at elk_next_insn()
    * until out of memory.
    */
   p->store_size = 1024;
   p->store = rzalloc_array(mem_ctx, elk_inst, p->store_size);
   p->nr_insn = 0;
   /* Default state lives at the bottom of the state stack. */
   p->current = p->stack;
   memset(p->current, 0, sizeof(p->current[0]));

   p->mem_ctx = mem_ctx;

   /* Some defaults?
    */
   elk_set_default_exec_size(p, ELK_EXECUTE_8);
   elk_set_default_mask_control(p, ELK_MASK_ENABLE); /* what does this do? */
   elk_set_default_saturate(p, 0);
   elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);

   /* Set up control flow stack */
   p->if_stack_depth = 0;
   p->if_stack_array_size = 16;
   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);

   p->loop_stack_depth = 0;
   p->loop_stack_array_size = 16;
   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
   /* Tracks the if-nesting depth at each loop entry, sized in lockstep with
    * loop_stack.
    */
   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
}
364
365
elk_get_program(struct elk_codegen * p,unsigned * sz)366 const unsigned *elk_get_program( struct elk_codegen *p,
367 unsigned *sz )
368 {
369 *sz = p->next_insn_offset;
370 return (const unsigned *)p->store;
371 }
372
/* Return the array of shader relocations recorded during codegen and store
 * its length in \p num_relocs.  The array remains owned by the codegen
 * context.
 */
const struct elk_shader_reloc *
elk_get_shader_relocs(struct elk_codegen *p, unsigned *num_relocs)
{
   *num_relocs = p->num_relocs;
   return p->relocs;
}
379
380 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
381
elk_should_dump_shader_bin(void)382 bool elk_should_dump_shader_bin(void)
383 {
384 return debug_get_option_shader_bin_dump_path() != NULL;
385 }
386
elk_dump_shader_bin(void * assembly,int start_offset,int end_offset,const char * identifier)387 void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
388 const char *identifier)
389 {
390 char *name = ralloc_asprintf(NULL, "%s/%s.bin",
391 debug_get_option_shader_bin_dump_path(),
392 identifier);
393
394 int fd = open(name, O_CREAT | O_WRONLY, 0777);
395 ralloc_free(name);
396
397 if (fd < 0)
398 return;
399
400 struct stat sb;
401 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
402 close(fd);
403 return;
404 }
405
406 size_t to_write = end_offset - start_offset;
407 void *write_ptr = assembly + start_offset;
408
409 while (to_write) {
410 ssize_t ret = write(fd, write_ptr, to_write);
411
412 if (ret <= 0) {
413 close(fd);
414 return;
415 }
416
417 to_write -= ret;
418 write_ptr += ret;
419 }
420
421 close(fd);
422 }
423
elk_try_override_assembly(struct elk_codegen * p,int start_offset,const char * identifier)424 bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
425 const char *identifier)
426 {
427 const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
428 if (!read_path) {
429 return false;
430 }
431
432 char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
433
434 int fd = open(name, O_RDONLY);
435 ralloc_free(name);
436
437 if (fd == -1) {
438 return false;
439 }
440
441 struct stat sb;
442 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
443 close(fd);
444 return false;
445 }
446
447 p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(elk_inst);
448 p->nr_insn += sb.st_size / sizeof(elk_inst);
449
450 p->next_insn_offset = start_offset + sb.st_size;
451 p->store_size = (start_offset + sb.st_size) / sizeof(elk_inst);
452 p->store = (elk_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
453 assert(p->store);
454
455 ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
456 close(fd);
457 if (ret != sb.st_size) {
458 return false;
459 }
460
461 ASSERTED bool valid =
462 elk_validate_instructions(p->isa, p->store,
463 start_offset, p->next_insn_offset,
464 NULL);
465 assert(valid);
466
467 return true;
468 }
469
470 const struct elk_label *
elk_find_label(const struct elk_label * root,int offset)471 elk_find_label(const struct elk_label *root, int offset)
472 {
473 const struct elk_label *curr = root;
474
475 if (curr != NULL)
476 {
477 do {
478 if (curr->offset == offset)
479 return curr;
480
481 curr = curr->next;
482 } while (curr != NULL);
483 }
484
485 return curr;
486 }
487
488 void
elk_create_label(struct elk_label ** labels,int offset,void * mem_ctx)489 elk_create_label(struct elk_label **labels, int offset, void *mem_ctx)
490 {
491 if (*labels != NULL) {
492 struct elk_label *curr = *labels;
493 struct elk_label *prev;
494
495 do {
496 prev = curr;
497
498 if (curr->offset == offset)
499 return;
500
501 curr = curr->next;
502 } while (curr != NULL);
503
504 curr = ralloc(mem_ctx, struct elk_label);
505 curr->offset = offset;
506 curr->number = prev->number + 1;
507 curr->next = NULL;
508 prev->next = curr;
509 } else {
510 struct elk_label *root = ralloc(mem_ctx, struct elk_label);
511 root->number = 0;
512 root->offset = offset;
513 root->next = NULL;
514 *labels = root;
515 }
516 }
517
518 const struct elk_label *
elk_label_assembly(const struct elk_isa_info * isa,const void * assembly,int start,int end,void * mem_ctx)519 elk_label_assembly(const struct elk_isa_info *isa,
520 const void *assembly, int start, int end, void *mem_ctx)
521 {
522 const struct intel_device_info *const devinfo = isa->devinfo;
523
524 struct elk_label *root_label = NULL;
525
526 int to_bytes_scale = sizeof(elk_inst) / elk_jump_scale(devinfo);
527
528 for (int offset = start; offset < end;) {
529 const elk_inst *inst = (const elk_inst *) ((const char *) assembly + offset);
530 elk_inst uncompacted;
531
532 bool is_compact = elk_inst_cmpt_control(devinfo, inst);
533
534 if (is_compact) {
535 elk_compact_inst *compacted = (elk_compact_inst *)inst;
536 elk_uncompact_instruction(isa, &uncompacted, compacted);
537 inst = &uncompacted;
538 }
539
540 if (elk_has_uip(devinfo, elk_inst_opcode(isa, inst))) {
541 /* Instructions that have UIP also have JIP. */
542 elk_create_label(&root_label,
543 offset + elk_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
544 elk_create_label(&root_label,
545 offset + elk_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
546 } else if (elk_has_jip(devinfo, elk_inst_opcode(isa, inst))) {
547 int jip;
548 if (devinfo->ver >= 7) {
549 jip = elk_inst_jip(devinfo, inst);
550 } else {
551 jip = elk_inst_gfx6_jump_count(devinfo, inst);
552 }
553
554 elk_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
555 }
556
557 if (is_compact) {
558 offset += sizeof(elk_compact_inst);
559 } else {
560 offset += sizeof(elk_inst);
561 }
562 }
563
564 return root_label;
565 }
566
/* Convenience wrapper around elk_disassemble() that first scans the code
 * for branch targets so the disassembly is annotated with LABELn markers.
 * All label allocations are freed before returning.
 */
void
elk_disassemble_with_labels(const struct elk_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   void *mem_ctx = ralloc_context(NULL);
   const struct elk_label *root_label =
      elk_label_assembly(isa, assembly, start, end, mem_ctx);

   elk_disassemble(isa, assembly, start, end, root_label, out);

   ralloc_free(mem_ctx);
}
579
/* Disassemble the assembly in [start, end) to \p out, printing LABELn
 * markers for any offsets present in \p root_label (may be NULL) and,
 * when INTEL_DEBUG=hex is set, a raw hex dump of each instruction.
 */
void
elk_disassemble(const struct elk_isa_info *isa,
                const void *assembly, int start, int end,
                const struct elk_label *root_label, FILE *out)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   bool dump_hex = INTEL_DEBUG(DEBUG_HEX);

   for (int offset = start; offset < end;) {
      const elk_inst *insn = (const elk_inst *)((char *)assembly + offset);
      elk_inst uncompacted;

      if (root_label != NULL) {
         const struct elk_label *label = elk_find_label(root_label, offset);
         if (label != NULL) {
            fprintf(out, "\nLABEL%d:\n", label->number);
         }
      }

      bool compacted = elk_inst_cmpt_control(devinfo, insn);
      /* Developer aid: flip to 1 to prefix every instruction with its
       * byte offset.
       */
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         elk_compact_inst *compacted = (elk_compact_inst *)insn;
         if (dump_hex) {
            /* Compacted instructions are 8 bytes: dump them 4 at a time. */
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0 ; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Make compacted instructions hex value output vertically aligned
             * with uncompacted instructions hex value
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         /* Decode through the uncompacted form. */
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            /* Full instructions are 16 bytes. */
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0 ; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      elk_disassemble_inst(out, isa, insn, compacted, offset, root_label);

      if (compacted) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }
}
646
/* Master opcode table: one entry per (IR opcode, HW encoding, generation
 * range).  The same IR opcode may appear multiple times with different HW
 * encodings when the encoding changed between generations (their gfx_vers
 * masks must be disjoint — enforced by asserts in elk_init_isa_info()).
 */
static const struct elk_opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { ELK_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
   { ELK_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
   { ELK_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_MOVI, 3, "movi", 2, 1, GFX_GE(GFX45) & GFX_LT(GFX12) },
   { ELK_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_AND, 5, "and", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_AND, 101, "and", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_OR, 6, "or", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_OR, 102, "or", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_XOR, 7, "xor", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_XOR, 103, "xor", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_SHR, 8, "shr", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_DIM, 10, "dim", 1, 1, GFX75 },
   { ELK_OPCODE_SMOV, 10, "smov", 0, 0, GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) },
   { ELK_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_ROR, 14, "ror", 2, 1, GFX11 },
   { ELK_OPCODE_ROR, 110, "ror", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_ROL, 15, "rol", 2, 1, GFX11 },
   { ELK_OPCODE_ROL, 111, "rol", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_CMP, 16, "cmp", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_CSEL, 18, "csel", 3, 1, GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_F32TO16, 19, "f32to16", 1, 1, GFX7 | GFX75 },
   { ELK_OPCODE_F16TO32, 20, "f16to32", 1, 1, GFX7 | GFX75 },
   { ELK_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_BFE, 24, "bfe", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL },
   { ELK_OPCODE_BRD, 33, "brd", 0, 0, GFX_GE(GFX7) },
   { ELK_OPCODE_IF, 34, "if", 0, 0, GFX_ALL },
   { ELK_OPCODE_IFF, 35, "iff", 0, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_BRC, 35, "brc", 0, 0, GFX_GE(GFX7) },
   { ELK_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL },
   { ELK_OPCODE_ENDIF, 37, "endif", 0, 0, GFX_ALL },
   { ELK_OPCODE_DO, 38, "do", 0, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_CASE, 38, "case", 0, 0, GFX6 },
   { ELK_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL },
   { ELK_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL },
   { ELK_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL },
   { ELK_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL },
   { ELK_OPCODE_CALLA, 43, "calla", 0, 0, GFX_GE(GFX75) },
   { ELK_OPCODE_MSAVE, 44, "msave", 0, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_CALL, 44, "call", 0, 0, GFX_GE(GFX6) },
   { ELK_OPCODE_MREST, 45, "mrest", 0, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_RET, 45, "ret", 0, 0, GFX_GE(GFX6) },
   { ELK_OPCODE_PUSH, 46, "push", 0, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_FORK, 46, "fork", 0, 0, GFX6 },
   { ELK_OPCODE_GOTO, 46, "goto", 0, 0, GFX_GE(GFX8) },
   { ELK_OPCODE_POP, 47, "pop", 2, 0, GFX_LE(GFX5) },
   { ELK_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) },
   { ELK_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_SENDS, 51, "sends", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
   { ELK_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
   { ELK_OPCODE_MATH, 56, "math", 2, 1, GFX_GE(GFX6) },
   { ELK_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL },
   { ELK_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL },
   { ELK_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL },
   { ELK_OPCODE_FRC, 67, "frc", 1, 1, GFX_ALL },
   { ELK_OPCODE_RNDU, 68, "rndu", 1, 1, GFX_ALL },
   { ELK_OPCODE_RNDD, 69, "rndd", 1, 1, GFX_ALL },
   { ELK_OPCODE_RNDE, 70, "rnde", 1, 1, GFX_ALL },
   { ELK_OPCODE_RNDZ, 71, "rndz", 1, 1, GFX_ALL },
   { ELK_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL },
   { ELK_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL },
   { ELK_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL },
   { ELK_OPCODE_FBH, 75, "fbh", 1, 1, GFX_GE(GFX7) },
   { ELK_OPCODE_FBL, 76, "fbl", 1, 1, GFX_GE(GFX7) },
   { ELK_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_GE(GFX7) },
   { ELK_OPCODE_ADDC, 78, "addc", 2, 1, GFX_GE(GFX7) },
   { ELK_OPCODE_SUBB, 79, "subb", 2, 1, GFX_GE(GFX7) },
   { ELK_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL },
   { ELK_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL },
   { ELK_OPCODE_ADD3, 82, "add3", 3, 1, GFX_GE(GFX125) },
   { ELK_OPCODE_DP4, 84, "dp4", 2, 1, GFX_LT(GFX11) },
   { ELK_OPCODE_DPH, 85, "dph", 2, 1, GFX_LT(GFX11) },
   { ELK_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) },
   { ELK_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },
   { ELK_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) },
   { ELK_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) },
   { ELK_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) },
   { ELK_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) },
   { ELK_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) },
   { ELK_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) },
   { ELK_OPCODE_MADM, 93, "madm", 3, 1, GFX_GE(GFX8) },
   { ELK_OPCODE_NENOP, 125, "nenop", 0, 0, GFX45 },
   { ELK_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) },
   { ELK_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) }
};
758
759 void
elk_init_isa_info(struct elk_isa_info * isa,const struct intel_device_info * devinfo)760 elk_init_isa_info(struct elk_isa_info *isa,
761 const struct intel_device_info *devinfo)
762 {
763 isa->devinfo = devinfo;
764
765 enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
766
767 memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
768 memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
769
770 for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
771 if (opcode_descs[i].gfx_vers & ver) {
772 const unsigned e = opcode_descs[i].ir;
773 const unsigned h = opcode_descs[i].hw;
774 assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
775 assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
776 isa->ir_to_descs[e] = &opcode_descs[i];
777 isa->hw_to_descs[h] = &opcode_descs[i];
778 }
779 }
780 }
781
782 /**
783 * Return the matching opcode_desc for the specified IR opcode and hardware
784 * generation, or NULL if the opcode is not supported by the device.
785 */
/**
 * Return the matching opcode_desc for the specified IR opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct elk_opcode_desc *
elk_opcode_desc(const struct elk_isa_info *isa, enum elk_opcode op)
{
   /* Bounds check guards against out-of-range IR opcode values. */
   return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
}
791
792 /**
793 * Return the matching opcode_desc for the specified HW opcode and hardware
794 * generation, or NULL if the opcode is not supported by the device.
795 */
/**
 * Return the matching opcode_desc for the specified HW opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct elk_opcode_desc *
elk_opcode_desc_from_hw(const struct elk_isa_info *isa, unsigned hw)
{
   /* Bounds check guards against out-of-range HW encodings. */
   return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
}
801
802 unsigned
elk_num_sources_from_inst(const struct elk_isa_info * isa,const elk_inst * inst)803 elk_num_sources_from_inst(const struct elk_isa_info *isa,
804 const elk_inst *inst)
805 {
806 const struct intel_device_info *devinfo = isa->devinfo;
807 const struct elk_opcode_desc *desc =
808 elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
809 unsigned math_function;
810
811 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
812 math_function = elk_inst_math_function(devinfo, inst);
813 } else if (devinfo->ver < 6 &&
814 elk_inst_opcode(isa, inst) == ELK_OPCODE_SEND) {
815 if (elk_inst_sfid(devinfo, inst) == ELK_SFID_MATH) {
816 /* src1 must be a descriptor (including the information to determine
817 * that the SEND is doing an extended math operation), but src0 can
818 * actually be null since it serves as the source of the implicit GRF
819 * to MRF move.
820 *
821 * If we stop using that functionality, we'll have to revisit this.
822 */
823 return 2;
824 } else {
825 /* Send instructions are allowed to have null sources since they use
826 * the base_mrf field to specify which message register source.
827 */
828 return 0;
829 }
830 } else {
831 assert(desc->nsrc < 4);
832 return desc->nsrc;
833 }
834
835 switch (math_function) {
836 case ELK_MATH_FUNCTION_INV:
837 case ELK_MATH_FUNCTION_LOG:
838 case ELK_MATH_FUNCTION_EXP:
839 case ELK_MATH_FUNCTION_SQRT:
840 case ELK_MATH_FUNCTION_RSQ:
841 case ELK_MATH_FUNCTION_SIN:
842 case ELK_MATH_FUNCTION_COS:
843 case ELK_MATH_FUNCTION_SINCOS:
844 case GFX8_MATH_FUNCTION_INVM:
845 case GFX8_MATH_FUNCTION_RSQRTM:
846 return 1;
847 case ELK_MATH_FUNCTION_FDIV:
848 case ELK_MATH_FUNCTION_POW:
849 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
850 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
851 case ELK_MATH_FUNCTION_INT_DIV_REMAINDER:
852 return 2;
853 default:
854 unreachable("not reached");
855 }
856 }
857