• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>

35 #include "elk_disasm.h"
36 #include "elk_eu_defines.h"
37 #include "elk_eu.h"
38 #include "elk_shader.h"
39 #include "../intel_gfx_ver_enum.h"
40 #include "dev/intel_debug.h"
41 
42 #include "util/u_debug.h"
43 #include "util/ralloc.h"
44 
45 /* Returns a conditional modifier that negates the condition. */
46 enum elk_conditional_mod
elk_negate_cmod(enum elk_conditional_mod cmod)47 elk_negate_cmod(enum elk_conditional_mod cmod)
48 {
49    switch (cmod) {
50    case ELK_CONDITIONAL_Z:
51       return ELK_CONDITIONAL_NZ;
52    case ELK_CONDITIONAL_NZ:
53       return ELK_CONDITIONAL_Z;
54    case ELK_CONDITIONAL_G:
55       return ELK_CONDITIONAL_LE;
56    case ELK_CONDITIONAL_GE:
57       return ELK_CONDITIONAL_L;
58    case ELK_CONDITIONAL_L:
59       return ELK_CONDITIONAL_GE;
60    case ELK_CONDITIONAL_LE:
61       return ELK_CONDITIONAL_G;
62    default:
63       unreachable("Can't negate this cmod");
64    }
65 }
66 
67 /* Returns the corresponding conditional mod for swapping src0 and
68  * src1 in e.g. CMP.
69  */
70 enum elk_conditional_mod
elk_swap_cmod(enum elk_conditional_mod cmod)71 elk_swap_cmod(enum elk_conditional_mod cmod)
72 {
73    switch (cmod) {
74    case ELK_CONDITIONAL_Z:
75    case ELK_CONDITIONAL_NZ:
76       return cmod;
77    case ELK_CONDITIONAL_G:
78       return ELK_CONDITIONAL_L;
79    case ELK_CONDITIONAL_GE:
80       return ELK_CONDITIONAL_LE;
81    case ELK_CONDITIONAL_L:
82       return ELK_CONDITIONAL_G;
83    case ELK_CONDITIONAL_LE:
84       return ELK_CONDITIONAL_GE;
85    default:
86       return ELK_CONDITIONAL_NONE;
87    }
88 }
89 
90 /**
91  * Get the least significant bit offset of the i+1-th component of immediate
92  * type \p type.  For \p i equal to the two's complement of j, return the
93  * offset of the j-th component starting from the end of the vector.  For
94  * scalar register types return zero.
95  */
96 static unsigned
imm_shift(enum elk_reg_type type,unsigned i)97 imm_shift(enum elk_reg_type type, unsigned i)
98 {
99    assert(type != ELK_REGISTER_TYPE_UV && type != ELK_REGISTER_TYPE_V &&
100           "Not implemented.");
101 
102    if (type == ELK_REGISTER_TYPE_VF)
103       return 8 * (i & 3);
104    else
105       return 0;
106 }
107 
/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 *
 * Scalar types are returned unchanged; only vector immediates (VF) are
 * actually permuted.
 */
uint32_t
elk_swizzle_immediate(enum elk_reg_type type, uint32_t x, unsigned swz)
{
   if (imm_shift(type, 1)) {
      /* imm_shift(type, 1) is the per-component bit width, so n is the
       * number of components packed into the 32-bit immediate.
       */
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          */
         y |= x >> imm_shift(type, (i & ~3) + ELK_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      /* Scalar immediate: swizzling has no effect. */
      return x;
   }
}
133 
134 unsigned
elk_get_default_exec_size(struct elk_codegen * p)135 elk_get_default_exec_size(struct elk_codegen *p)
136 {
137    return p->current->exec_size;
138 }
139 
140 unsigned
elk_get_default_group(struct elk_codegen * p)141 elk_get_default_group(struct elk_codegen *p)
142 {
143    return p->current->group;
144 }
145 
146 unsigned
elk_get_default_access_mode(struct elk_codegen * p)147 elk_get_default_access_mode(struct elk_codegen *p)
148 {
149    return p->current->access_mode;
150 }
151 
152 struct tgl_swsb
elk_get_default_swsb(struct elk_codegen * p)153 elk_get_default_swsb(struct elk_codegen *p)
154 {
155    return p->current->swsb;
156 }
157 
158 void
elk_set_default_exec_size(struct elk_codegen * p,unsigned value)159 elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
160 {
161    p->current->exec_size = value;
162 }
163 
elk_set_default_predicate_control(struct elk_codegen * p,enum elk_predicate pc)164 void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc)
165 {
166    p->current->predicate = pc;
167 }
168 
elk_set_default_predicate_inverse(struct elk_codegen * p,bool predicate_inverse)169 void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse)
170 {
171    p->current->pred_inv = predicate_inverse;
172 }
173 
elk_set_default_flag_reg(struct elk_codegen * p,int reg,int subreg)174 void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg)
175 {
176    assert(subreg < 2);
177    p->current->flag_subreg = reg * 2 + subreg;
178 }
179 
elk_set_default_access_mode(struct elk_codegen * p,unsigned access_mode)180 void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode )
181 {
182    p->current->access_mode = access_mode;
183 }
184 
185 void
elk_set_default_compression_control(struct elk_codegen * p,enum elk_compression compression_control)186 elk_set_default_compression_control(struct elk_codegen *p,
187 			    enum elk_compression compression_control)
188 {
189    switch (compression_control) {
190    case ELK_COMPRESSION_NONE:
191       /* This is the "use the first set of bits of dmask/vmask/arf
192        * according to execsize" option.
193        */
194       p->current->group = 0;
195       break;
196    case ELK_COMPRESSION_2NDHALF:
197       /* For SIMD8, this is "use the second set of 8 bits." */
198       p->current->group = 8;
199       break;
200    case ELK_COMPRESSION_COMPRESSED:
201       /* For SIMD16 instruction compression, use the first set of 16 bits
202        * since we don't do SIMD32 dispatch.
203        */
204       p->current->group = 0;
205       break;
206    default:
207       unreachable("not reached");
208    }
209 
210    if (p->devinfo->ver <= 6) {
211       p->current->compressed =
212          (compression_control == ELK_COMPRESSION_COMPRESSED);
213    }
214 }
215 
216 /**
217  * Enable or disable instruction compression on the given instruction leaving
218  * the currently selected channel enable group untouched.
219  */
220 void
elk_inst_set_compression(const struct intel_device_info * devinfo,elk_inst * inst,bool on)221 elk_inst_set_compression(const struct intel_device_info *devinfo,
222                          elk_inst *inst, bool on)
223 {
224    if (devinfo->ver >= 6) {
225       /* No-op, the EU will figure out for us whether the instruction needs to
226        * be compressed.
227        */
228    } else {
229       /* The channel group and compression controls are non-orthogonal, there
230        * are two possible representations for uncompressed instructions and we
231        * may need to preserve the current one to avoid changing the selected
232        * channel group inadvertently.
233        */
234       if (on)
235          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_COMPRESSED);
236       else if (elk_inst_qtr_control(devinfo, inst)
237                == ELK_COMPRESSION_COMPRESSED)
238          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
239    }
240 }
241 
242 void
elk_set_default_compression(struct elk_codegen * p,bool on)243 elk_set_default_compression(struct elk_codegen *p, bool on)
244 {
245    p->current->compressed = on;
246 }
247 
248 /**
249  * Apply the range of channel enable signals given by
250  * [group, group + exec_size) to the instruction passed as argument.
251  */
252 void
elk_inst_set_group(const struct intel_device_info * devinfo,elk_inst * inst,unsigned group)253 elk_inst_set_group(const struct intel_device_info *devinfo,
254                    elk_inst *inst, unsigned group)
255 {
256    if (devinfo->ver >= 20) {
257       assert(group % 8 == 0 && group < 32);
258       elk_inst_set_qtr_control(devinfo, inst, group / 8);
259 
260    } else if (devinfo->ver >= 7) {
261       assert(group % 4 == 0 && group < 32);
262       elk_inst_set_qtr_control(devinfo, inst, group / 8);
263       elk_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
264 
265    } else if (devinfo->ver == 6) {
266       assert(group % 8 == 0 && group < 32);
267       elk_inst_set_qtr_control(devinfo, inst, group / 8);
268 
269    } else {
270       assert(group % 8 == 0 && group < 16);
271       /* The channel group and compression controls are non-orthogonal, there
272        * are two possible representations for group zero and we may need to
273        * preserve the current one to avoid changing the selected compression
274        * enable inadvertently.
275        */
276       if (group == 8)
277          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_2NDHALF);
278       else if (elk_inst_qtr_control(devinfo, inst) == ELK_COMPRESSION_2NDHALF)
279          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
280    }
281 }
282 
283 void
elk_set_default_group(struct elk_codegen * p,unsigned group)284 elk_set_default_group(struct elk_codegen *p, unsigned group)
285 {
286    p->current->group = group;
287 }
288 
elk_set_default_mask_control(struct elk_codegen * p,unsigned value)289 void elk_set_default_mask_control( struct elk_codegen *p, unsigned value )
290 {
291    p->current->mask_control = value;
292 }
293 
elk_set_default_saturate(struct elk_codegen * p,bool enable)294 void elk_set_default_saturate( struct elk_codegen *p, bool enable )
295 {
296    p->current->saturate = enable;
297 }
298 
elk_set_default_acc_write_control(struct elk_codegen * p,unsigned value)299 void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
300 {
301    p->current->acc_wr_control = value;
302 }
303 
elk_set_default_swsb(struct elk_codegen * p,struct tgl_swsb value)304 void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value)
305 {
306    p->current->swsb = value;
307 }
308 
elk_push_insn_state(struct elk_codegen * p)309 void elk_push_insn_state( struct elk_codegen *p )
310 {
311    assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
312    *(p->current + 1) = *p->current;
313    p->current++;
314 }
315 
elk_pop_insn_state(struct elk_codegen * p)316 void elk_pop_insn_state( struct elk_codegen *p )
317 {
318    assert(p->current != p->stack);
319    p->current--;
320 }
321 
322 
323 /***********************************************************************
324  */
325 void
elk_init_codegen(const struct elk_isa_info * isa,struct elk_codegen * p,void * mem_ctx)326 elk_init_codegen(const struct elk_isa_info *isa,
327                  struct elk_codegen *p, void *mem_ctx)
328 {
329    memset(p, 0, sizeof(*p));
330 
331    p->isa = isa;
332    p->devinfo = isa->devinfo;
333    p->automatic_exec_sizes = true;
334    /*
335     * Set the initial instruction store array size to 1024, if found that
336     * isn't enough, then it will double the store size at elk_next_insn()
337     * until out of memory.
338     */
339    p->store_size = 1024;
340    p->store = rzalloc_array(mem_ctx, elk_inst, p->store_size);
341    p->nr_insn = 0;
342    p->current = p->stack;
343    memset(p->current, 0, sizeof(p->current[0]));
344 
345    p->mem_ctx = mem_ctx;
346 
347    /* Some defaults?
348     */
349    elk_set_default_exec_size(p, ELK_EXECUTE_8);
350    elk_set_default_mask_control(p, ELK_MASK_ENABLE); /* what does this do? */
351    elk_set_default_saturate(p, 0);
352    elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
353 
354    /* Set up control flow stack */
355    p->if_stack_depth = 0;
356    p->if_stack_array_size = 16;
357    p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
358 
359    p->loop_stack_depth = 0;
360    p->loop_stack_array_size = 16;
361    p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
362    p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
363 }
364 
365 
elk_get_program(struct elk_codegen * p,unsigned * sz)366 const unsigned *elk_get_program( struct elk_codegen *p,
367 			       unsigned *sz )
368 {
369    *sz = p->next_insn_offset;
370    return (const unsigned *)p->store;
371 }
372 
373 const struct elk_shader_reloc *
elk_get_shader_relocs(struct elk_codegen * p,unsigned * num_relocs)374 elk_get_shader_relocs(struct elk_codegen *p, unsigned *num_relocs)
375 {
376    *num_relocs = p->num_relocs;
377    return p->relocs;
378 }
379 
380 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
381 
elk_should_dump_shader_bin(void)382 bool elk_should_dump_shader_bin(void)
383 {
384    return debug_get_option_shader_bin_dump_path() != NULL;
385 }
386 
/**
 * Write the byte range [start_offset, end_offset) of \p assembly to the file
 * "<INTEL_SHADER_BIN_DUMP_PATH>/<identifier>.bin".
 *
 * Best-effort: all errors (open failure, non-regular file, short/failed
 * write) are silently ignored and the function simply returns.
 */
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier)
{
   char *name = ralloc_asprintf(NULL, "%s/%s.bin",
                                debug_get_option_shader_bin_dump_path(),
                                identifier);

   /* O_TRUNC discards any previous (possibly longer) dump with the same
    * identifier; without it stale trailing bytes from an earlier, larger
    * shader would remain in the file.
    */
   int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0777);
   ralloc_free(name);

   if (fd < 0)
      return;

   /* Refuse to stream into anything that isn't a regular file. */
   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return;
   }

   size_t to_write = end_offset - start_offset;
   void *write_ptr = assembly + start_offset;

   /* write() may perform partial writes; loop until everything is out. */
   while (to_write) {
      ssize_t ret = write(fd, write_ptr, to_write);

      if (ret < 0 && errno == EINTR)
         continue; /* Interrupted by a signal before any data was written. */

      if (ret <= 0) {
         close(fd);
         return;
      }

      to_write -= ret;
      write_ptr += ret;
   }

   close(fd);
}
423 
/**
 * Replace the instructions generated since \p start_offset with the contents
 * of "<INTEL_SHADER_ASM_READ_PATH>/<identifier>.bin", if that file exists.
 *
 * Returns true when the override succeeded.  Note that once the file has
 * been opened and validated, the codegen state (nr_insn, next_insn_offset,
 * store) is updated *before* the read; a short read returns false with the
 * state already resized, so callers must treat false as "do not trust the
 * override" rather than "nothing happened".
 */
bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier)
{
   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
   if (!read_path) {
      return false;
   }

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);

   int fd = open(name, O_RDONLY);
   ralloc_free(name);

   if (fd == -1) {
      return false;
   }

   /* Only regular files can be spliced into the instruction store. */
   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return false;
   }

   /* Drop the instructions emitted since start_offset and account for the
    * ones about to be read in.
    */
   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(elk_inst);
   p->nr_insn += sb.st_size / sizeof(elk_inst);

   /* Resize the store to fit exactly the prefix plus the override. */
   p->next_insn_offset = start_offset + sb.st_size;
   p->store_size = (start_offset + sb.st_size) / sizeof(elk_inst);
   p->store = (elk_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
   assert(p->store);

   ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
   close(fd);
   if (ret != sb.st_size) {
      return false;
   }

   /* The replacement assembly must still pass the EU validator. */
   ASSERTED bool valid =
      elk_validate_instructions(p->isa, p->store,
                                start_offset, p->next_insn_offset,
                                NULL);
   assert(valid);

   return true;
}
469 
470 const struct elk_label *
elk_find_label(const struct elk_label * root,int offset)471 elk_find_label(const struct elk_label *root, int offset)
472 {
473    const struct elk_label *curr = root;
474 
475    if (curr != NULL)
476    {
477       do {
478          if (curr->offset == offset)
479             return curr;
480 
481          curr = curr->next;
482       } while (curr != NULL);
483    }
484 
485    return curr;
486 }
487 
488 void
elk_create_label(struct elk_label ** labels,int offset,void * mem_ctx)489 elk_create_label(struct elk_label **labels, int offset, void *mem_ctx)
490 {
491    if (*labels != NULL) {
492       struct elk_label *curr = *labels;
493       struct elk_label *prev;
494 
495       do {
496          prev = curr;
497 
498          if (curr->offset == offset)
499             return;
500 
501          curr = curr->next;
502       } while (curr != NULL);
503 
504       curr = ralloc(mem_ctx, struct elk_label);
505       curr->offset = offset;
506       curr->number = prev->number + 1;
507       curr->next = NULL;
508       prev->next = curr;
509    } else {
510       struct elk_label *root = ralloc(mem_ctx, struct elk_label);
511       root->number = 0;
512       root->offset = offset;
513       root->next = NULL;
514       *labels = root;
515    }
516 }
517 
/**
 * Scan the assembly in [start, end) and build a linked list of labels, one
 * per distinct branch target (JIP/UIP) found, so that a disassembly can
 * print symbolic LABELn: markers.  The list is allocated out of \p mem_ctx;
 * returns NULL when no branch targets exist in the range.
 */
const struct elk_label *
elk_label_assembly(const struct elk_isa_info *isa,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   const struct intel_device_info *const devinfo = isa->devinfo;

   struct elk_label *root_label = NULL;

   /* Branch offsets are encoded in units of elk_jump_scale(); this converts
    * them to byte offsets within the assembly.
    */
   int to_bytes_scale = sizeof(elk_inst) / elk_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const elk_inst *inst = (const elk_inst *) ((const char *) assembly + offset);
      elk_inst uncompacted;

      bool is_compact = elk_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         /* Branch fields are only readable on the uncompacted form. */
         elk_compact_inst *compacted = (elk_compact_inst *)inst;
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (elk_has_uip(devinfo, elk_inst_opcode(isa, inst))) {
         /* Instructions that have UIP also have JIP. */
         elk_create_label(&root_label,
            offset + elk_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         elk_create_label(&root_label,
            offset + elk_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (elk_has_jip(devinfo, elk_inst_opcode(isa, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = elk_inst_jip(devinfo, inst);
         } else {
            /* Pre-Gfx7 encodes the jump distance as a jump count field. */
            jip = elk_inst_gfx6_jump_count(devinfo, inst);
         }

         elk_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      /* Step by the on-disk size of the original instruction. */
      if (is_compact) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }

   return root_label;
}
566 
/**
 * Convenience wrapper around elk_disassemble() that first scans the range
 * for branch targets so the output contains LABELn: markers.
 */
void
elk_disassemble_with_labels(const struct elk_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   /* The label list only lives for the duration of this disassembly. */
   void *mem_ctx = ralloc_context(NULL);

   elk_disassemble(isa, assembly, start, end,
                   elk_label_assembly(isa, assembly, start, end, mem_ctx),
                   out);

   ralloc_free(mem_ctx);
}
579 
/**
 * Disassemble the instructions in [start, end) of \p assembly to \p out,
 * printing a "LABELn:" marker before every offset present in \p root_label
 * (which may be NULL).  With INTEL_DEBUG=hex the raw instruction bytes are
 * printed in front of each line as well.
 */
void
elk_disassemble(const struct elk_isa_info *isa,
                const void *assembly, int start, int end,
                const struct elk_label *root_label, FILE *out)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   bool dump_hex = INTEL_DEBUG(DEBUG_HEX);

   for (int offset = start; offset < end;) {
      const elk_inst *insn = (const elk_inst *)((char *)assembly + offset);
      elk_inst uncompacted;

      /* Emit a label marker if some branch targets this offset. */
      if (root_label != NULL) {
        const struct elk_label *label = elk_find_label(root_label, offset);
        if (label != NULL) {
           fprintf(out, "\nLABEL%d:\n", label->number);
        }
      }

      bool compacted = elk_inst_cmpt_control(devinfo, insn);
      /* Debug toggle: flip to 1 to prefix each line with its byte offset. */
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         elk_compact_inst *compacted = (elk_compact_inst *)insn;
         if (dump_hex) {
            /* Compacted instructions are 8 bytes rather than 16. */
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0 ; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Make compacted instructions hex value output vertically aligned
             * with uncompacted instructions hex value
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         /* Decode through the uncompacted representation. */
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0 ; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      elk_disassemble_inst(out, isa, insn, compacted, offset, root_label);

      /* Advance by the on-disk size of the original (possibly compacted)
       * instruction, not the uncompacted copy we may have decoded.
       */
      if (compacted) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }
}
646 
/* Master opcode table mapping IR opcodes to hardware encodings.  Several IR
 * opcodes appear in multiple rows because the hardware encoding or operand
 * count changed across generations; the gfx_vers mask selects which row
 * applies to a given device, and elk_init_isa_info() asserts that at most
 * one row matches per opcode per generation.
 */
static const struct elk_opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { ELK_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { ELK_OPCODE_SYNC,     1,   "sync",    1,    0,    GFX_GE(GFX12) },
   { ELK_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_GE(GFX45) & GFX_LT(GFX12) },
   { ELK_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_DIM,      10,  "dim",     1,    1,    GFX75 },
   { ELK_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { ELK_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_ROR,      14,  "ror",     2,    1,    GFX11 },
   { ELK_OPCODE_ROR,      110, "ror",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_ROL,      15,  "rol",     2,    1,    GFX11 },
   { ELK_OPCODE_ROL,      111, "rol",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { ELK_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { ELK_OPCODE_IFF,      35,  "iff",     0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { ELK_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { ELK_OPCODE_DO,       38,  "do",      0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CASE,     38,  "case",    0,    0,    GFX6 },
   { ELK_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { ELK_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { ELK_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { ELK_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { ELK_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_GE(GFX75) },
   { ELK_OPCODE_MSAVE,    44,  "msave",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CALL,     44,  "call",    0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_MREST,    45,  "mrest",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_RET,      45,  "ret",     0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_PUSH,     46,  "push",    0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_FORK,     46,  "fork",    0,    0,    GFX6 },
   { ELK_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_GE(GFX8) },
   { ELK_OPCODE_POP,      47,  "pop",     2,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SENDS,    51,  "sends",   2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { ELK_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { ELK_OPCODE_MATH,     56,  "math",    2,    1,    GFX_GE(GFX6) },
   { ELK_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { ELK_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { ELK_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { ELK_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { ELK_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { ELK_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { ELK_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
   { ELK_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
   { ELK_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP4A,     88,  "dp4a",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_LINE,     89,  "line",    2,    1,    GFX_LE(GFX10) },
   { ELK_OPCODE_DPAS,     89,  "dpas",    3,    1,    GFX_GE(GFX125) },
   { ELK_OPCODE_PLN,      90,  "pln",     2,    1,    GFX_GE(GFX45) & GFX_LE(GFX10) },
   { ELK_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_GE(GFX6) },
   { ELK_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX_GE(GFX6) & GFX_LE(GFX10) },
   { ELK_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_GE(GFX8) },
   { ELK_OPCODE_NENOP,    125, "nenop",   0,    0,    GFX45 },
   { ELK_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};
758 
759 void
elk_init_isa_info(struct elk_isa_info * isa,const struct intel_device_info * devinfo)760 elk_init_isa_info(struct elk_isa_info *isa,
761                   const struct intel_device_info *devinfo)
762 {
763    isa->devinfo = devinfo;
764 
765    enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
766 
767    memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
768    memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
769 
770    for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
771       if (opcode_descs[i].gfx_vers & ver) {
772          const unsigned e = opcode_descs[i].ir;
773          const unsigned h = opcode_descs[i].hw;
774          assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
775          assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
776          isa->ir_to_descs[e] = &opcode_descs[i];
777          isa->hw_to_descs[h] = &opcode_descs[i];
778       }
779    }
780 }
781 
782 /**
783  * Return the matching opcode_desc for the specified IR opcode and hardware
784  * generation, or NULL if the opcode is not supported by the device.
785  */
786 const struct elk_opcode_desc *
elk_opcode_desc(const struct elk_isa_info * isa,enum elk_opcode op)787 elk_opcode_desc(const struct elk_isa_info *isa, enum elk_opcode op)
788 {
789    return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
790 }
791 
792 /**
793  * Return the matching opcode_desc for the specified HW opcode and hardware
794  * generation, or NULL if the opcode is not supported by the device.
795  */
796 const struct elk_opcode_desc *
elk_opcode_desc_from_hw(const struct elk_isa_info * isa,unsigned hw)797 elk_opcode_desc_from_hw(const struct elk_isa_info *isa, unsigned hw)
798 {
799    return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
800 }
801 
/**
 * Return the number of source operands of the given instruction.
 *
 * For most opcodes this comes straight from the opcode descriptor table,
 * but MATH (and pre-Gfx6 math performed through SEND) depends on the math
 * function field to distinguish unary from binary operations.
 */
unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct elk_opcode_desc *desc =
      elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
   unsigned math_function;

   if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
      math_function = elk_inst_math_function(devinfo, inst);
   } else if (devinfo->ver < 6 &&
              elk_inst_opcode(isa, inst) == ELK_OPCODE_SEND) {
      if (elk_inst_sfid(devinfo, inst) == ELK_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      /* Everything else: the descriptor table is authoritative. */
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* MATH: unary or binary depending on the function being evaluated. */
   switch (math_function) {
   case ELK_MATH_FUNCTION_INV:
   case ELK_MATH_FUNCTION_LOG:
   case ELK_MATH_FUNCTION_EXP:
   case ELK_MATH_FUNCTION_SQRT:
   case ELK_MATH_FUNCTION_RSQ:
   case ELK_MATH_FUNCTION_SIN:
   case ELK_MATH_FUNCTION_COS:
   case ELK_MATH_FUNCTION_SINCOS:
   case GFX8_MATH_FUNCTION_INVM:
   case GFX8_MATH_FUNCTION_RSQRTM:
      return 1;
   case ELK_MATH_FUNCTION_FDIV:
   case ELK_MATH_FUNCTION_POW:
   case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case ELK_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}
857