• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014-2015 Broadcom
3  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include "util/blob.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/u_memory.h"
29 #include "util/perf/cpu_trace.h"
30 #include "util/ralloc.h"
31 #include "pipe/p_screen.h"
32 
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_control_flow.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "compiler/nir/nir_serialize.h"
37 #include "compiler/shader_enums.h"
38 
39 #include "tgsi_to_nir.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "tgsi/tgsi_dump.h"
42 #include "tgsi/tgsi_info.h"
43 #include "tgsi/tgsi_scan.h"
44 #include "tgsi/tgsi_from_mesa.h"
45 
46 #define SWIZ(X, Y, Z, W) (unsigned[4]){      \
47       TGSI_SWIZZLE_##X,                      \
48       TGSI_SWIZZLE_##Y,                      \
49       TGSI_SWIZZLE_##Z,                      \
50       TGSI_SWIZZLE_##W,                      \
51    }
52 
53 struct ttn_reg_info {
54    /** nir register handle containing this TGSI index. */
55    nir_def *reg;
56    nir_variable *var;
57    /** Offset (in vec4s) from the start of var for this TGSI index. */
58    int offset;
59 };
60 
61 struct ttn_compile {
62    union tgsi_full_token *token;
63    nir_builder build;
64    struct tgsi_shader_info *scan;
65 
66    struct ttn_reg_info *output_regs;
67    struct ttn_reg_info *temp_regs;
68    nir_def **imm_defs;
69 
70    unsigned num_samp_types;
71    nir_alu_type *samp_types;
72 
73    nir_def *addr_reg;
74 
75    nir_variable **inputs;
76    nir_variable **outputs;
77    nir_variable *samplers[PIPE_MAX_SAMPLERS];
78    nir_variable *images[PIPE_MAX_SHADER_IMAGES];
79    nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
80    uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];
81 
82    unsigned num_samplers;
83    unsigned num_images;
84    unsigned num_msaa_images;
85 
86    nir_variable *input_var_face;
87    nir_variable *input_var_position;
88    nir_variable *input_var_point;
89    nir_variable *clipdist;
90 
91    /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
92    unsigned next_imm;
93 
94    bool cap_face_is_sysval;
95    bool cap_position_is_sysval;
96    bool cap_point_is_sysval;
97    bool cap_samplers_as_deref;
98    bool cap_integers;
99    bool cap_compact_arrays;
100 };
101 
102 #define ttn_swizzle(b, src, x, y, z, w) \
103    nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
104 #define ttn_channel(b, src, swiz) \
105    nir_channel(b, src, TGSI_SWIZZLE_##swiz)
106 
107 static gl_varying_slot
tgsi_varying_semantic_to_slot(unsigned semantic,unsigned index)108 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
109 {
110    switch (semantic) {
111    case TGSI_SEMANTIC_POSITION:
112       return VARYING_SLOT_POS;
113    case TGSI_SEMANTIC_COLOR:
114       if (index == 0)
115          return VARYING_SLOT_COL0;
116       else
117          return VARYING_SLOT_COL1;
118    case TGSI_SEMANTIC_BCOLOR:
119       if (index == 0)
120          return VARYING_SLOT_BFC0;
121       else
122          return VARYING_SLOT_BFC1;
123    case TGSI_SEMANTIC_FOG:
124       return VARYING_SLOT_FOGC;
125    case TGSI_SEMANTIC_PSIZE:
126       return VARYING_SLOT_PSIZ;
127    case TGSI_SEMANTIC_GENERIC:
128       assert(index < 32);
129       return VARYING_SLOT_VAR0 + index;
130    case TGSI_SEMANTIC_FACE:
131       return VARYING_SLOT_FACE;
132    case TGSI_SEMANTIC_EDGEFLAG:
133       return VARYING_SLOT_EDGE;
134    case TGSI_SEMANTIC_PRIMID:
135       return VARYING_SLOT_PRIMITIVE_ID;
136    case TGSI_SEMANTIC_CLIPDIST:
137       if (index == 0)
138          return VARYING_SLOT_CLIP_DIST0;
139       else
140          return VARYING_SLOT_CLIP_DIST1;
141    case TGSI_SEMANTIC_CLIPVERTEX:
142       return VARYING_SLOT_CLIP_VERTEX;
143    case TGSI_SEMANTIC_TEXCOORD:
144       assert(index < 8);
145       return VARYING_SLOT_TEX0 + index;
146    case TGSI_SEMANTIC_PCOORD:
147       return VARYING_SLOT_PNTC;
148    case TGSI_SEMANTIC_VIEWPORT_INDEX:
149       return VARYING_SLOT_VIEWPORT;
150    case TGSI_SEMANTIC_LAYER:
151       return VARYING_SLOT_LAYER;
152    case TGSI_SEMANTIC_TESSINNER:
153       return VARYING_SLOT_TESS_LEVEL_INNER;
154    case TGSI_SEMANTIC_TESSOUTER:
155       return VARYING_SLOT_TESS_LEVEL_OUTER;
156    default:
157       fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
158       abort();
159    }
160 }
161 
162 static enum gl_frag_depth_layout
ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)163 ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)
164 {
165    switch (tgsi_fs_depth_layout) {
166    case TGSI_FS_DEPTH_LAYOUT_NONE:
167       return FRAG_DEPTH_LAYOUT_NONE;
168    case TGSI_FS_DEPTH_LAYOUT_ANY:
169       return FRAG_DEPTH_LAYOUT_ANY;
170    case TGSI_FS_DEPTH_LAYOUT_GREATER:
171       return FRAG_DEPTH_LAYOUT_GREATER;
172    case TGSI_FS_DEPTH_LAYOUT_LESS:
173       return FRAG_DEPTH_LAYOUT_LESS;
174    case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
175       return FRAG_DEPTH_LAYOUT_UNCHANGED;
176    default:
177       unreachable("bad TGSI FS depth layout");
178    }
179 }
180 
181 static enum glsl_interp_mode
ttn_translate_interp_mode(unsigned tgsi_interp)182 ttn_translate_interp_mode(unsigned tgsi_interp)
183 {
184    switch (tgsi_interp) {
185    case TGSI_INTERPOLATE_CONSTANT:
186       return INTERP_MODE_FLAT;
187    case TGSI_INTERPOLATE_LINEAR:
188       return INTERP_MODE_NOPERSPECTIVE;
189    case TGSI_INTERPOLATE_PERSPECTIVE:
190       return INTERP_MODE_SMOOTH;
191    case TGSI_INTERPOLATE_COLOR:
192       return INTERP_MODE_NONE;
193    default:
194       unreachable("bad TGSI interpolation mode");
195    }
196 }
197 
198 static void
ttn_emit_declaration(struct ttn_compile * c)199 ttn_emit_declaration(struct ttn_compile *c)
200 {
201    nir_builder *b = &c->build;
202    struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
203    unsigned array_size = decl->Range.Last - decl->Range.First + 1;
204    unsigned file = decl->Declaration.File;
205    unsigned i;
206 
207    if (file == TGSI_FILE_TEMPORARY) {
208       if (decl->Declaration.Array) {
209          /* for arrays, we create variables instead of registers: */
210          nir_variable *var =
211             nir_variable_create(b->shader, nir_var_shader_temp,
212                                 glsl_array_type(glsl_vec4_type(), array_size, 0),
213                                 ralloc_asprintf(b->shader, "arr_%d",
214                                                 decl->Array.ArrayID));
215 
216          for (i = 0; i < array_size; i++) {
217             /* point all the matching slots to the same var,
218              * with appropriate offset set, mostly just so
219              * we know what to do when tgsi does a non-indirect
220              * access
221              */
222             c->temp_regs[decl->Range.First + i].reg = NULL;
223             c->temp_regs[decl->Range.First + i].var = var;
224             c->temp_regs[decl->Range.First + i].offset = i;
225          }
226       } else {
227          for (i = 0; i < array_size; i++) {
228             nir_def *reg = nir_decl_reg(b, 4, 32, 0);
229             c->temp_regs[decl->Range.First + i].reg = reg;
230             c->temp_regs[decl->Range.First + i].var = NULL;
231             c->temp_regs[decl->Range.First + i].offset = 0;
232          }
233       }
234    } else if (file == TGSI_FILE_ADDRESS) {
235       c->addr_reg = nir_decl_reg(b, 4, 32, 0);
236    } else if (file == TGSI_FILE_SYSTEM_VALUE) {
237       /* Nothing to record for system values. */
238    } else if (file == TGSI_FILE_BUFFER) {
239       /* Nothing to record for buffers. */
240    } else if (file == TGSI_FILE_IMAGE) {
241       /* Nothing to record for images. */
242    } else if (file == TGSI_FILE_SAMPLER) {
243       /* Nothing to record for samplers. */
244    } else if (file == TGSI_FILE_SAMPLER_VIEW) {
245       struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
246       nir_alu_type type;
247 
248       assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
249              (sview->ReturnTypeX == sview->ReturnTypeZ) &&
250              (sview->ReturnTypeX == sview->ReturnTypeW));
251 
252       switch (sview->ReturnTypeX) {
253       case TGSI_RETURN_TYPE_SINT:
254          type = nir_type_int32;
255          break;
256       case TGSI_RETURN_TYPE_UINT:
257          type = nir_type_uint32;
258          break;
259       case TGSI_RETURN_TYPE_FLOAT:
260       default:
261          type = nir_type_float32;
262          break;
263       }
264 
265       for (i = 0; i < array_size; i++) {
266          c->samp_types[decl->Range.First + i] = type;
267       }
268    } else {
269       bool is_array = (array_size > 1);
270 
271       assert(file == TGSI_FILE_INPUT ||
272              file == TGSI_FILE_OUTPUT ||
273              file == TGSI_FILE_CONSTANT);
274 
275       /* nothing to do for UBOs: */
276       if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
277           decl->Dim.Index2D != 0) {
278          b->shader->info.num_ubos =
279             MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
280          c->ubo_sizes[decl->Dim.Index2D] =
281             MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
282          return;
283       }
284 
285       if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
286          is_array = (is_array && decl->Declaration.Array &&
287                      (decl->Array.ArrayID != 0));
288       }
289 
290       for (i = 0; i < array_size; i++) {
291          unsigned idx = decl->Range.First + i;
292          nir_variable *var = rzalloc(b->shader, nir_variable);
293 
294          var->data.driver_location = idx;
295 
296          var->type = glsl_vec4_type();
297          if (is_array)
298             var->type = glsl_array_type(var->type, array_size, 0);
299 
300          switch (file) {
301          case TGSI_FILE_INPUT:
302             var->data.read_only = true;
303             var->data.mode = nir_var_shader_in;
304             var->name = ralloc_asprintf(var, "in_%d", idx);
305 
306             if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
307                if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
308                   var->type = glsl_bool_type();
309                   if (c->cap_face_is_sysval) {
310                      var->data.mode = nir_var_system_value;
311                      var->data.location = SYSTEM_VALUE_FRONT_FACE;
312                   } else {
313                      var->data.location = VARYING_SLOT_FACE;
314                   }
315                   c->input_var_face = var;
316                } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
317                   if (c->cap_position_is_sysval) {
318                      var->data.mode = nir_var_system_value;
319                      var->data.location = SYSTEM_VALUE_FRAG_COORD;
320                   } else {
321                      var->data.location = VARYING_SLOT_POS;
322                   }
323                   c->input_var_position = var;
324                } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
325                   if (c->cap_point_is_sysval) {
326                      var->data.mode = nir_var_system_value;
327                      var->data.location = SYSTEM_VALUE_POINT_COORD;
328                   } else {
329                      var->data.location = VARYING_SLOT_PNTC;
330                   }
331                   c->input_var_point = var;
332                } else {
333                   var->data.location =
334                      tgsi_varying_semantic_to_slot(decl->Semantic.Name,
335                                                    decl->Semantic.Index);
336                }
337             } else {
338                assert(!decl->Declaration.Semantic);
339                var->data.location = VERT_ATTRIB_GENERIC0 + idx;
340             }
341             var->data.index = 0;
342             var->data.interpolation =
343                ttn_translate_interp_mode(decl->Interp.Interpolate);
344 
345             c->inputs[idx] = var;
346 
347             for (int i = 0; i < array_size; i++)
348                b->shader->info.inputs_read |= 1ull << (var->data.location + i);
349 
350             break;
351          case TGSI_FILE_OUTPUT: {
352             int semantic_name = decl->Semantic.Name;
353             int semantic_index = decl->Semantic.Index;
354             /* Since we can't load from outputs in the IR, we make temporaries
355              * for the outputs and emit stores to the real outputs at the end of
356              * the shader.
357              */
358             nir_def *reg = nir_decl_reg(b, 4, 32,
359                                             is_array ? array_size : 0);
360 
361             var->data.mode = nir_var_shader_out;
362             var->name = ralloc_asprintf(var, "out_%d", idx);
363             var->data.index = 0;
364             var->data.interpolation =
365                ttn_translate_interp_mode(decl->Interp.Interpolate);
366             var->data.patch = semantic_name == TGSI_SEMANTIC_TESSINNER ||
367                               semantic_name == TGSI_SEMANTIC_TESSOUTER ||
368                               semantic_name == TGSI_SEMANTIC_PATCH;
369 
370             if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
371                switch (semantic_name) {
372                case TGSI_SEMANTIC_COLOR: {
373                   /* TODO tgsi loses some information, so we cannot
374                    * actually differentiate here between DSB and MRT
375                    * at this point.  But so far no drivers using tgsi-
376                    * to-nir support dual source blend:
377                    */
378                   bool dual_src_blend = false;
379                   if (dual_src_blend && (semantic_index == 1)) {
380                      var->data.location = FRAG_RESULT_DATA0;
381                      var->data.index = 1;
382                   } else {
383                      if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
384                         var->data.location = FRAG_RESULT_COLOR;
385                      else
386                         var->data.location = FRAG_RESULT_DATA0 + semantic_index;
387                   }
388                   break;
389                }
390                case TGSI_SEMANTIC_POSITION:
391                   var->data.location = FRAG_RESULT_DEPTH;
392                   var->type = glsl_float_type();
393                   break;
394                case TGSI_SEMANTIC_STENCIL:
395                   var->data.location = FRAG_RESULT_STENCIL;
396                   var->type = glsl_int_type();
397                   break;
398                case TGSI_SEMANTIC_SAMPLEMASK:
399                   var->data.location = FRAG_RESULT_SAMPLE_MASK;
400                   var->type = glsl_int_type();
401                   break;
402 
403                default:
404                   fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
405                           decl->Semantic.Name, decl->Semantic.Index);
406                   abort();
407                }
408             } else {
409                var->data.location =
410                   tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
411                if (var->data.location == VARYING_SLOT_FOGC ||
412                    var->data.location == VARYING_SLOT_PSIZ) {
413                   var->type = glsl_float_type();
414                } else if (var->data.location == VARYING_SLOT_LAYER) {
415                   var->type = glsl_int_type();
416                } else if (c->cap_compact_arrays &&
417                           var->data.location == VARYING_SLOT_CLIP_DIST0) {
418                   var->type = glsl_array_type(glsl_float_type(),
419                                               b->shader->info.clip_distance_array_size,
420                                               sizeof(float));
421                   c->clipdist = var;
422                }
423             }
424 
425             if (is_array) {
426                unsigned j;
427                for (j = 0; j < array_size; j++) {
428                   c->output_regs[idx + j].offset = i + j;
429                   c->output_regs[idx + j].reg = reg;
430                }
431             } else {
432                c->output_regs[idx].offset = i;
433                c->output_regs[idx].reg = reg;
434             }
435 
436             c->outputs[idx] = var;
437 
438             if (c->cap_compact_arrays && var->data.location == VARYING_SLOT_CLIP_DIST1) {
439                /* ignore this entirely */
440                continue;
441             }
442 
443             for (int i = 0; i < array_size; i++)
444                b->shader->info.outputs_written |= 1ull << (var->data.location + i);
445          }
446             break;
447          case TGSI_FILE_CONSTANT:
448             var->data.mode = nir_var_uniform;
449             var->name = ralloc_asprintf(var, "uniform_%d", idx);
450             var->data.location = idx;
451             break;
452          default:
453             unreachable("bad declaration file");
454             return;
455          }
456 
457          nir_shader_add_variable(b->shader, var);
458 
459          if (is_array)
460             break;
461       }
462 
463    }
464 }
465 
466 static void
ttn_emit_immediate(struct ttn_compile * c)467 ttn_emit_immediate(struct ttn_compile *c)
468 {
469    nir_builder *b = &c->build;
470    struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
471    nir_load_const_instr *load_const;
472    int i;
473 
474    load_const = nir_load_const_instr_create(b->shader, 4, 32);
475    c->imm_defs[c->next_imm] = &load_const->def;
476    c->next_imm++;
477 
478    for (i = 0; i < load_const->def.num_components; i++)
479       load_const->value[i].u32 = tgsi_imm->u[i].Uint;
480 
481    nir_builder_instr_insert(b, &load_const->instr);
482 }
483 
484 static nir_def *
485 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
486 
487 /* generate either a constant or indirect deref chain for accessing an
488  * array variable.
489  */
490 static nir_deref_instr *
ttn_array_deref(struct ttn_compile * c,nir_variable * var,unsigned offset,struct tgsi_ind_register * indirect)491 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
492                 struct tgsi_ind_register *indirect)
493 {
494    nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
495    nir_def *index = nir_imm_int(&c->build, offset);
496    if (indirect)
497       index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
498    return nir_build_deref_array(&c->build, deref, index);
499 }
500 
501 /* Special case: Turn the frontface varying into a load of the
502  * frontface variable, and create the vector as required by TGSI.
503  */
504 static nir_def *
ttn_emulate_tgsi_front_face(struct ttn_compile * c)505 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
506 {
507    nir_def *tgsi_frontface[4];
508 
509    if (c->cap_face_is_sysval) {
510       /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
511        * F is 0xffffffff if front-facing, 0 if not.
512        */
513 
514       nir_def *frontface = nir_load_front_face(&c->build, 1);
515 
516       tgsi_frontface[0] = nir_bcsel(&c->build,
517                              frontface,
518                              nir_imm_int(&c->build, 0xffffffff),
519                              nir_imm_int(&c->build, 0));
520       tgsi_frontface[1] = nir_imm_int(&c->build, 0);
521       tgsi_frontface[2] = nir_imm_int(&c->build, 0);
522       tgsi_frontface[3] = nir_imm_int(&c->build, 1);
523    } else {
524       /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
525        * F is positive if front-facing, negative if not.
526        */
527 
528       assert(c->input_var_face);
529       nir_def *frontface = nir_load_var(&c->build, c->input_var_face);
530 
531       tgsi_frontface[0] = nir_bcsel(&c->build,
532                              frontface,
533                              nir_imm_float(&c->build, 1.0),
534                              nir_imm_float(&c->build, -1.0));
535       tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
536       tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
537       tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
538    }
539 
540    return nir_vec(&c->build, tgsi_frontface, 4);
541 }
542 
543 static nir_src
ttn_src_for_file_and_index(struct ttn_compile * c,unsigned file,unsigned index,struct tgsi_ind_register * indirect,struct tgsi_dimension * dim,struct tgsi_ind_register * dimind,bool src_is_float)544 ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
545                            struct tgsi_ind_register *indirect,
546                            struct tgsi_dimension *dim,
547                            struct tgsi_ind_register *dimind,
548                            bool src_is_float)
549 {
550    nir_builder *b = &c->build;
551    nir_src src;
552 
553    memset(&src, 0, sizeof(src));
554 
555    switch (file) {
556    case TGSI_FILE_TEMPORARY:
557       if (c->temp_regs[index].var) {
558          unsigned offset = c->temp_regs[index].offset;
559          nir_variable *var = c->temp_regs[index].var;
560          nir_def *load = nir_load_deref(&c->build,
561                ttn_array_deref(c, var, offset, indirect));
562 
563          src = nir_src_for_ssa(load);
564       } else {
565          assert(!indirect);
566          src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[index].reg));
567       }
568       assert(!dim);
569       break;
570 
571    case TGSI_FILE_ADDRESS:
572       src = nir_src_for_ssa(nir_load_reg(b, c->addr_reg));
573       assert(!dim);
574       break;
575 
576    case TGSI_FILE_IMMEDIATE:
577       src = nir_src_for_ssa(c->imm_defs[index]);
578       assert(!indirect);
579       assert(!dim);
580       break;
581 
582    case TGSI_FILE_SYSTEM_VALUE: {
583       nir_def *load;
584 
585       assert(!indirect);
586       assert(!dim);
587 
588       switch (c->scan->system_value_semantic_name[index]) {
589       case TGSI_SEMANTIC_VERTEXID_NOBASE:
590          load = nir_load_vertex_id_zero_base(b);
591          break;
592       case TGSI_SEMANTIC_VERTEXID:
593          load = nir_load_vertex_id(b);
594          break;
595       case TGSI_SEMANTIC_BASEVERTEX:
596          load = nir_load_base_vertex(b);
597          break;
598       case TGSI_SEMANTIC_INSTANCEID:
599          load = nir_load_instance_id(b);
600          break;
601       case TGSI_SEMANTIC_FACE:
602          assert(c->cap_face_is_sysval);
603          load = ttn_emulate_tgsi_front_face(c);
604          break;
605       case TGSI_SEMANTIC_POSITION:
606          assert(c->cap_position_is_sysval);
607          load = nir_load_frag_coord(b);
608          break;
609       case TGSI_SEMANTIC_PCOORD:
610          assert(c->cap_point_is_sysval);
611          load = nir_load_point_coord(b);
612          break;
613       case TGSI_SEMANTIC_THREAD_ID:
614          load = nir_load_local_invocation_id(b);
615          break;
616       case TGSI_SEMANTIC_BLOCK_ID:
617          load = nir_load_workgroup_id(b);
618          break;
619       case TGSI_SEMANTIC_BLOCK_SIZE:
620          load = nir_load_workgroup_size(b);
621          break;
622       case TGSI_SEMANTIC_CS_USER_DATA_AMD:
623          load = nir_load_user_data_amd(b);
624          break;
625       case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
626          load = nir_load_tess_level_inner_default(b);
627          break;
628       case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
629          load = nir_load_tess_level_outer_default(b);
630          break;
631       case TGSI_SEMANTIC_SAMPLEID:
632          load = nir_load_sample_id(b);
633          b->shader->info.fs.uses_sample_shading = true;
634          break;
635       default:
636          unreachable("bad system value");
637       }
638 
639       if (load->num_components == 2)
640          load = nir_swizzle(b, load, SWIZ(X, Y, Y, Y), 4);
641       else if (load->num_components == 3)
642          load = nir_swizzle(b, load, SWIZ(X, Y, Z, Z), 4);
643 
644       src = nir_src_for_ssa(load);
645       break;
646    }
647 
648    case TGSI_FILE_INPUT:
649       if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
650           c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
651          assert(!c->cap_face_is_sysval && c->input_var_face);
652          return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
653       } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
654           c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
655          assert(!c->cap_position_is_sysval && c->input_var_position);
656          return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
657       } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
658           c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
659          assert(!c->cap_point_is_sysval && c->input_var_point);
660          return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
661       } else {
662          /* Indirection on input arrays isn't supported by TTN. */
663          assert(!dim);
664          nir_deref_instr *deref = nir_build_deref_var(&c->build,
665                                                       c->inputs[index]);
666          return nir_src_for_ssa(nir_load_deref(&c->build, deref));
667       }
668       break;
669 
670    case TGSI_FILE_OUTPUT:
671       if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
672          c->outputs[index]->data.fb_fetch_output = 1;
673          nir_deref_instr *deref = nir_build_deref_var(&c->build,
674                                                       c->outputs[index]);
675          return nir_src_for_ssa(nir_load_deref(&c->build, deref));
676       }
677       unreachable("unsupported output read");
678       break;
679 
680    case TGSI_FILE_CONSTANT: {
681       nir_intrinsic_instr *load;
682       nir_intrinsic_op op;
683       unsigned srcn = 0;
684 
685       if (dim && (dim->Index > 0 || dim->Indirect)) {
686          op = nir_intrinsic_load_ubo;
687       } else {
688          op = nir_intrinsic_load_uniform;
689       }
690 
691       load = nir_intrinsic_instr_create(b->shader, op);
692       if (op == nir_intrinsic_load_uniform) {
693          nir_intrinsic_set_dest_type(load, src_is_float ? nir_type_float :
694                                                           nir_type_int);
695       }
696 
697       load->num_components = 4;
698       if (dim && (dim->Index > 0 || dim->Indirect)) {
699          if (dimind) {
700             load->src[srcn] =
701                ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
702                                           NULL, NULL, NULL, false);
703          } else {
704             /* UBOs start at index 1 in TGSI: */
705             load->src[srcn] =
706                nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
707          }
708          srcn++;
709       }
710 
711       nir_def *offset;
712       if (op == nir_intrinsic_load_ubo) {
713          /* UBO loads don't have a base offset. */
714          offset = nir_imm_int(b, index);
715          if (indirect) {
716             offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
717          }
718          /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
719          offset = nir_ishl_imm(b, offset, 4);
720          nir_intrinsic_set_align(load, 16, 0);
721 
722          /* Set a very conservative base/range of the access: 16 bytes if not
723           * indirect at all, offset to the end of the UBO if the offset is
724           * indirect, and totally unknown if the block number is indirect.
725           */
726          uint32_t base = index * 16;
727          nir_intrinsic_set_range_base(load, base);
728          if (dimind)
729             nir_intrinsic_set_range(load, ~0);
730          else if (indirect)
731             nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
732          else
733             nir_intrinsic_set_range(load, base + 16);
734       } else {
735          nir_intrinsic_set_base(load, index);
736          if (indirect) {
737             offset = ttn_src_for_indirect(c, indirect);
738             nir_intrinsic_set_range(load, c->build.shader->num_uniforms * 16 - index);
739          } else {
740             offset = nir_imm_int(b, 0);
741             nir_intrinsic_set_range(load, 1);
742          }
743       }
744       load->src[srcn++] = nir_src_for_ssa(offset);
745 
746       nir_def_init(&load->instr, &load->def, 4, 32);
747       nir_builder_instr_insert(b, &load->instr);
748 
749       src = nir_src_for_ssa(&load->def);
750       break;
751    }
752 
753    default:
754       unreachable("bad src file");
755    }
756 
757 
758    return src;
759 }
760 
761 static nir_def *
ttn_src_for_indirect(struct ttn_compile * c,struct tgsi_ind_register * indirect)762 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
763 {
764    nir_builder *b = &c->build;
765    nir_alu_src src;
766    memset(&src, 0, sizeof(src));
767    for (int i = 0; i < 4; i++)
768       src.swizzle[i] = indirect->Swizzle;
769    src.src = ttn_src_for_file_and_index(c,
770                                         indirect->File,
771                                         indirect->Index,
772                                         NULL, NULL, NULL,
773                                         false);
774    return nir_mov_alu(b, src, 1);
775 }
776 
777 static nir_variable *
ttn_get_var(struct ttn_compile * c,struct tgsi_full_dst_register * tgsi_fdst)778 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
779 {
780    struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
781    unsigned index = tgsi_dst->Index;
782 
783    if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
784       /* we should not have an indirect when there is no var! */
785       if (!c->temp_regs[index].var)
786          assert(!tgsi_dst->Indirect);
787       return c->temp_regs[index].var;
788    }
789 
790    return NULL;
791 }
792 
793 static nir_def *
ttn_get_src(struct ttn_compile * c,struct tgsi_full_src_register * tgsi_fsrc,int src_idx)794 ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
795             int src_idx)
796 {
797    nir_builder *b = &c->build;
798    struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
799    enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
800    unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
801    bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
802                         tgsi_src_type == TGSI_TYPE_DOUBLE ||
803                         tgsi_src_type == TGSI_TYPE_UNTYPED);
804    nir_alu_src src;
805 
806    memset(&src, 0, sizeof(src));
807 
808    if (tgsi_src->File == TGSI_FILE_NULL) {
809       return nir_imm_float(b, 0.0);
810    } else if (tgsi_src->File == TGSI_FILE_SAMPLER ||
811               tgsi_src->File == TGSI_FILE_IMAGE ||
812               tgsi_src->File == TGSI_FILE_BUFFER) {
813       /* Only the index of the resource gets used in texturing, and it will
814        * handle looking that up on its own instead of using the nir_alu_src.
815        */
816       assert(!tgsi_src->Indirect);
817       return NULL;
818    } else {
819       struct tgsi_ind_register *ind = NULL;
820       struct tgsi_dimension *dim = NULL;
821       struct tgsi_ind_register *dimind = NULL;
822       if (tgsi_src->Indirect)
823          ind = &tgsi_fsrc->Indirect;
824       if (tgsi_src->Dimension) {
825          dim = &tgsi_fsrc->Dimension;
826          if (dim->Indirect)
827             dimind = &tgsi_fsrc->DimIndirect;
828       }
829       src.src = ttn_src_for_file_and_index(c,
830                                            tgsi_src->File,
831                                            tgsi_src->Index,
832                                            ind, dim, dimind,
833                                            src_is_float);
834    }
835 
836    src.swizzle[0] = tgsi_src->SwizzleX;
837    src.swizzle[1] = tgsi_src->SwizzleY;
838    src.swizzle[2] = tgsi_src->SwizzleZ;
839    src.swizzle[3] = tgsi_src->SwizzleW;
840 
841    nir_def *def = nir_mov_alu(b, src, 4);
842 
843    if (tgsi_type_is_64bit(tgsi_src_type))
844       def = nir_bitcast_vector(b, def, 64);
845 
846    if (tgsi_src->Absolute) {
847       assert(src_is_float);
848       def = nir_fabs(b, def);
849    }
850 
851    if (tgsi_src->Negate) {
852       if (src_is_float)
853          def = nir_fneg(b, def);
854       else
855          def = nir_ineg(b, def);
856    }
857 
858    return def;
859 }
860 
861 static nir_def *
ttn_alu(nir_builder * b,nir_op op,unsigned dest_bitsize,nir_def ** src)862 ttn_alu(nir_builder *b, nir_op op, unsigned dest_bitsize, nir_def **src)
863 {
864    nir_def *def = nir_build_alu_src_arr(b, op, src);
865    if (def->bit_size == 1)
866       def = nir_ineg(b, nir_b2iN(b, def, dest_bitsize));
867    assert(def->bit_size == dest_bitsize);
868    if (dest_bitsize == 64) {
869       /* Replicate before bitcasting, so we end up with 4x32 at the end */
870       if (def->num_components == 1)
871          def = nir_replicate(b, def, 2);
872 
873       if (def->num_components > 2) {
874          /* 32 -> 64 bit conversion ops are supposed to only convert the first
875           * two components, and we need to truncate here to avoid creating a
876           * vec8 after bitcasting the destination.
877           */
878          def = nir_trim_vector(b, def, 2);
879       }
880       def = nir_bitcast_vector(b, def, 32);
881    }
882    return def;
883 }
884 
885 /* EXP - Approximate Exponential Base 2
886  *  dst.x = 2^{\lfloor src.x\rfloor}
887  *  dst.y = src.x - \lfloor src.x\rfloor
888  *  dst.z = 2^{src.x}
889  *  dst.w = 1.0
890  */
891 static nir_def *
ttn_exp(nir_builder * b,nir_def ** src)892 ttn_exp(nir_builder *b, nir_def **src)
893 {
894    nir_def *srcx = ttn_channel(b, src[0], X);
895 
896    return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
897                       nir_fsub(b, srcx, nir_ffloor(b, srcx)),
898                       nir_fexp2(b, srcx),
899                       nir_imm_float(b, 1.0));
900 }
901 
902 /* LOG - Approximate Logarithm Base 2
903  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
904  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
905  *  dst.z = \log_2{|src.x|}
906  *  dst.w = 1.0
907  */
908 static nir_def *
ttn_log(nir_builder * b,nir_def ** src)909 ttn_log(nir_builder *b, nir_def **src)
910 {
911    nir_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
912    nir_def *log2 = nir_flog2(b, abs_srcx);
913 
914    return nir_vec4(b, nir_ffloor(b, log2),
915                       nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
916                       nir_flog2(b, abs_srcx),
917                       nir_imm_float(b, 1.0));
918 }
919 
920 /* DST - Distance Vector
921  *   dst.x = 1.0
922  *   dst.y = src0.y \times src1.y
923  *   dst.z = src0.z
924  *   dst.w = src1.w
925  */
926 static nir_def *
ttn_dst(nir_builder * b,nir_def ** src)927 ttn_dst(nir_builder *b, nir_def **src)
928 {
929    return nir_vec4(b, nir_imm_float(b, 1.0),
930                       nir_fmul(b, ttn_channel(b, src[0], Y),
931                                   ttn_channel(b, src[1], Y)),
932                       ttn_channel(b, src[0], Z),
933                       ttn_channel(b, src[1], W));
934 }
935 
936 /* LIT - Light Coefficients
937  *  dst.x = 1.0
938  *  dst.y = max(src.x, 0.0)
939  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
940  *  dst.w = 1.0
941  */
942 static nir_def *
ttn_lit(nir_builder * b,nir_def ** src)943 ttn_lit(nir_builder *b, nir_def **src)
944 {
945    nir_def *src0_y = ttn_channel(b, src[0], Y);
946    nir_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
947                                               nir_imm_float(b, 128.0)),
948                                   nir_imm_float(b, -128.0));
949    nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
950                                wclamp);
951    nir_def *z = nir_bcsel(b, nir_flt_imm(b, ttn_channel(b, src[0], X), 0.0),
952                                  nir_imm_float(b, 0.0), pow);
953 
954    return nir_vec4(b, nir_imm_float(b, 1.0),
955                       nir_fmax(b, ttn_channel(b, src[0], X),
956                                   nir_imm_float(b, 0.0)),
957                       z, nir_imm_float(b, 1.0));
958 }
959 
960 static void
ttn_barrier(nir_builder * b)961 ttn_barrier(nir_builder *b)
962 {
963    nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
964 }
965 
966 static void
ttn_kill(nir_builder * b)967 ttn_kill(nir_builder *b)
968 {
969    nir_discard(b);
970    b->shader->info.fs.uses_discard = true;
971 }
972 
973 static void
ttn_kill_if(nir_builder * b,nir_def ** src)974 ttn_kill_if(nir_builder *b, nir_def **src)
975 {
976    /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
977    b->exact = true;
978    nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
979    b->exact = false;
980 
981    nir_discard_if(b, cmp);
982    b->shader->info.fs.uses_discard = true;
983 }
984 
985 static void
get_texture_info(unsigned texture,enum glsl_sampler_dim * dim,bool * is_shadow,bool * is_array)986 get_texture_info(unsigned texture,
987                  enum glsl_sampler_dim *dim,
988                  bool *is_shadow,
989                  bool *is_array)
990 {
991    assert(is_array);
992    *is_array = false;
993 
994    if (is_shadow)
995       *is_shadow = false;
996 
997    switch (texture) {
998    case TGSI_TEXTURE_BUFFER:
999       *dim = GLSL_SAMPLER_DIM_BUF;
1000       break;
1001    case TGSI_TEXTURE_1D:
1002       *dim = GLSL_SAMPLER_DIM_1D;
1003       break;
1004    case TGSI_TEXTURE_1D_ARRAY:
1005       *dim = GLSL_SAMPLER_DIM_1D;
1006       *is_array = true;
1007       break;
1008    case TGSI_TEXTURE_SHADOW1D:
1009       *dim = GLSL_SAMPLER_DIM_1D;
1010       *is_shadow = true;
1011       break;
1012    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1013       *dim = GLSL_SAMPLER_DIM_1D;
1014       *is_shadow = true;
1015       *is_array = true;
1016       break;
1017    case TGSI_TEXTURE_2D:
1018       *dim = GLSL_SAMPLER_DIM_2D;
1019       break;
1020    case TGSI_TEXTURE_2D_ARRAY:
1021       *dim = GLSL_SAMPLER_DIM_2D;
1022       *is_array = true;
1023       break;
1024    case TGSI_TEXTURE_2D_MSAA:
1025       *dim = GLSL_SAMPLER_DIM_MS;
1026       break;
1027    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1028       *dim = GLSL_SAMPLER_DIM_MS;
1029       *is_array = true;
1030       break;
1031    case TGSI_TEXTURE_SHADOW2D:
1032       *dim = GLSL_SAMPLER_DIM_2D;
1033       *is_shadow = true;
1034       break;
1035    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1036       *dim = GLSL_SAMPLER_DIM_2D;
1037       *is_shadow = true;
1038       *is_array = true;
1039       break;
1040    case TGSI_TEXTURE_3D:
1041       *dim = GLSL_SAMPLER_DIM_3D;
1042       break;
1043    case TGSI_TEXTURE_CUBE:
1044       *dim = GLSL_SAMPLER_DIM_CUBE;
1045       break;
1046    case TGSI_TEXTURE_CUBE_ARRAY:
1047       *dim = GLSL_SAMPLER_DIM_CUBE;
1048       *is_array = true;
1049       break;
1050    case TGSI_TEXTURE_SHADOWCUBE:
1051       *dim = GLSL_SAMPLER_DIM_CUBE;
1052       *is_shadow = true;
1053       break;
1054    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1055       *dim = GLSL_SAMPLER_DIM_CUBE;
1056       *is_shadow = true;
1057       *is_array = true;
1058       break;
1059    case TGSI_TEXTURE_RECT:
1060       *dim = GLSL_SAMPLER_DIM_RECT;
1061       break;
1062    case TGSI_TEXTURE_SHADOWRECT:
1063       *dim = GLSL_SAMPLER_DIM_RECT;
1064       *is_shadow = true;
1065       break;
1066    default:
1067       fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1068       abort();
1069    }
1070 }
1071 
1072 static enum glsl_base_type
base_type_for_alu_type(nir_alu_type type)1073 base_type_for_alu_type(nir_alu_type type)
1074 {
1075    type = nir_alu_type_get_base_type(type);
1076 
1077    switch (type) {
1078    case nir_type_float:
1079       return GLSL_TYPE_FLOAT;
1080    case nir_type_int:
1081       return GLSL_TYPE_INT;
1082    case nir_type_uint:
1083       return GLSL_TYPE_UINT;
1084    default:
1085       unreachable("invalid type");
1086    }
1087 }
1088 
1089 static nir_variable *
get_sampler_var(struct ttn_compile * c,int binding,enum glsl_sampler_dim dim,bool is_shadow,bool is_array,enum glsl_base_type base_type,nir_texop op)1090 get_sampler_var(struct ttn_compile *c, int binding,
1091                 enum glsl_sampler_dim dim,
1092                 bool is_shadow,
1093                 bool is_array,
1094                 enum glsl_base_type base_type,
1095                 nir_texop op)
1096 {
1097    nir_variable *var = c->samplers[binding];
1098    if (!var) {
1099       const struct glsl_type *type =
1100          glsl_sampler_type(dim, is_shadow, is_array, base_type);
1101       var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1102                                 "sampler");
1103       var->data.binding = binding;
1104       var->data.explicit_binding = true;
1105 
1106       c->samplers[binding] = var;
1107       c->num_samplers = MAX2(c->num_samplers, binding + 1);
1108 
1109       /* Record textures used */
1110       BITSET_SET(c->build.shader->info.textures_used, binding);
1111       if (op == nir_texop_txf || op == nir_texop_txf_ms)
1112          BITSET_SET(c->build.shader->info.textures_used_by_txf, binding);
1113       BITSET_SET(c->build.shader->info.samplers_used, binding);
1114    }
1115 
1116    return var;
1117 }
1118 
1119 static nir_variable *
get_image_var(struct ttn_compile * c,int binding,enum glsl_sampler_dim dim,bool is_array,enum glsl_base_type base_type,enum gl_access_qualifier access,enum pipe_format format)1120 get_image_var(struct ttn_compile *c, int binding,
1121               enum glsl_sampler_dim dim,
1122               bool is_array,
1123               enum glsl_base_type base_type,
1124               enum gl_access_qualifier access,
1125               enum pipe_format format)
1126 {
1127    nir_variable *var = c->images[binding];
1128 
1129    if (!var) {
1130       const struct glsl_type *type = glsl_image_type(dim, is_array, base_type);
1131 
1132       var = nir_variable_create(c->build.shader, nir_var_image, type, "image");
1133       var->data.binding = binding;
1134       var->data.explicit_binding = true;
1135       var->data.access = access;
1136       var->data.image.format = format;
1137 
1138       c->images[binding] = var;
1139       c->num_images = MAX2(c->num_images, binding + 1);
1140       if (dim == GLSL_SAMPLER_DIM_MS)
1141          c->num_msaa_images = c->num_images;
1142    }
1143 
1144    return var;
1145 }
1146 
1147 static void
add_ssbo_var(struct ttn_compile * c,int binding)1148 add_ssbo_var(struct ttn_compile *c, int binding)
1149 {
1150    nir_variable *var = c->ssbo[binding];
1151 
1152    if (!var) {
1153       /* A length of 0 is used to denote unsized arrays */
1154       const struct glsl_type *type = glsl_array_type(glsl_uint_type(), 0, 0);
1155 
1156       struct glsl_struct_field field = {
1157             .type = type,
1158             .name = "data",
1159             .location = -1,
1160       };
1161 
1162       var = nir_variable_create(c->build.shader, nir_var_mem_ssbo, type, "ssbo");
1163       var->data.binding = binding;
1164       var->interface_type =
1165          glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
1166                              false, "data");
1167       c->ssbo[binding] = var;
1168    }
1169 }
1170 
1171 static nir_def *
ttn_tex(struct ttn_compile * c,nir_def ** src)1172 ttn_tex(struct ttn_compile *c, nir_def **src)
1173 {
1174    nir_builder *b = &c->build;
1175    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1176    nir_tex_instr *instr;
1177    nir_texop op;
1178    unsigned num_srcs, samp = 1, sview, i;
1179 
1180    switch (tgsi_inst->Instruction.Opcode) {
1181    case TGSI_OPCODE_TEX:
1182       op = nir_texop_tex;
1183       num_srcs = 1;
1184       break;
1185    case TGSI_OPCODE_TEX2:
1186       op = nir_texop_tex;
1187       num_srcs = 1;
1188       samp = 2;
1189       break;
1190    case TGSI_OPCODE_TXP:
1191       op = nir_texop_tex;
1192       num_srcs = 2;
1193       break;
1194    case TGSI_OPCODE_TXB:
1195       op = nir_texop_txb;
1196       num_srcs = 2;
1197       break;
1198    case TGSI_OPCODE_TXB2:
1199       op = nir_texop_txb;
1200       num_srcs = 2;
1201       samp = 2;
1202       break;
1203    case TGSI_OPCODE_TXL:
1204    case TGSI_OPCODE_TEX_LZ:
1205       op = nir_texop_txl;
1206       num_srcs = 2;
1207       break;
1208    case TGSI_OPCODE_TXL2:
1209       op = nir_texop_txl;
1210       num_srcs = 2;
1211       samp = 2;
1212       break;
1213    case TGSI_OPCODE_TXF:
1214    case TGSI_OPCODE_TXF_LZ:
1215       if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
1216           tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1217          op = nir_texop_txf_ms;
1218       } else {
1219          op = nir_texop_txf;
1220       }
1221       num_srcs = 2;
1222       break;
1223    case TGSI_OPCODE_TXD:
1224       op = nir_texop_txd;
1225       num_srcs = 3;
1226       samp = 3;
1227       break;
1228    case TGSI_OPCODE_LODQ:
1229       op = nir_texop_lod;
1230       num_srcs = 1;
1231       break;
1232 
1233    default:
1234       fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
1235       abort();
1236    }
1237 
1238    if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
1239        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1240        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
1241        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1242        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
1243        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
1244        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1245       num_srcs++;
1246    }
1247 
1248    /* Deref sources */
1249    num_srcs += 2;
1250 
1251    num_srcs += tgsi_inst->Texture.NumOffsets;
1252 
1253    instr = nir_tex_instr_create(b->shader, num_srcs);
1254    instr->op = op;
1255 
1256    get_texture_info(tgsi_inst->Texture.Texture,
1257                     &instr->sampler_dim, &instr->is_shadow, &instr->is_array);
1258 
1259    instr->coord_components =
1260       glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
1261 
1262    if (instr->is_array)
1263       instr->coord_components++;
1264 
1265    assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
1266 
1267    /* TODO if we supported any opc's which take an explicit SVIEW
1268     * src, we would use that here instead.  But for the "legacy"
1269     * texture opc's the SVIEW index is same as SAMP index:
1270     */
1271    sview = tgsi_inst->Src[samp].Register.Index;
1272 
1273    nir_alu_type sampler_type =
1274       sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;
1275 
1276    if (op == nir_texop_lod) {
1277       instr->dest_type = nir_type_float32;
1278    } else {
1279       instr->dest_type = sampler_type;
1280    }
1281 
1282    nir_variable *var =
1283       get_sampler_var(c, sview, instr->sampler_dim,
1284                       instr->is_shadow,
1285                       instr->is_array,
1286                       base_type_for_alu_type(sampler_type),
1287                       op);
1288 
1289    nir_deref_instr *deref = nir_build_deref_var(b, var);
1290 
1291    unsigned src_number = 0;
1292 
1293    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
1294                                                 &deref->def);
1295    src_number++;
1296    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
1297                                                 &deref->def);
1298    src_number++;
1299 
1300    instr->src[src_number] =
1301       nir_tex_src_for_ssa(nir_tex_src_coord,
1302                           nir_trim_vector(b, src[0], instr->coord_components));
1303    src_number++;
1304 
1305    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
1306       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
1307                                                    ttn_channel(b, src[0], W));
1308       src_number++;
1309    }
1310 
1311    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
1312       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
1313                                                    ttn_channel(b, src[0], W));
1314       src_number++;
1315    }
1316 
1317    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
1318       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
1319                                                    ttn_channel(b, src[1], X));
1320       src_number++;
1321    }
1322 
1323    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
1324        tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
1325       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ)
1326          instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
1327       else
1328          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1329       instr->src[src_number].src_type = nir_tex_src_lod;
1330       src_number++;
1331    }
1332 
1333    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
1334       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
1335                                                    ttn_channel(b, src[1], X));
1336       src_number++;
1337    }
1338 
1339    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
1340        tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ) {
1341       if (op == nir_texop_txf_ms) {
1342          instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
1343                                                       ttn_channel(b, src[0], W));
1344       } else {
1345          if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ)
1346             instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
1347          else
1348             instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1349          instr->src[src_number].src_type = nir_tex_src_lod;
1350       }
1351       src_number++;
1352    }
1353 
1354    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
1355       instr->src[src_number] =
1356          nir_tex_src_for_ssa(nir_tex_src_ddx,
1357                nir_trim_vector(b, src[1], nir_tex_instr_src_size(instr, src_number)));
1358       src_number++;
1359       instr->src[src_number] =
1360          nir_tex_src_for_ssa(nir_tex_src_ddy,
1361                nir_trim_vector(b, src[2], nir_tex_instr_src_size(instr, src_number)));
1362       src_number++;
1363    }
1364 
1365    if (instr->is_shadow) {
1366       if (instr->coord_components == 4)
1367          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
1368       else if (instr->coord_components == 3)
1369          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
1370       else
1371          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
1372 
1373       instr->src[src_number].src_type = nir_tex_src_comparator;
1374       src_number++;
1375    }
1376 
1377    for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
1378       struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
1379       /* since TexOffset ins't using tgsi_full_src_register we get to
1380        * do some extra gymnastics:
1381        */
1382       nir_alu_src src;
1383 
1384       memset(&src, 0, sizeof(src));
1385 
1386       src.src = ttn_src_for_file_and_index(c,
1387                                            tex_offset->File,
1388                                            tex_offset->Index,
1389                                            NULL, NULL, NULL,
1390                                            true);
1391 
1392       src.swizzle[0] = tex_offset->SwizzleX;
1393       src.swizzle[1] = tex_offset->SwizzleY;
1394       src.swizzle[2] = tex_offset->SwizzleZ;
1395       src.swizzle[3] = TGSI_SWIZZLE_W;
1396 
1397       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_offset,
1398                                                    nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
1399       src_number++;
1400    }
1401 
1402    assert(src_number == num_srcs);
1403    assert(src_number == instr->num_srcs);
1404 
1405    nir_def_init(&instr->instr, &instr->def,
1406                 nir_tex_instr_dest_size(instr), 32);
1407    nir_builder_instr_insert(b, &instr->instr);
1408    return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
1409 }
1410 
1411 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1412  *
1413  *     dst.x = texture\_width(unit, lod)
1414  *     dst.y = texture\_height(unit, lod)
1415  *     dst.z = texture\_depth(unit, lod)
1416  *     dst.w = texture\_levels(unit)
1417  *
1418  * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1419  */
1420 static nir_def *
ttn_txq(struct ttn_compile * c,nir_def ** src)1421 ttn_txq(struct ttn_compile *c, nir_def **src)
1422 {
1423    nir_builder *b = &c->build;
1424    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1425    nir_tex_instr *txs, *qlv;
1426 
1427    txs = nir_tex_instr_create(b->shader, 2);
1428    txs->op = nir_texop_txs;
1429    txs->dest_type = nir_type_uint32;
1430    get_texture_info(tgsi_inst->Texture.Texture,
1431                     &txs->sampler_dim, &txs->is_shadow, &txs->is_array);
1432 
1433    qlv = nir_tex_instr_create(b->shader, 1);
1434    qlv->op = nir_texop_query_levels;
1435    qlv->dest_type = nir_type_uint32;
1436    get_texture_info(tgsi_inst->Texture.Texture,
1437                     &qlv->sampler_dim, &qlv->is_shadow, &qlv->is_array);
1438 
1439    assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
1440    int sview = tgsi_inst->Src[1].Register.Index;
1441 
1442    nir_alu_type sampler_type =
1443       sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;
1444 
1445    nir_variable *var =
1446       get_sampler_var(c, sview, txs->sampler_dim,
1447                       txs->is_shadow,
1448                       txs->is_array,
1449                       base_type_for_alu_type(sampler_type),
1450                       nir_texop_txs);
1451 
1452    nir_deref_instr *deref = nir_build_deref_var(b, var);
1453 
1454    txs->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
1455                                      &deref->def);
1456 
1457    qlv->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
1458                                      &deref->def);
1459 
1460    /* lod: */
1461    txs->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod,
1462                                      ttn_channel(b, src[0], X));
1463 
1464    nir_def_init(&txs->instr, &txs->def, nir_tex_instr_dest_size(txs), 32);
1465    nir_builder_instr_insert(b, &txs->instr);
1466 
1467    nir_def_init(&qlv->instr, &qlv->def, 1, 32);
1468    nir_builder_instr_insert(b, &qlv->instr);
1469 
1470    return nir_vector_insert_imm(b,
1471                                 nir_pad_vector_imm_int(b, &txs->def, 0, 4),
1472                                 &qlv->def, 3);
1473 }
1474 
1475 static enum glsl_base_type
get_image_base_type(struct tgsi_full_instruction * tgsi_inst)1476 get_image_base_type(struct tgsi_full_instruction *tgsi_inst)
1477 {
1478    const struct util_format_description *desc =
1479       util_format_description(tgsi_inst->Memory.Format);
1480 
1481    if (desc->channel[0].pure_integer) {
1482       if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
1483          return GLSL_TYPE_INT;
1484       else
1485          return GLSL_TYPE_UINT;
1486    }
1487    return GLSL_TYPE_FLOAT;
1488 }
1489 
1490 static enum gl_access_qualifier
get_mem_qualifier(struct tgsi_full_instruction * tgsi_inst)1491 get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
1492 {
1493    enum gl_access_qualifier access = 0;
1494 
1495    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_COHERENT)
1496       access |= ACCESS_COHERENT;
1497    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT)
1498       access |= ACCESS_RESTRICT;
1499    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
1500       access |= ACCESS_VOLATILE;
1501    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
1502       access |= ACCESS_NON_TEMPORAL;
1503 
1504    return access;
1505 }
1506 
1507 static nir_def *
ttn_mem(struct ttn_compile * c,nir_def ** src)1508 ttn_mem(struct ttn_compile *c, nir_def **src)
1509 {
1510    nir_builder *b = &c->build;
1511    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1512    nir_intrinsic_instr *instr = NULL;
1513    unsigned resource_index, addr_src_index, file;
1514 
1515    switch (tgsi_inst->Instruction.Opcode) {
1516    case TGSI_OPCODE_LOAD:
1517       assert(!tgsi_inst->Src[0].Register.Indirect);
1518       resource_index = tgsi_inst->Src[0].Register.Index;
1519       file = tgsi_inst->Src[0].Register.File;
1520       addr_src_index = 1;
1521       break;
1522    case TGSI_OPCODE_STORE:
1523       assert(!tgsi_inst->Dst[0].Register.Indirect);
1524       resource_index = tgsi_inst->Dst[0].Register.Index;
1525       file = tgsi_inst->Dst[0].Register.File;
1526       addr_src_index = 0;
1527       break;
1528    default:
1529       unreachable("unexpected memory opcode");
1530    }
1531 
1532    if (file == TGSI_FILE_BUFFER) {
1533       nir_intrinsic_op op;
1534 
1535       switch (tgsi_inst->Instruction.Opcode) {
1536       case TGSI_OPCODE_LOAD:
1537          op = nir_intrinsic_load_ssbo;
1538          break;
1539       case TGSI_OPCODE_STORE:
1540          op = nir_intrinsic_store_ssbo;
1541          break;
1542       default:
1543          unreachable("unexpected buffer opcode");
1544       }
1545 
1546       add_ssbo_var(c, resource_index);
1547 
1548       instr = nir_intrinsic_instr_create(b->shader, op);
1549       instr->num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1550       nir_intrinsic_set_access(instr, get_mem_qualifier(tgsi_inst));
1551       nir_intrinsic_set_align(instr, 4, 0);
1552 
1553       unsigned i = 0;
1554       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1555          instr->src[i++] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1556                                                        instr->num_components));
1557       instr->src[i++] = nir_src_for_ssa(nir_imm_int(b, resource_index));
1558       instr->src[i++] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], X));
1559 
1560       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1561          nir_intrinsic_set_write_mask(instr, tgsi_inst->Dst[0].Register.WriteMask);
1562 
1563    } else if (file == TGSI_FILE_IMAGE) {
1564       nir_intrinsic_op op;
1565 
1566       switch (tgsi_inst->Instruction.Opcode) {
1567       case TGSI_OPCODE_LOAD:
1568          op = nir_intrinsic_image_deref_load;
1569          break;
1570       case TGSI_OPCODE_STORE:
1571          op = nir_intrinsic_image_deref_store;
1572          break;
1573       default:
1574          unreachable("unexpected file opcode");
1575       }
1576 
1577       instr = nir_intrinsic_instr_create(b->shader, op);
1578 
1579       /* Set the image variable dereference. */
1580       enum glsl_sampler_dim dim;
1581       bool is_array;
1582       get_texture_info(tgsi_inst->Memory.Texture, &dim, NULL, &is_array);
1583 
1584       enum glsl_base_type base_type = get_image_base_type(tgsi_inst);
1585       enum gl_access_qualifier access = get_mem_qualifier(tgsi_inst);
1586 
1587       nir_variable *image =
1588          get_image_var(c, resource_index,
1589                        dim, is_array, base_type, access,
1590                        tgsi_inst->Memory.Format);
1591       nir_deref_instr *image_deref = nir_build_deref_var(b, image);
1592       const struct glsl_type *type = image_deref->type;
1593 
1594       nir_intrinsic_set_access(instr, image_deref->var->data.access);
1595 
1596       instr->src[0] = nir_src_for_ssa(&image_deref->def);
1597       instr->src[1] = nir_src_for_ssa(src[addr_src_index]);
1598 
1599       /* Set the sample argument, which is undefined for single-sample images. */
1600       if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1601          instr->src[2] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], W));
1602       } else {
1603          instr->src[2] = nir_src_for_ssa(nir_undef(b, 1, 32));
1604       }
1605 
1606       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1607          instr->src[3] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
1608       }
1609 
1610       unsigned num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1611 
1612       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
1613          instr->src[3] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1614                                                      num_components));
1615          instr->src[4] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
1616       }
1617 
1618       instr->num_components = num_components;
1619    } else {
1620       unreachable("unexpected file");
1621    }
1622 
1623 
1624    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1625       nir_def_init(&instr->instr, &instr->def, instr->num_components, 32);
1626       nir_builder_instr_insert(b, &instr->instr);
1627       return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
1628    } else {
1629       nir_builder_instr_insert(b, &instr->instr);
1630       return NULL;
1631    }
1632 }
1633 
1634 static const nir_op op_trans[TGSI_OPCODE_LAST] = {
1635    [TGSI_OPCODE_ARL] = 0,
1636    [TGSI_OPCODE_MOV] = nir_op_mov,
1637    [TGSI_OPCODE_FBFETCH] = nir_op_mov,
1638    [TGSI_OPCODE_LIT] = 0,
1639    [TGSI_OPCODE_RCP] = nir_op_frcp,
1640    [TGSI_OPCODE_RSQ] = nir_op_frsq,
1641    [TGSI_OPCODE_EXP] = 0,
1642    [TGSI_OPCODE_LOG] = 0,
1643    [TGSI_OPCODE_MUL] = nir_op_fmul,
1644    [TGSI_OPCODE_ADD] = nir_op_fadd,
1645    [TGSI_OPCODE_DP3] = 0,
1646    [TGSI_OPCODE_DP4] = 0,
1647    [TGSI_OPCODE_DST] = 0,
1648    [TGSI_OPCODE_MIN] = nir_op_fmin,
1649    [TGSI_OPCODE_MAX] = nir_op_fmax,
1650    [TGSI_OPCODE_SLT] = nir_op_slt,
1651    [TGSI_OPCODE_SGE] = nir_op_sge,
1652    [TGSI_OPCODE_MAD] = nir_op_ffma,
1653    [TGSI_OPCODE_TEX_LZ] = 0,
1654    [TGSI_OPCODE_LRP] = 0,
1655    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
1656    [TGSI_OPCODE_FRC] = nir_op_ffract,
1657    [TGSI_OPCODE_TXF_LZ] = 0,
1658    [TGSI_OPCODE_FLR] = nir_op_ffloor,
1659    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
1660    [TGSI_OPCODE_EX2] = nir_op_fexp2,
1661    [TGSI_OPCODE_LG2] = nir_op_flog2,
1662    [TGSI_OPCODE_POW] = nir_op_fpow,
1663    [TGSI_OPCODE_COS] = nir_op_fcos,
1664    [TGSI_OPCODE_DDX] = nir_op_fddx,
1665    [TGSI_OPCODE_DDY] = nir_op_fddy,
1666    [TGSI_OPCODE_KILL] = 0,
1667    [TGSI_OPCODE_PK2H] = 0, /* XXX */
1668    [TGSI_OPCODE_PK2US] = 0, /* XXX */
1669    [TGSI_OPCODE_PK4B] = 0, /* XXX */
1670    [TGSI_OPCODE_PK4UB] = 0, /* XXX */
1671    [TGSI_OPCODE_SEQ] = nir_op_seq,
1672    [TGSI_OPCODE_SGT] = 0,
1673    [TGSI_OPCODE_SIN] = nir_op_fsin,
1674    [TGSI_OPCODE_SNE] = nir_op_sne,
1675    [TGSI_OPCODE_SLE] = 0,
1676    [TGSI_OPCODE_TEX] = 0,
1677    [TGSI_OPCODE_TXD] = 0,
1678    [TGSI_OPCODE_TXP] = 0,
1679    [TGSI_OPCODE_UP2H] = 0, /* XXX */
1680    [TGSI_OPCODE_UP2US] = 0, /* XXX */
1681    [TGSI_OPCODE_UP4B] = 0, /* XXX */
1682    [TGSI_OPCODE_UP4UB] = 0, /* XXX */
1683    [TGSI_OPCODE_ARR] = 0,
1684 
1685    /* No function calls, yet. */
1686    [TGSI_OPCODE_CAL] = 0, /* XXX */
1687    [TGSI_OPCODE_RET] = 0, /* XXX */
1688 
1689    [TGSI_OPCODE_SSG] = nir_op_fsign,
1690    [TGSI_OPCODE_CMP] = 0,
1691    [TGSI_OPCODE_TXB] = 0,
1692    [TGSI_OPCODE_DIV] = nir_op_fdiv,
1693    [TGSI_OPCODE_DP2] = 0,
1694    [TGSI_OPCODE_TXL] = 0,
1695 
1696    [TGSI_OPCODE_BRK] = 0,
1697    [TGSI_OPCODE_IF] = 0,
1698    [TGSI_OPCODE_UIF] = 0,
1699    [TGSI_OPCODE_ELSE] = 0,
1700    [TGSI_OPCODE_ENDIF] = 0,
1701 
1702    [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
1703    [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,
1704 
1705    [TGSI_OPCODE_CEIL] = nir_op_fceil,
1706    [TGSI_OPCODE_I2F] = nir_op_i2f32,
1707    [TGSI_OPCODE_NOT] = nir_op_inot,
1708    [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
1709    [TGSI_OPCODE_SHL] = nir_op_ishl,
1710    [TGSI_OPCODE_AND] = nir_op_iand,
1711    [TGSI_OPCODE_OR] = nir_op_ior,
1712    [TGSI_OPCODE_MOD] = nir_op_umod,
1713    [TGSI_OPCODE_XOR] = nir_op_ixor,
1714    [TGSI_OPCODE_TXF] = 0,
1715    [TGSI_OPCODE_TXQ] = 0,
1716 
1717    [TGSI_OPCODE_CONT] = 0,
1718 
1719    [TGSI_OPCODE_EMIT] = 0, /* XXX */
1720    [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */
1721 
1722    [TGSI_OPCODE_BGNLOOP] = 0,
1723    [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
1724    [TGSI_OPCODE_ENDLOOP] = 0,
1725    [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
1726 
1727    [TGSI_OPCODE_NOP] = 0,
1728    [TGSI_OPCODE_FSEQ] = nir_op_feq,
1729    [TGSI_OPCODE_FSGE] = nir_op_fge,
1730    [TGSI_OPCODE_FSLT] = nir_op_flt,
1731    [TGSI_OPCODE_FSNE] = nir_op_fneu,
1732 
1733    [TGSI_OPCODE_KILL_IF] = 0,
1734 
1735    [TGSI_OPCODE_END] = 0,
1736 
1737    [TGSI_OPCODE_F2I] = nir_op_f2i32,
1738    [TGSI_OPCODE_IDIV] = nir_op_idiv,
1739    [TGSI_OPCODE_IMAX] = nir_op_imax,
1740    [TGSI_OPCODE_IMIN] = nir_op_imin,
1741    [TGSI_OPCODE_INEG] = nir_op_ineg,
1742    [TGSI_OPCODE_ISGE] = nir_op_ige,
1743    [TGSI_OPCODE_ISHR] = nir_op_ishr,
1744    [TGSI_OPCODE_ISLT] = nir_op_ilt,
1745    [TGSI_OPCODE_F2U] = nir_op_f2u32,
1746    [TGSI_OPCODE_U2F] = nir_op_u2f32,
1747    [TGSI_OPCODE_UADD] = nir_op_iadd,
1748    [TGSI_OPCODE_UDIV] = nir_op_udiv,
1749    [TGSI_OPCODE_UMAD] = 0,
1750    [TGSI_OPCODE_UMAX] = nir_op_umax,
1751    [TGSI_OPCODE_UMIN] = nir_op_umin,
1752    [TGSI_OPCODE_UMOD] = nir_op_umod,
1753    [TGSI_OPCODE_UMUL] = nir_op_imul,
1754    [TGSI_OPCODE_USEQ] = nir_op_ieq,
1755    [TGSI_OPCODE_USGE] = nir_op_uge,
1756    [TGSI_OPCODE_USHR] = nir_op_ushr,
1757    [TGSI_OPCODE_USLT] = nir_op_ult,
1758    [TGSI_OPCODE_USNE] = nir_op_ine,
1759 
1760    [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1761    [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
1762    [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
1763    [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1764 
1765    /* XXX: SAMPLE opcodes */
1766 
1767    [TGSI_OPCODE_UARL] = nir_op_mov,
1768    [TGSI_OPCODE_UCMP] = 0,
1769    [TGSI_OPCODE_IABS] = nir_op_iabs,
1770    [TGSI_OPCODE_ISSG] = nir_op_isign,
1771 
1772    [TGSI_OPCODE_LOAD] = 0,
1773    [TGSI_OPCODE_STORE] = 0,
1774 
1775    /* XXX: atomics */
1776 
1777    [TGSI_OPCODE_TEX2] = 0,
1778    [TGSI_OPCODE_TXB2] = 0,
1779    [TGSI_OPCODE_TXL2] = 0,
1780 
1781    [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
1782    [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
1783 
1784    [TGSI_OPCODE_TG4] = 0,
1785    [TGSI_OPCODE_LODQ] = 0,
1786 
1787    [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
1788    [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
1789    [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
1790    [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
1791    [TGSI_OPCODE_POPC] = nir_op_bit_count,
1792    [TGSI_OPCODE_LSB] = nir_op_find_lsb,
1793    [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
1794    [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
1795 
1796    [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
1797    [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
1798    [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
1799 
1800    [TGSI_OPCODE_F2D] = nir_op_f2f64,
1801    [TGSI_OPCODE_D2F] = nir_op_f2f32,
1802    [TGSI_OPCODE_DMUL] = nir_op_fmul,
1803    [TGSI_OPCODE_D2U] = nir_op_f2u32,
1804    [TGSI_OPCODE_U2D] = nir_op_u2f64,
1805 
1806    [TGSI_OPCODE_U64ADD] = nir_op_iadd,
1807    [TGSI_OPCODE_U64MUL] = nir_op_imul,
1808    [TGSI_OPCODE_U64DIV] = nir_op_udiv,
1809    [TGSI_OPCODE_U64SNE] = nir_op_ine,
1810    [TGSI_OPCODE_I64NEG] = nir_op_ineg,
1811    [TGSI_OPCODE_I64ABS] = nir_op_iabs,
1812 };
1813 
1814 static void
ttn_emit_instruction(struct ttn_compile * c)1815 ttn_emit_instruction(struct ttn_compile *c)
1816 {
1817    nir_builder *b = &c->build;
1818    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1819    unsigned i;
1820    unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
1821    struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];
1822 
1823    if (tgsi_op == TGSI_OPCODE_END)
1824       return;
1825 
1826    nir_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
1827    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1828       src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
1829    }
1830 
1831    unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);
1832 
1833    /* The destination bitsize of the NIR opcode (not TGSI, where it's always
1834     * 32 bits). This needs to be passed into ttn_alu() because it can't be
1835     * inferred for comparison opcodes.
1836     */
1837    unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;
1838 
1839    /* If this is non-NULL after the switch, it will be written to the
1840     * corresponding register/variable/etc after.
1841     */
1842    nir_def *dst = NULL;
1843 
1844    switch (tgsi_op) {
1845    case TGSI_OPCODE_RSQ:
1846       dst = nir_frsq(b, ttn_channel(b, src[0], X));
1847       break;
1848 
1849    case TGSI_OPCODE_SQRT:
1850       dst = nir_fsqrt(b, ttn_channel(b, src[0], X));
1851       break;
1852 
1853    case TGSI_OPCODE_RCP:
1854       dst = nir_frcp(b, ttn_channel(b, src[0], X));
1855       break;
1856 
1857    case TGSI_OPCODE_EX2:
1858       dst = nir_fexp2(b, ttn_channel(b, src[0], X));
1859       break;
1860 
1861    case TGSI_OPCODE_LG2:
1862       dst = nir_flog2(b, ttn_channel(b, src[0], X));
1863       break;
1864 
1865    case TGSI_OPCODE_POW:
1866       dst = nir_fpow(b, ttn_channel(b, src[0], X), ttn_channel(b, src[1], X));
1867       break;
1868 
1869    case TGSI_OPCODE_COS:
1870       dst = nir_fcos(b, ttn_channel(b, src[0], X));
1871       break;
1872 
1873    case TGSI_OPCODE_SIN:
1874       dst = nir_fsin(b, ttn_channel(b, src[0], X));
1875       break;
1876 
1877    case TGSI_OPCODE_ARL:
1878       dst = nir_f2i32(b, nir_ffloor(b, src[0]));
1879       break;
1880 
1881    case TGSI_OPCODE_EXP:
1882       dst = ttn_exp(b, src);
1883       break;
1884 
1885    case TGSI_OPCODE_LOG:
1886       dst = ttn_log(b, src);
1887       break;
1888 
1889    case TGSI_OPCODE_DST:
1890       dst = ttn_dst(b, src);
1891       break;
1892 
1893    case TGSI_OPCODE_LIT:
1894       dst = ttn_lit(b, src);
1895       break;
1896 
1897    case TGSI_OPCODE_DP2:
1898       dst = nir_fdot2(b, src[0], src[1]);
1899       break;
1900 
1901    case TGSI_OPCODE_DP3:
1902       dst = nir_fdot3(b, src[0], src[1]);
1903       break;
1904 
1905    case TGSI_OPCODE_DP4:
1906       dst = nir_fdot4(b, src[0], src[1]);
1907       break;
1908 
1909    case TGSI_OPCODE_UMAD:
1910       dst = nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]);
1911       break;
1912 
1913    case TGSI_OPCODE_LRP:
1914       dst = nir_flrp(b, src[2], src[1], src[0]);
1915       break;
1916 
1917    case TGSI_OPCODE_KILL:
1918       ttn_kill(b);
1919       break;
1920 
1921    case TGSI_OPCODE_ARR:
1922       dst = nir_f2i32(b, nir_fround_even(b, src[0]));
1923       break;
1924 
1925    case TGSI_OPCODE_CMP:
1926       dst = nir_bcsel(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)),
1927                       src[1], src[2]);
1928       break;
1929 
1930    case TGSI_OPCODE_UCMP:
1931       dst = nir_bcsel(b, nir_ine(b, src[0], nir_imm_int(b, 0)),
1932                       src[1], src[2]);
1933       break;
1934 
1935    case TGSI_OPCODE_SGT:
1936       dst = nir_slt(b, src[1], src[0]);
1937       break;
1938 
1939    case TGSI_OPCODE_SLE:
1940       dst = nir_sge(b, src[1], src[0]);
1941       break;
1942 
1943    case TGSI_OPCODE_KILL_IF:
1944       ttn_kill_if(b, src);
1945       break;
1946 
1947    case TGSI_OPCODE_TEX:
1948    case TGSI_OPCODE_TEX_LZ:
1949    case TGSI_OPCODE_TXP:
1950    case TGSI_OPCODE_TXL:
1951    case TGSI_OPCODE_TXB:
1952    case TGSI_OPCODE_TXD:
1953    case TGSI_OPCODE_TEX2:
1954    case TGSI_OPCODE_TXL2:
1955    case TGSI_OPCODE_TXB2:
1956    case TGSI_OPCODE_TXF:
1957    case TGSI_OPCODE_TXF_LZ:
1958    case TGSI_OPCODE_TG4:
1959    case TGSI_OPCODE_LODQ:
1960       dst = ttn_tex(c, src);
1961       break;
1962 
1963    case TGSI_OPCODE_TXQ:
1964       dst = ttn_txq(c, src);
1965       break;
1966 
1967    case TGSI_OPCODE_LOAD:
1968    case TGSI_OPCODE_STORE:
1969       dst = ttn_mem(c, src);
1970       break;
1971 
1972    case TGSI_OPCODE_NOP:
1973       break;
1974 
1975    case TGSI_OPCODE_IF:
1976       nir_push_if(b, nir_fneu_imm(b, nir_channel(b, src[0], 0), 0.0));
1977       break;
1978 
1979    case TGSI_OPCODE_UIF:
1980       nir_push_if(b, nir_ine_imm(b, nir_channel(b, src[0], 0), 0));
1981       break;
1982 
1983    case TGSI_OPCODE_ELSE:
1984       nir_push_else(&c->build, NULL);
1985       break;
1986 
1987    case TGSI_OPCODE_ENDIF:
1988       nir_pop_if(&c->build, NULL);
1989       break;
1990 
1991    case TGSI_OPCODE_BGNLOOP:
1992       nir_push_loop(&c->build);
1993       break;
1994 
1995    case TGSI_OPCODE_BRK:
1996       nir_jump(b, nir_jump_break);
1997       break;
1998 
1999    case TGSI_OPCODE_CONT:
2000       nir_jump(b, nir_jump_continue);
2001       break;
2002 
2003    case TGSI_OPCODE_ENDLOOP:
2004       nir_pop_loop(&c->build, NULL);
2005       break;
2006 
2007    case TGSI_OPCODE_BARRIER:
2008       ttn_barrier(b);
2009       break;
2010 
2011    default:
2012       if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
2013          dst = ttn_alu(b, op_trans[tgsi_op], dst_bitsize, src);
2014       } else {
2015          fprintf(stderr, "unknown TGSI opcode: %s\n",
2016                  tgsi_get_opcode_name(tgsi_op));
2017          abort();
2018       }
2019       break;
2020    }
2021 
2022    if (dst == NULL)
2023       return;
2024 
2025    if (tgsi_inst->Instruction.Saturate)
2026       dst = nir_fsat(b, dst);
2027 
2028    if (dst->num_components == 1)
2029       dst = nir_replicate(b, dst, 4);
2030    else if (dst->num_components == 2)
2031       dst = nir_pad_vector_imm_int(b, dst, 0, 4); /* for 64->32 conversions */
2032 
2033    assert(dst->num_components == 4);
2034 
2035    /* Finally, copy the SSA def to the NIR variable/register */
2036    nir_variable *var = ttn_get_var(c, tgsi_dst);
2037    if (var) {
2038       unsigned index = tgsi_dst->Register.Index;
2039       unsigned offset = c->temp_regs[index].offset;
2040       struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
2041                                            &tgsi_dst->Indirect : NULL;
2042       nir_store_deref(b, ttn_array_deref(c, var, offset, indirect), dst,
2043                       tgsi_dst->Register.WriteMask);
2044    } else {
2045       unsigned index = tgsi_dst->Register.Index;
2046       nir_def *reg = NULL;
2047       unsigned base_offset = 0;
2048 
2049       if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) {
2050          assert(!c->temp_regs[index].var && "handled above");
2051          assert(!tgsi_dst->Register.Indirect);
2052 
2053          reg = c->temp_regs[index].reg;
2054          base_offset = c->temp_regs[index].offset;
2055       } else if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
2056          reg = c->output_regs[index].reg;
2057          base_offset = c->output_regs[index].offset;
2058       } else if (tgsi_dst->Register.File == TGSI_FILE_ADDRESS) {
2059          assert(index == 0);
2060          reg = c->addr_reg;
2061       }
2062 
2063       if (tgsi_dst->Register.Indirect) {
2064          nir_def *indirect = ttn_src_for_indirect(c, &tgsi_dst->Indirect);
2065          nir_store_reg_indirect(b, dst, reg, indirect, .base = base_offset,
2066                                 .write_mask = tgsi_dst->Register.WriteMask);
2067       } else {
2068          nir_build_store_reg(b, dst, reg, .base = base_offset,
2069                              .write_mask = tgsi_dst->Register.WriteMask);
2070       }
2071    }
2072 }
2073 
2074 /**
2075  * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
2076  * variables at the end of the shader.
2077  *
2078  * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
2079  * written, because there's no output load intrinsic, which means we couldn't
2080  * handle writemasks.
2081  */
2082 static void
ttn_add_output_stores(struct ttn_compile * c)2083 ttn_add_output_stores(struct ttn_compile *c)
2084 {
2085    nir_builder *b = &c->build;
2086 
2087    for (int i = 0; i < c->build.shader->num_outputs; i++) {
2088       nir_variable *var = c->outputs[i];
2089       if (!var)
2090          continue;
2091 
2092       nir_def *store_value =
2093          nir_build_load_reg(b, 4, 32, c->output_regs[i].reg,
2094                             .base = c->output_regs[i].offset);
2095 
2096       uint32_t store_mask = BITFIELD_MASK(store_value->num_components);
2097       if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT) {
2098          /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output
2099           * and TGSI_SEMANTIC_STENCIL.y for the stencil output,
2100           * while NIR uses a single-component output.
2101           */
2102          if (var->data.location == FRAG_RESULT_DEPTH)
2103             store_value = nir_channel(b, store_value, 2);
2104          else if (var->data.location == FRAG_RESULT_STENCIL)
2105             store_value = nir_channel(b, store_value, 1);
2106          else if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2107             store_value = nir_channel(b, store_value, 0);
2108       } else {
2109          /* FOGC, LAYER, and PSIZ are scalar values */
2110          if (var->data.location == VARYING_SLOT_FOGC ||
2111              var->data.location == VARYING_SLOT_LAYER ||
2112              var->data.location == VARYING_SLOT_PSIZ) {
2113             store_value = nir_channel(b, store_value, 0);
2114          }
2115          if (var->data.location == VARYING_SLOT_CLIP_DIST0)
2116             store_mask = BITFIELD_MASK(MIN2(c->build.shader->info.clip_distance_array_size, 4));
2117          else if (var->data.location == VARYING_SLOT_CLIP_DIST1) {
2118             if (c->build.shader->info.clip_distance_array_size > 4)
2119                store_mask = BITFIELD_MASK(c->build.shader->info.clip_distance_array_size - 4);
2120             else
2121                store_mask = 0;
2122          }
2123       }
2124 
2125       if (c->cap_compact_arrays &&
2126           (var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2127            var->data.location == VARYING_SLOT_CLIP_DIST1)) {
2128          if (!store_mask)
2129             continue;
2130 
2131          nir_deref_instr *deref = nir_build_deref_var(b, c->clipdist);
2132          nir_def *zero = nir_imm_zero(b, 1, 32);
2133          unsigned offset = var->data.location == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
2134          unsigned size = var->data.location == VARYING_SLOT_CLIP_DIST1 ?
2135                           b->shader->info.clip_distance_array_size :
2136                           MIN2(4, b->shader->info.clip_distance_array_size);
2137          for (unsigned i = offset; i < size; i++) {
2138             /* deref the array member and store each component */
2139             nir_deref_instr *component_deref = nir_build_deref_array_imm(b, deref, i);
2140             nir_def *val = zero;
2141             if (store_mask & BITFIELD_BIT(i - offset))
2142                val = nir_channel(b, store_value, i - offset);
2143             nir_store_deref(b, component_deref, val, 0x1);
2144          }
2145       } else {
2146          nir_store_deref(b, nir_build_deref_var(b, var), store_value, store_mask);
2147       }
2148    }
2149 }
2150 
2151 /**
2152  * Parses the given TGSI tokens.
2153  */
2154 static void
ttn_parse_tgsi(struct ttn_compile * c,const void * tgsi_tokens)2155 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
2156 {
2157    struct tgsi_parse_context parser;
2158    ASSERTED int ret;
2159 
2160    ret = tgsi_parse_init(&parser, tgsi_tokens);
2161    assert(ret == TGSI_PARSE_OK);
2162 
2163    while (!tgsi_parse_end_of_tokens(&parser)) {
2164       tgsi_parse_token(&parser);
2165       c->token = &parser.FullToken;
2166 
2167       switch (parser.FullToken.Token.Type) {
2168       case TGSI_TOKEN_TYPE_DECLARATION:
2169          ttn_emit_declaration(c);
2170          break;
2171 
2172       case TGSI_TOKEN_TYPE_INSTRUCTION:
2173          ttn_emit_instruction(c);
2174          break;
2175 
2176       case TGSI_TOKEN_TYPE_IMMEDIATE:
2177          ttn_emit_immediate(c);
2178          break;
2179       }
2180    }
2181 
2182    tgsi_parse_free(&parser);
2183 }
2184 
2185 static void
ttn_read_pipe_caps(struct ttn_compile * c,struct pipe_screen * screen)2186 ttn_read_pipe_caps(struct ttn_compile *c,
2187                    struct pipe_screen *screen)
2188 {
2189    c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
2190    c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL);
2191    c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POSITION_IS_SYSVAL);
2192    c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POINT_IS_SYSVAL);
2193    c->cap_integers = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_INTEGERS);
2194    c->cap_compact_arrays = screen->get_param(screen, PIPE_CAP_NIR_COMPACT_ARRAYS);
2195 }
2196 
2197 #define BITSET_SET32(bitset, u32_mask) do { \
2198    STATIC_ASSERT(sizeof((bitset)[0]) >= sizeof(u32_mask)); \
2199    BITSET_ZERO(bitset); \
2200    (bitset)[0] = (u32_mask); \
2201 } while (0)
2202 
2203 /**
2204  * Initializes a TGSI-to-NIR compiler.
2205  */
2206 static struct ttn_compile *
ttn_compile_init(const void * tgsi_tokens,const nir_shader_compiler_options * options,struct pipe_screen * screen)2207 ttn_compile_init(const void *tgsi_tokens,
2208                  const nir_shader_compiler_options *options,
2209                  struct pipe_screen *screen)
2210 {
2211    struct ttn_compile *c;
2212    struct nir_shader *s;
2213    struct tgsi_shader_info scan;
2214 
2215    assert(options || screen);
2216    c = rzalloc(NULL, struct ttn_compile);
2217 
2218    tgsi_scan_shader(tgsi_tokens, &scan);
2219    c->scan = &scan;
2220 
2221    if (!options) {
2222       options =
2223          screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
2224    }
2225 
2226    c->build = nir_builder_init_simple_shader(tgsi_processor_to_shader_stage(scan.processor),
2227                                              options, "TTN");
2228 
2229    s = c->build.shader;
2230 
2231    if (screen) {
2232       ttn_read_pipe_caps(c, screen);
2233    } else {
2234       /* TTN used to be hard coded to always make FACE a sysval,
2235        * so it makes sense to preserve that behavior so users don't break. */
2236       c->cap_face_is_sysval = true;
2237    }
2238 
2239    s->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;
2240 
2241    if (s->info.stage == MESA_SHADER_FRAGMENT)
2242       s->info.fs.untyped_color_outputs = true;
2243 
2244    s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
2245    s->num_uniforms = scan.const_file_max[0] + 1;
2246    s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
2247    s->info.num_ssbos = util_last_bit(scan.shader_buffers_declared);
2248    s->info.num_ubos = util_last_bit(scan.const_buffers_declared >> 1);
2249    s->info.num_images = util_last_bit(scan.images_declared);
2250    BITSET_SET32(s->info.images_used, scan.images_declared);
2251    BITSET_SET32(s->info.image_buffers, scan.images_buffers);
2252    BITSET_SET32(s->info.msaa_images, scan.msaa_images_declared);
2253    s->info.num_textures = util_last_bit(scan.samplers_declared);
2254    BITSET_SET32(s->info.textures_used, scan.samplers_declared);
2255    BITSET_ZERO(s->info.textures_used_by_txf); /* No scan information yet */
2256    BITSET_SET32(s->info.samplers_used, scan.samplers_declared);
2257    s->info.internal = false;
2258 
2259    /* Default for TGSI is separate, this is assumed throughout the tree */
2260    s->info.separate_shader = true;
2261 
2262    for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) {
2263       unsigned value = scan.properties[i];
2264 
2265       switch (i) {
2266       case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2267          break; /* handled in ttn_emit_declaration */
2268       case TGSI_PROPERTY_FS_COORD_ORIGIN:
2269          if (s->info.stage == MESA_SHADER_FRAGMENT)
2270             s->info.fs.origin_upper_left = value == TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
2271          break;
2272       case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2273          if (s->info.stage == MESA_SHADER_FRAGMENT)
2274             s->info.fs.pixel_center_integer = value == TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
2275          break;
2276       case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2277          if (s->info.stage == MESA_SHADER_FRAGMENT)
2278             s->info.fs.depth_layout = ttn_get_depth_layout(value);
2279          break;
2280       case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
2281          if (s->info.stage == MESA_SHADER_VERTEX)
2282             s->info.vs.window_space_position = value;
2283          break;
2284       case TGSI_PROPERTY_NEXT_SHADER:
2285          s->info.next_stage = tgsi_processor_to_shader_stage(value);
2286          break;
2287       case TGSI_PROPERTY_VS_BLIT_SGPRS_AMD:
2288          if (s->info.stage == MESA_SHADER_VERTEX)
2289             s->info.vs.blit_sgprs_amd = value;
2290          break;
2291       case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
2292          if (s->info.stage == MESA_SHADER_COMPUTE)
2293             s->info.workgroup_size[0] = value;
2294          break;
2295       case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
2296          if (s->info.stage == MESA_SHADER_COMPUTE)
2297             s->info.workgroup_size[1] = value;
2298          break;
2299       case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
2300          if (s->info.stage == MESA_SHADER_COMPUTE)
2301             s->info.workgroup_size[2] = value;
2302          break;
2303       case TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD:
2304          if (s->info.stage == MESA_SHADER_COMPUTE)
2305             s->info.cs.user_data_components_amd = value;
2306          break;
2307       case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2308          s->info.clip_distance_array_size = value;
2309          break;
2310       case TGSI_PROPERTY_LEGACY_MATH_RULES:
2311          s->info.use_legacy_math_rules = value;
2312          break;
2313       default:
2314          if (value) {
2315             fprintf(stderr, "tgsi_to_nir: unhandled TGSI property %u = %u\n",
2316                     i, value);
2317             unreachable("unhandled TGSI property");
2318          }
2319       }
2320    }
2321 
2322    if (s->info.stage == MESA_SHADER_COMPUTE &&
2323        (!s->info.workgroup_size[0] ||
2324         !s->info.workgroup_size[1] ||
2325         !s->info.workgroup_size[2]))
2326       s->info.workgroup_size_variable = true;
2327 
2328    c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
2329    c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);
2330 
2331    c->output_regs = rzalloc_array(c, struct ttn_reg_info,
2332                                   scan.file_max[TGSI_FILE_OUTPUT] + 1);
2333    c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
2334                                 scan.file_max[TGSI_FILE_TEMPORARY] + 1);
2335    c->imm_defs = rzalloc_array(c, nir_def *,
2336                                scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
2337 
2338    c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2339    c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
2340 
2341    ttn_parse_tgsi(c, tgsi_tokens);
2342    ttn_add_output_stores(c);
2343 
2344    nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");
2345 
2346    return c;
2347 }
2348 
2349 static void
ttn_optimize_nir(nir_shader * nir)2350 ttn_optimize_nir(nir_shader *nir)
2351 {
2352    bool progress;
2353 
2354    do {
2355       progress = false;
2356 
2357       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2358 
2359       /* Linking deals with unused inputs/outputs, but here we can remove
2360        * things local to the shader in the hopes that we can cleanup other
2361        * things. This pass will also remove variables with only stores, so we
2362        * might be able to make progress after it.
2363        */
2364       NIR_PASS(progress, nir, nir_remove_dead_variables,
2365                nir_var_function_temp | nir_var_shader_temp |
2366                nir_var_mem_shared,
2367                NULL);
2368 
2369       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
2370       NIR_PASS(progress, nir, nir_opt_dead_write_vars);
2371 
2372       if (nir->options->lower_to_scalar) {
2373          NIR_PASS_V(nir, nir_lower_alu_to_scalar,
2374                     nir->options->lower_to_scalar_filter, NULL);
2375          NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
2376       }
2377 
2378       NIR_PASS_V(nir, nir_lower_alu);
2379       NIR_PASS_V(nir, nir_lower_pack);
2380       NIR_PASS(progress, nir, nir_copy_prop);
2381       NIR_PASS(progress, nir, nir_opt_remove_phis);
2382       NIR_PASS(progress, nir, nir_opt_dce);
2383       if (nir_opt_loop(nir)) {
2384          progress = true;
2385          NIR_PASS(progress, nir, nir_copy_prop);
2386          NIR_PASS(progress, nir, nir_opt_dce);
2387       }
2388       NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
2389       NIR_PASS(progress, nir, nir_opt_dead_cf);
2390       NIR_PASS(progress, nir, nir_opt_cse);
2391       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
2392 
2393       NIR_PASS(progress, nir, nir_opt_phi_precision);
2394       NIR_PASS(progress, nir, nir_opt_algebraic);
2395       NIR_PASS(progress, nir, nir_opt_constant_folding);
2396 
2397       if (!nir->info.flrp_lowered) {
2398          unsigned lower_flrp =
2399             (nir->options->lower_flrp16 ? 16 : 0) |
2400             (nir->options->lower_flrp32 ? 32 : 0) |
2401             (nir->options->lower_flrp64 ? 64 : 0);
2402 
2403          if (lower_flrp) {
2404             bool lower_flrp_progress = false;
2405 
2406             NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
2407                      lower_flrp,
2408                      false /* always_precise */);
2409             if (lower_flrp_progress) {
2410                NIR_PASS(progress, nir,
2411                         nir_opt_constant_folding);
2412                progress = true;
2413             }
2414          }
2415 
2416          /* Nothing should rematerialize any flrps, so we only need to do this
2417           * lowering once.
2418           */
2419          nir->info.flrp_lowered = true;
2420       }
2421 
2422       NIR_PASS(progress, nir, nir_opt_undef);
2423       NIR_PASS(progress, nir, nir_opt_conditional_discard);
2424       if (nir->options->max_unroll_iterations) {
2425          NIR_PASS(progress, nir, nir_opt_loop_unroll);
2426       }
2427    } while (progress);
2428 }
2429 
2430 static bool
lower_clipdistance_to_array(nir_shader * nir)2431 lower_clipdistance_to_array(nir_shader *nir)
2432 {
2433    bool progress = false;
2434    nir_variable *dist0 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST0);
2435    nir_variable *dist1 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST1);
2436    /* resize VARYING_SLOT_CLIP_DIST0 to the full array size */
2437    dist0->type = glsl_array_type(glsl_float_type(), nir->info.clip_distance_array_size, sizeof(float));
2438    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2439    nir_foreach_function_impl(impl, nir) {
2440       bool func_progress = false;
2441       nir_builder b = nir_builder_at(nir_before_impl(impl));
2442       /* create a new deref for the arrayed clipdistance variable at the start of the function */
2443       nir_deref_instr *clipdist_deref = nir_build_deref_var(&b, dist0);
2444       nir_def *zero = nir_imm_zero(&b, 1, 32);
2445       nir_foreach_block(block, impl) {
2446          nir_foreach_instr_safe(instr, block) {
2447             /* filter through until a clipdistance store is reached */
2448             if (instr->type != nir_instr_type_intrinsic)
2449                continue;
2450             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2451             if (intr->intrinsic != nir_intrinsic_store_deref)
2452                continue;
2453             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2454             nir_variable *var = nir_deref_instr_get_variable(deref);
2455             if (var != dist0 && (!dist1 || var != dist1))
2456                continue;
2457             b.cursor = nir_before_instr(instr);
2458             uint32_t wrmask = nir_intrinsic_write_mask(intr);
2459             unsigned offset = var == dist1 ? 4 : 0;
2460             /* iterate over the store's writemask for components */
2461             for (unsigned i = 0; i < nir->info.clip_distance_array_size; i++) {
2462                /* deref the array member and store each component */
2463                nir_deref_instr *component_deref = nir_build_deref_array_imm(&b, clipdist_deref, i);
2464                nir_def *val = zero;
2465                if (wrmask & BITFIELD_BIT(i - offset))
2466                   val = nir_channel(&b, intr->src[1].ssa, i - offset);
2467                nir_store_deref(&b, component_deref, val, 0x1);
2468             }
2469             func_progress = true;
2470             /* immediately remove the old store, save the original deref */
2471             nir_instr_remove(instr);
2472             _mesa_set_add(deletes, deref);
2473          }
2474       }
2475       if (func_progress)
2476          nir_metadata_preserve(impl, nir_metadata_none);
2477       /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
2478       set_foreach_remove(deletes, he)
2479          nir_instr_remove((void*)he->key);
2480    }
2481    /* VARYING_SLOT_CLIP_DIST1 is no longer used and can be removed */
2482    if (dist1)
2483       exec_node_remove(&dist1->node);
2484    return progress;
2485 }
2486 
2487 /**
2488  * Finalizes the NIR in a similar way as st_glsl_to_nir does.
2489  *
2490  * Drivers expect that these passes are already performed,
2491  * so we have to do it here too.
2492  */
2493 static void
ttn_finalize_nir(struct ttn_compile * c,struct pipe_screen * screen)2494 ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
2495 {
2496    struct nir_shader *nir = c->build.shader;
2497 
2498    MESA_TRACE_FUNC();
2499 
2500    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2501    NIR_PASS_V(nir, nir_lower_reg_intrinsics_to_ssa);
2502 
2503    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
2504    NIR_PASS_V(nir, nir_split_var_copies);
2505    NIR_PASS_V(nir, nir_lower_var_copies);
2506    NIR_PASS_V(nir, nir_lower_system_values);
2507    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
2508 
2509    if (!screen->get_param(screen, PIPE_CAP_TEXRECT)) {
2510       const struct nir_lower_tex_options opts = { .lower_rect = true, };
2511       NIR_PASS_V(nir, nir_lower_tex, &opts);
2512    }
2513 
2514    /* driver needs clipdistance as array<float> */
2515    if ((nir->info.outputs_written &
2516         (BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) | BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))) &&
2517        screen->get_param(screen, PIPE_CAP_NIR_COMPACT_ARRAYS)) {
2518       NIR_PASS_V(nir, lower_clipdistance_to_array);
2519    }
2520 
2521    if (nir->options->lower_uniforms_to_ubo)
2522       NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, !c->cap_integers);
2523 
2524    if (nir->options->lower_int64_options)
2525       NIR_PASS_V(nir, nir_lower_int64);
2526 
2527    if (!c->cap_samplers_as_deref)
2528       NIR_PASS_V(nir, nir_lower_samplers);
2529 
2530    if (screen->finalize_nir) {
2531       char *msg = screen->finalize_nir(screen, nir);
2532       free(msg);
2533    } else {
2534       ttn_optimize_nir(nir);
2535       nir_shader_gather_info(nir, c->build.impl);
2536    }
2537 
2538    nir->info.num_images = c->num_images;
2539    nir->info.num_textures = c->num_samplers;
2540 
2541    nir_validate_shader(nir, "TTN: after all optimizations");
2542 }
2543 
save_nir_to_disk_cache(struct disk_cache * cache,uint8_t key[CACHE_KEY_SIZE],const nir_shader * s)2544 static void save_nir_to_disk_cache(struct disk_cache *cache,
2545                                    uint8_t key[CACHE_KEY_SIZE],
2546                                    const nir_shader *s)
2547 {
2548    struct blob blob = {0};
2549 
2550    blob_init(&blob);
2551    /* Because we cannot fully trust disk_cache_put
2552     * (EGL_ANDROID_blob_cache) we add the shader size,
2553     * which we'll check after disk_cache_get().
2554     */
2555    if (blob_reserve_uint32(&blob) != 0) {
2556       blob_finish(&blob);
2557       return;
2558    }
2559 
2560    nir_serialize(&blob, s, true);
2561    *(uint32_t *)blob.data = blob.size;
2562 
2563    disk_cache_put(cache, key, blob.data, blob.size, NULL);
2564    blob_finish(&blob);
2565 }
2566 
2567 static nir_shader *
load_nir_from_disk_cache(struct disk_cache * cache,struct pipe_screen * screen,uint8_t key[CACHE_KEY_SIZE],unsigned processor)2568 load_nir_from_disk_cache(struct disk_cache *cache,
2569                          struct pipe_screen *screen,
2570                          uint8_t key[CACHE_KEY_SIZE],
2571                          unsigned processor)
2572 {
2573    const nir_shader_compiler_options *options =
2574       screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, processor);
2575    struct blob_reader blob_reader;
2576    size_t size;
2577    nir_shader *s;
2578 
2579    uint32_t *buffer = (uint32_t *)disk_cache_get(cache, key, &size);
2580    if (!buffer)
2581       return NULL;
2582 
2583    /* Match found. No need to check crc32 or other things.
2584     * disk_cache_get is supposed to do that for us.
2585     * However we do still check if the first element is indeed the size,
2586     * as we cannot fully trust disk_cache_get (EGL_ANDROID_blob_cache) */
2587    if (buffer[0] != size) {
2588       return NULL;
2589    }
2590 
2591    size -= 4;
2592    blob_reader_init(&blob_reader, buffer + 1, size);
2593    s = nir_deserialize(NULL, options, &blob_reader);
2594    free(buffer); /* buffer was malloc-ed */
2595    return s;
2596 }
2597 
2598 struct nir_shader *
tgsi_to_nir(const void * tgsi_tokens,struct pipe_screen * screen,bool allow_disk_cache)2599 tgsi_to_nir(const void *tgsi_tokens,
2600             struct pipe_screen *screen,
2601             bool allow_disk_cache)
2602 {
2603    struct disk_cache *cache = NULL;
2604    struct ttn_compile *c;
2605    struct nir_shader *s = NULL;
2606    uint8_t key[CACHE_KEY_SIZE];
2607    unsigned processor;
2608 
2609    if (allow_disk_cache)
2610       cache = screen->get_disk_shader_cache(screen);
2611 
2612    /* Look first in the cache */
2613    if (cache) {
2614       disk_cache_compute_key(cache,
2615                              tgsi_tokens,
2616                              tgsi_num_tokens(tgsi_tokens) * sizeof(struct tgsi_token),
2617                              key);
2618       processor = tgsi_get_processor_type(tgsi_tokens);
2619       s = load_nir_from_disk_cache(cache, screen, key, processor);
2620    }
2621 
2622    if (s)
2623       return s;
2624 
2625 #ifndef NDEBUG
2626    nir_process_debug_variable();
2627 #endif
2628 
2629    if (NIR_DEBUG(TGSI)) {
2630       fprintf(stderr, "TGSI before translation to NIR:\n");
2631       tgsi_dump(tgsi_tokens, 0);
2632    }
2633 
2634    /* Not in the cache */
2635 
2636    c = ttn_compile_init(tgsi_tokens, NULL, screen);
2637    s = c->build.shader;
2638    ttn_finalize_nir(c, screen);
2639    ralloc_free(c);
2640 
2641    if (NIR_DEBUG(TGSI)) {
2642       mesa_logi("NIR after translation from TGSI:\n");
2643       nir_log_shaderi(s);
2644    }
2645 
2646    if (cache)
2647       save_nir_to_disk_cache(cache, key, s);
2648 
2649    return s;
2650 }
2651 
2652 struct nir_shader *
tgsi_to_nir_noscreen(const void * tgsi_tokens,const nir_shader_compiler_options * options)2653 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2654                      const nir_shader_compiler_options *options)
2655 {
2656    struct ttn_compile *c;
2657    struct nir_shader *s;
2658 
2659    c = ttn_compile_init(tgsi_tokens, options, NULL);
2660    s = c->build.shader;
2661    ralloc_free(c);
2662 
2663    return s;
2664 }
2665 
2666