1 /*
2 * Copyright © 2014-2015 Broadcom
3 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "util/blob.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/u_memory.h"
29 #include "util/perf/cpu_trace.h"
30 #include "util/ralloc.h"
31 #include "pipe/p_screen.h"
32
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_control_flow.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "compiler/nir/nir_serialize.h"
37 #include "compiler/shader_enums.h"
38
39 #include "tgsi_to_nir.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "tgsi/tgsi_dump.h"
42 #include "tgsi/tgsi_info.h"
43 #include "tgsi/tgsi_scan.h"
44 #include "tgsi/tgsi_from_mesa.h"
45
/* Build an unnamed unsigned[4] compound literal from four TGSI swizzle
 * channel names, e.g. SWIZ(X, Y, Z, W) — used as the swizzle argument to
 * nir_swizzle().
 */
#define SWIZ(X, Y, Z, W) (unsigned[4]){ \
      TGSI_SWIZZLE_##X, \
      TGSI_SWIZZLE_##Y, \
      TGSI_SWIZZLE_##Z, \
      TGSI_SWIZZLE_##W, \
   }
52
/* Per-TGSI-index bookkeeping for TEMPORARY and OUTPUT register files. */
struct ttn_reg_info {
   /** nir register handle containing this TGSI index. */
   nir_def *reg;
   /** Backing variable when the index belongs to a declared array (reg is
    * NULL in that case). */
   nir_variable *var;
   /** Offset (in vec4s) from the start of var for this TGSI index. */
   int offset;
};
60
/* All state for one TGSI→NIR translation. */
struct ttn_compile {
   /* Current TGSI token being translated. */
   union tgsi_full_token *token;
   nir_builder build;
   /* Results of tgsi_scan_shader() on the input program. */
   struct tgsi_shader_info *scan;

   /* Register/array bookkeeping indexed by TGSI register index. */
   struct ttn_reg_info *output_regs;
   struct ttn_reg_info *temp_regs;
   /* SSA defs for TGSI_FILE_IMMEDIATE, indexed by immediate number. */
   nir_def **imm_defs;

   /* Sampler return types recorded from SAMPLER_VIEW declarations. */
   unsigned num_samp_types;
   nir_alu_type *samp_types;

   /* The single TGSI_FILE_ADDRESS register. */
   nir_def *addr_reg;

   /* Variables created for INPUT/OUTPUT declarations, by TGSI index. */
   nir_variable **inputs;
   nir_variable **outputs;
   nir_variable *samplers[PIPE_MAX_SAMPLERS];
   nir_variable *images[PIPE_MAX_SHADER_IMAGES];
   nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
   /* Conservative byte sizes per UBO binding, from CONSTANT declarations. */
   uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];

   unsigned num_samplers;
   unsigned num_images;
   unsigned num_msaa_images;

   /* Special-cased FS input variables (see ttn_emit_declaration). */
   nir_variable *input_var_face;
   nir_variable *input_var_position;
   nir_variable *input_var_point;
   /* Compact clip-distance output array when compact_arrays is set. */
   nir_variable *clipdist;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
   unsigned next_imm;

   /* Driver capabilities controlling how certain inputs are translated. */
   bool cap_face_is_sysval;
   bool cap_position_is_sysval;
   bool cap_point_is_sysval;
   bool cap_samplers_as_deref;
   bool cap_integers;
   bool cap_tg4_component_in_swizzle;
};
101
/* Shorthand: 4-component swizzle of src by named TGSI channels. */
#define ttn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
/* Shorthand: extract a single named TGSI channel from src. */
#define ttn_channel(b, src, swiz) \
   nir_channel(b, src, TGSI_SWIZZLE_##swiz)
106
107 static gl_varying_slot
tgsi_varying_semantic_to_slot(unsigned semantic,unsigned index)108 tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
109 {
110 switch (semantic) {
111 case TGSI_SEMANTIC_POSITION:
112 return VARYING_SLOT_POS;
113 case TGSI_SEMANTIC_COLOR:
114 if (index == 0)
115 return VARYING_SLOT_COL0;
116 else
117 return VARYING_SLOT_COL1;
118 case TGSI_SEMANTIC_BCOLOR:
119 if (index == 0)
120 return VARYING_SLOT_BFC0;
121 else
122 return VARYING_SLOT_BFC1;
123 case TGSI_SEMANTIC_FOG:
124 return VARYING_SLOT_FOGC;
125 case TGSI_SEMANTIC_PSIZE:
126 return VARYING_SLOT_PSIZ;
127 case TGSI_SEMANTIC_GENERIC:
128 assert(index < 32);
129 return VARYING_SLOT_VAR0 + index;
130 case TGSI_SEMANTIC_FACE:
131 return VARYING_SLOT_FACE;
132 case TGSI_SEMANTIC_EDGEFLAG:
133 return VARYING_SLOT_EDGE;
134 case TGSI_SEMANTIC_PRIMID:
135 return VARYING_SLOT_PRIMITIVE_ID;
136 case TGSI_SEMANTIC_CLIPDIST:
137 if (index == 0)
138 return VARYING_SLOT_CLIP_DIST0;
139 else
140 return VARYING_SLOT_CLIP_DIST1;
141 case TGSI_SEMANTIC_CLIPVERTEX:
142 return VARYING_SLOT_CLIP_VERTEX;
143 case TGSI_SEMANTIC_TEXCOORD:
144 assert(index < 8);
145 return VARYING_SLOT_TEX0 + index;
146 case TGSI_SEMANTIC_PCOORD:
147 return VARYING_SLOT_PNTC;
148 case TGSI_SEMANTIC_VIEWPORT_INDEX:
149 return VARYING_SLOT_VIEWPORT;
150 case TGSI_SEMANTIC_LAYER:
151 return VARYING_SLOT_LAYER;
152 case TGSI_SEMANTIC_TESSINNER:
153 return VARYING_SLOT_TESS_LEVEL_INNER;
154 case TGSI_SEMANTIC_TESSOUTER:
155 return VARYING_SLOT_TESS_LEVEL_OUTER;
156 default:
157 fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
158 abort();
159 }
160 }
161
162 static enum gl_frag_depth_layout
ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)163 ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)
164 {
165 switch (tgsi_fs_depth_layout) {
166 case TGSI_FS_DEPTH_LAYOUT_NONE:
167 return FRAG_DEPTH_LAYOUT_NONE;
168 case TGSI_FS_DEPTH_LAYOUT_ANY:
169 return FRAG_DEPTH_LAYOUT_ANY;
170 case TGSI_FS_DEPTH_LAYOUT_GREATER:
171 return FRAG_DEPTH_LAYOUT_GREATER;
172 case TGSI_FS_DEPTH_LAYOUT_LESS:
173 return FRAG_DEPTH_LAYOUT_LESS;
174 case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
175 return FRAG_DEPTH_LAYOUT_UNCHANGED;
176 default:
177 unreachable("bad TGSI FS depth layout");
178 }
179 }
180
181 static enum glsl_interp_mode
ttn_translate_interp_mode(unsigned tgsi_interp)182 ttn_translate_interp_mode(unsigned tgsi_interp)
183 {
184 switch (tgsi_interp) {
185 case TGSI_INTERPOLATE_CONSTANT:
186 return INTERP_MODE_FLAT;
187 case TGSI_INTERPOLATE_LINEAR:
188 return INTERP_MODE_NOPERSPECTIVE;
189 case TGSI_INTERPOLATE_PERSPECTIVE:
190 return INTERP_MODE_SMOOTH;
191 case TGSI_INTERPOLATE_COLOR:
192 return INTERP_MODE_NONE;
193 default:
194 unreachable("bad TGSI interpolation mode");
195 }
196 }
197
/* Translate one TGSI declaration token into NIR bookkeeping: NIR registers
 * for temporaries/outputs, nir_variables for inputs/outputs/uniforms and
 * declared arrays, sampler-view return types, and conservative UBO sizes.
 */
static void
ttn_emit_declaration(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
   unsigned array_size = decl->Range.Last - decl->Range.First + 1;
   unsigned file = decl->Declaration.File;
   unsigned i;

   if (file == TGSI_FILE_TEMPORARY) {
      if (decl->Declaration.Array) {
         /* for arrays, we create variables instead of registers: */
         nir_variable *var =
            nir_variable_create(b->shader, nir_var_shader_temp,
                                glsl_array_type(glsl_vec4_type(), array_size, 0),
                                ralloc_asprintf(b->shader, "arr_%d",
                                                decl->Array.ArrayID));

         for (i = 0; i < array_size; i++) {
            /* point all the matching slots to the same var,
             * with appropriate offset set, mostly just so
             * we know what to do when tgsi does a non-indirect
             * access
             */
            c->temp_regs[decl->Range.First + i].reg = NULL;
            c->temp_regs[decl->Range.First + i].var = var;
            c->temp_regs[decl->Range.First + i].offset = i;
         }
      } else {
         /* Non-array temps become plain 4x32 NIR registers. */
         for (i = 0; i < array_size; i++) {
            nir_def *reg = nir_decl_reg(b, 4, 32, 0);
            c->temp_regs[decl->Range.First + i].reg = reg;
            c->temp_regs[decl->Range.First + i].var = NULL;
            c->temp_regs[decl->Range.First + i].offset = 0;
         }
      }
   } else if (file == TGSI_FILE_ADDRESS) {
      /* Single ADDR register used for indirect addressing. */
      c->addr_reg = nir_decl_reg(b, 4, 32, 0);
   } else if (file == TGSI_FILE_SYSTEM_VALUE) {
      /* Nothing to record for system values. */
   } else if (file == TGSI_FILE_BUFFER) {
      /* Nothing to record for buffers. */
   } else if (file == TGSI_FILE_IMAGE) {
      /* Nothing to record for images. */
   } else if (file == TGSI_FILE_SAMPLER) {
      /* Nothing to record for samplers. */
   } else if (file == TGSI_FILE_SAMPLER_VIEW) {
      struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
      nir_alu_type type;

      /* TTN only handles sampler views whose four return channels share a
       * single type. */
      assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
             (sview->ReturnTypeX == sview->ReturnTypeZ) &&
             (sview->ReturnTypeX == sview->ReturnTypeW));

      switch (sview->ReturnTypeX) {
      case TGSI_RETURN_TYPE_SINT:
         type = nir_type_int32;
         break;
      case TGSI_RETURN_TYPE_UINT:
         type = nir_type_uint32;
         break;
      case TGSI_RETURN_TYPE_FLOAT:
      default:
         type = nir_type_float32;
         break;
      }

      for (i = 0; i < array_size; i++) {
         c->samp_types[decl->Range.First + i] = type;
      }
   } else {
      bool is_array = (array_size > 1);

      assert(file == TGSI_FILE_INPUT ||
             file == TGSI_FILE_OUTPUT ||
             file == TGSI_FILE_CONSTANT);

      /* nothing to do for UBOs: */
      if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
          decl->Dim.Index2D != 0) {
         b->shader->info.num_ubos =
            MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
         /* NOTE(review): looks like this should be (Range.Last + 1) * 16 to
          * cover the final vec4 — confirm against UBO range users below. */
         c->ubo_sizes[decl->Dim.Index2D] =
            MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
         return;
      }

      if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
         /* Only treat in/out ranges as arrays when TGSI declared them so. */
         is_array = (is_array && decl->Declaration.Array &&
                     (decl->Array.ArrayID != 0));
      }

      for (i = 0; i < array_size; i++) {
         unsigned idx = decl->Range.First + i;
         nir_variable *var = rzalloc(b->shader, nir_variable);

         var->data.driver_location = idx;

         var->type = glsl_vec4_type();
         if (is_array)
            var->type = glsl_array_type(var->type, array_size, 0);

         switch (file) {
         case TGSI_FILE_INPUT:
            var->data.read_only = true;
            var->data.mode = nir_var_shader_in;
            var->name = ralloc_asprintf(var, "in_%d", idx);

            if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
               /* FACE/POSITION/PCOORD may be either varyings or system
                * values depending on driver caps; remember the variable so
                * source fetches can special-case it. */
               if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                  var->type = glsl_bool_type();
                  if (c->cap_face_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_FRONT_FACE;
                  } else {
                     var->data.location = VARYING_SLOT_FACE;
                  }
                  c->input_var_face = var;
               } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
                  if (c->cap_position_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_FRAG_COORD;
                  } else {
                     var->data.location = VARYING_SLOT_POS;
                  }
                  c->input_var_position = var;
               } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
                  if (c->cap_point_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_POINT_COORD;
                  } else {
                     var->data.location = VARYING_SLOT_PNTC;
                  }
                  c->input_var_point = var;
               } else {
                  var->data.location =
                     tgsi_varying_semantic_to_slot(decl->Semantic.Name,
                                                   decl->Semantic.Index);
               }
            } else {
               /* Non-FS inputs are plain vertex attributes. */
               assert(!decl->Declaration.Semantic);
               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
            }
            var->data.index = 0;
            var->data.interpolation =
               ttn_translate_interp_mode(decl->Interp.Interpolate);

            c->inputs[idx] = var;

            /* NOTE(review): this inner `int i` shadows the outer loop
             * counter `i` — works, but confusing. */
            for (int i = 0; i < array_size; i++)
               b->shader->info.inputs_read |= 1ull << (var->data.location + i);

            break;
         case TGSI_FILE_OUTPUT: {
            int semantic_name = decl->Semantic.Name;
            int semantic_index = decl->Semantic.Index;
            /* Since we can't load from outputs in the IR, we make temporaries
             * for the outputs and emit stores to the real outputs at the end of
             * the shader.
             */
            nir_def *reg = nir_decl_reg(b, 4, 32,
                                        is_array ? array_size : 0);

            var->data.mode = nir_var_shader_out;
            var->name = ralloc_asprintf(var, "out_%d", idx);
            var->data.index = 0;
            var->data.interpolation =
               ttn_translate_interp_mode(decl->Interp.Interpolate);
            var->data.patch = semantic_name == TGSI_SEMANTIC_TESSINNER ||
                              semantic_name == TGSI_SEMANTIC_TESSOUTER ||
                              semantic_name == TGSI_SEMANTIC_PATCH;

            if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
               switch (semantic_name) {
               case TGSI_SEMANTIC_COLOR: {
                  /* TODO tgsi loses some information, so we cannot
                   * actually differentiate here between DSB and MRT
                   * at this point. But so far no drivers using tgsi-
                   * to-nir support dual source blend:
                   */
                  bool dual_src_blend = false;
                  if (dual_src_blend && (semantic_index == 1)) {
                     var->data.location = FRAG_RESULT_DATA0;
                     var->data.index = 1;
                  } else {
                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
                        var->data.location = FRAG_RESULT_COLOR;
                     else
                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
                  }
                  /* FLOAT (and anything else) keeps the default vec4 float
                   * type — hence no default case here. */
                  switch (decl->Declaration.ValueType) {
                  case TGSI_RETURN_TYPE_SINT:
                     var->type = glsl_ivec4_type();
                     break;
                  case TGSI_RETURN_TYPE_UINT:
                     var->type = glsl_uvec4_type();
                     break;
                  }
                  break;
               }
               case TGSI_SEMANTIC_POSITION:
                  var->data.location = FRAG_RESULT_DEPTH;
                  var->type = glsl_float_type();
                  break;
               case TGSI_SEMANTIC_STENCIL:
                  var->data.location = FRAG_RESULT_STENCIL;
                  var->type = glsl_int_type();
                  break;
               case TGSI_SEMANTIC_SAMPLEMASK:
                  var->data.location = FRAG_RESULT_SAMPLE_MASK;
                  var->type = glsl_int_type();
                  break;

               default:
                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
                          decl->Semantic.Name, decl->Semantic.Index);
                  abort();
               }
            } else {
               var->data.location =
                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
               if (var->data.location == VARYING_SLOT_FOGC ||
                   var->data.location == VARYING_SLOT_PSIZ) {
                  var->type = glsl_float_type();
               } else if (var->data.location == VARYING_SLOT_LAYER) {
                  var->type = glsl_int_type();
               } else if (b->shader->options->compact_arrays &&
                          var->data.location == VARYING_SLOT_CLIP_DIST0) {
                  var->type = glsl_array_type(glsl_float_type(),
                                              b->shader->info.clip_distance_array_size,
                                              sizeof(float));
                  c->clipdist = var;
               }
            }

            if (is_array) {
               unsigned j;
               for (j = 0; j < array_size; j++) {
                  /* Outer `i` is 0 here: for is_array declarations the
                   * enclosing loop breaks after its first iteration (see
                   * bottom of the loop). */
                  c->output_regs[idx + j].offset = i + j;
                  c->output_regs[idx + j].reg = reg;
               }
            } else {
               c->output_regs[idx].offset = i;
               c->output_regs[idx].reg = reg;
            }

            c->outputs[idx] = var;

            if (b->shader->options->compact_arrays && var->data.location == VARYING_SLOT_CLIP_DIST1) {
               /* ignore this entirely */
               /* NOTE(review): `continue` also skips nir_shader_add_variable
                * and the is_array early-break below — presumably intended,
                * since CLIP_DIST1 is folded into the compact CLIP_DIST0
                * array; confirm. */
               continue;
            }

            for (int i = 0; i < array_size; i++)
               b->shader->info.outputs_written |= 1ull << (var->data.location + i);
         }
            break;
         case TGSI_FILE_CONSTANT:
            var->data.mode = nir_var_uniform;
            var->name = ralloc_asprintf(var, "uniform_%d", idx);
            var->data.location = idx;
            break;
         default:
            unreachable("bad declaration file");
            return;
         }

         nir_shader_add_variable(b->shader, var);

         /* Array declarations produce one variable for the whole range. */
         if (is_array)
            break;
      }

   }
}
473
474 static void
ttn_emit_immediate(struct ttn_compile * c)475 ttn_emit_immediate(struct ttn_compile *c)
476 {
477 nir_builder *b = &c->build;
478 struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
479 nir_load_const_instr *load_const;
480 int i;
481
482 load_const = nir_load_const_instr_create(b->shader, 4, 32);
483 c->imm_defs[c->next_imm] = &load_const->def;
484 c->next_imm++;
485
486 for (i = 0; i < load_const->def.num_components; i++)
487 load_const->value[i].u32 = tgsi_imm->u[i].Uint;
488
489 nir_builder_instr_insert(b, &load_const->instr);
490 }
491
492 static nir_def *
493 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
494
495 /* generate either a constant or indirect deref chain for accessing an
496 * array variable.
497 */
498 static nir_deref_instr *
ttn_array_deref(struct ttn_compile * c,nir_variable * var,unsigned offset,struct tgsi_ind_register * indirect)499 ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
500 struct tgsi_ind_register *indirect)
501 {
502 nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
503 nir_def *index = nir_imm_int(&c->build, offset);
504 if (indirect)
505 index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
506 return nir_build_deref_array(&c->build, deref, index);
507 }
508
509 /* Special case: Turn the frontface varying into a load of the
510 * frontface variable, and create the vector as required by TGSI.
511 */
512 static nir_def *
ttn_emulate_tgsi_front_face(struct ttn_compile * c)513 ttn_emulate_tgsi_front_face(struct ttn_compile *c)
514 {
515 nir_def *tgsi_frontface[4];
516
517 if (c->cap_face_is_sysval) {
518 /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
519 * F is 0xffffffff if front-facing, 0 if not.
520 */
521
522 nir_def *frontface = nir_load_front_face(&c->build, 1);
523
524 tgsi_frontface[0] = nir_bcsel(&c->build,
525 frontface,
526 nir_imm_int(&c->build, 0xffffffff),
527 nir_imm_int(&c->build, 0));
528 tgsi_frontface[1] = nir_imm_int(&c->build, 0);
529 tgsi_frontface[2] = nir_imm_int(&c->build, 0);
530 tgsi_frontface[3] = nir_imm_int(&c->build, 1);
531 } else {
532 /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
533 * F is positive if front-facing, negative if not.
534 */
535
536 assert(c->input_var_face);
537 nir_def *frontface = nir_load_var(&c->build, c->input_var_face);
538
539 tgsi_frontface[0] = nir_bcsel(&c->build,
540 frontface,
541 nir_imm_float(&c->build, 1.0),
542 nir_imm_float(&c->build, -1.0));
543 tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
544 tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
545 tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
546 }
547
548 return nir_vec(&c->build, tgsi_frontface, 4);
549 }
550
/* Build a nir_src reading TGSI register (file, index).
 *
 * \param indirect      Optional indirect addressing for the register index.
 * \param dim           Optional 2D dimension (UBO binding) for constants.
 * \param dimind        Optional indirect for the dimension itself.
 * \param src_is_float  Type hint; sets dest_type on uniform loads.
 */
static nir_src
ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
                           struct tgsi_ind_register *indirect,
                           struct tgsi_dimension *dim,
                           struct tgsi_ind_register *dimind,
                           bool src_is_float)
{
   nir_builder *b = &c->build;
   nir_src src;

   memset(&src, 0, sizeof(src));

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      if (c->temp_regs[index].var) {
         /* Array-backed temp: load through a (possibly indirect) deref. */
         unsigned offset = c->temp_regs[index].offset;
         nir_variable *var = c->temp_regs[index].var;
         nir_def *load = nir_load_deref(&c->build,
                                        ttn_array_deref(c, var, offset, indirect));

         src = nir_src_for_ssa(load);
      } else {
         /* Plain temp: indirect access is only legal on declared arrays. */
         assert(!indirect);
         src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[index].reg));
      }
      assert(!dim);
      break;

   case TGSI_FILE_ADDRESS:
      src = nir_src_for_ssa(nir_load_reg(b, c->addr_reg));
      assert(!dim);
      break;

   case TGSI_FILE_IMMEDIATE:
      /* Immediates were materialized in ttn_emit_immediate. */
      src = nir_src_for_ssa(c->imm_defs[index]);
      assert(!indirect);
      assert(!dim);
      break;

   case TGSI_FILE_SYSTEM_VALUE: {
      nir_def *load;

      assert(!indirect);
      assert(!dim);

      switch (c->scan->system_value_semantic_name[index]) {
      case TGSI_SEMANTIC_VERTEXID_NOBASE:
         load = nir_load_vertex_id_zero_base(b);
         break;
      case TGSI_SEMANTIC_VERTEXID:
         load = nir_load_vertex_id(b);
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
         load = nir_load_base_vertex(b);
         break;
      case TGSI_SEMANTIC_INSTANCEID:
         load = nir_load_instance_id(b);
         break;
      case TGSI_SEMANTIC_FACE:
         assert(c->cap_face_is_sysval);
         load = ttn_emulate_tgsi_front_face(c);
         break;
      case TGSI_SEMANTIC_POSITION:
         assert(c->cap_position_is_sysval);
         load = nir_load_frag_coord(b);
         break;
      case TGSI_SEMANTIC_PCOORD:
         assert(c->cap_point_is_sysval);
         load = nir_load_point_coord(b);
         break;
      case TGSI_SEMANTIC_THREAD_ID:
         load = nir_load_local_invocation_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_ID:
         load = nir_load_workgroup_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_SIZE:
         load = nir_load_workgroup_size(b);
         break;
      case TGSI_SEMANTIC_CS_USER_DATA_AMD:
         load = nir_load_user_data_amd(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
         load = nir_load_tess_level_inner_default(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
         load = nir_load_tess_level_outer_default(b);
         break;
      case TGSI_SEMANTIC_SAMPLEID:
         load = nir_load_sample_id(b);
         b->shader->info.fs.uses_sample_shading = true;
         break;
      default:
         unreachable("bad system value");
      }

      /* Pad narrow sysvals out to 4 components by repeating the last one,
       * since TGSI sources are always vec4. */
      if (load->num_components == 2)
         load = nir_swizzle(b, load, SWIZ(X, Y, Y, Y), 4);
      else if (load->num_components == 3)
         load = nir_swizzle(b, load, SWIZ(X, Y, Z, Z), 4);

      src = nir_src_for_ssa(load);
      break;
   }

   case TGSI_FILE_INPUT:
      /* FS FACE/POSITION/PCOORD inputs go through their special-cased
       * variables recorded at declaration time. */
      if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
         assert(!c->cap_face_is_sysval && c->input_var_face);
         return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
                 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
         assert(!c->cap_position_is_sysval && c->input_var_position);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
                 c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
         assert(!c->cap_point_is_sysval && c->input_var_point);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
      } else {
         /* Indirection on input arrays isn't supported by TTN. */
         assert(!dim);
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->inputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
         /* Output reads in FS are framebuffer fetches. */
         c->outputs[index]->data.fb_fetch_output = 1;
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->outputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      unreachable("unsupported output read");
      break;

   case TGSI_FILE_CONSTANT: {
      nir_intrinsic_instr *load;
      nir_intrinsic_op op;
      unsigned srcn = 0;

      /* Constants with a (nonzero or indirect) 2D dimension live in UBOs;
       * everything else is the gallium "constant buffer 0" uniform file. */
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         op = nir_intrinsic_load_ubo;
      } else {
         op = nir_intrinsic_load_uniform;
      }

      load = nir_intrinsic_instr_create(b->shader, op);
      if (op == nir_intrinsic_load_uniform) {
         nir_intrinsic_set_dest_type(load, src_is_float ? nir_type_float :
                                                          nir_type_int);
      }

      load->num_components = 4;
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         if (dimind) {
            load->src[srcn] =
               ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
                                          NULL, NULL, NULL, false);
         } else {
            /* UBOs start at index 1 in TGSI: */
            load->src[srcn] =
               nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
         }
         srcn++;
      }

      nir_def *offset;
      if (op == nir_intrinsic_load_ubo) {
         /* UBO loads don't have a base offset. */
         offset = nir_imm_int(b, index);
         if (indirect) {
            offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
         }
         /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
         offset = nir_ishl_imm(b, offset, 4);
         nir_intrinsic_set_align(load, 16, 0);

         /* Set a very conservative base/range of the access: 16 bytes if not
          * indirect at all, offset to the end of the UBO if the offset is
          * indirect, and totally unknown if the block number is indirect.
          */
         uint32_t base = index * 16;
         nir_intrinsic_set_range_base(load, base);
         if (dimind)
            nir_intrinsic_set_range(load, ~0);
         else if (indirect)
            nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
         else
            nir_intrinsic_set_range(load, base + 16);
      } else {
         nir_intrinsic_set_base(load, index);
         if (indirect) {
            offset = ttn_src_for_indirect(c, indirect);
            nir_intrinsic_set_range(load, c->build.shader->num_uniforms * 16 - index);
         } else {
            offset = nir_imm_int(b, 0);
            nir_intrinsic_set_range(load, 1);
         }
      }
      load->src[srcn++] = nir_src_for_ssa(offset);

      nir_def_init(&load->instr, &load->def, 4, 32);
      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->def);
      break;
   }

   default:
      unreachable("bad src file");
   }


   return src;
}
768
769 static nir_def *
ttn_src_for_indirect(struct ttn_compile * c,struct tgsi_ind_register * indirect)770 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
771 {
772 nir_builder *b = &c->build;
773 nir_alu_src src;
774 memset(&src, 0, sizeof(src));
775 for (int i = 0; i < 4; i++)
776 src.swizzle[i] = indirect->Swizzle;
777 src.src = ttn_src_for_file_and_index(c,
778 indirect->File,
779 indirect->Index,
780 NULL, NULL, NULL,
781 false);
782 return nir_mov_alu(b, src, 1);
783 }
784
785 static nir_variable *
ttn_get_var(struct ttn_compile * c,struct tgsi_full_dst_register * tgsi_fdst)786 ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
787 {
788 struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
789 unsigned index = tgsi_dst->Index;
790
791 if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
792 /* we should not have an indirect when there is no var! */
793 if (!c->temp_regs[index].var)
794 assert(!tgsi_dst->Indirect);
795 return c->temp_regs[index].var;
796 }
797
798 return NULL;
799 }
800
/* Fetch TGSI source operand src_idx of the current instruction as a vec4
 * nir_def, applying swizzle, 64-bit bitcast, and absolute/negate modifiers.
 * Returns NULL for sampler/image/buffer operands (texturing handles those
 * by index itself).
 */
static nir_def *
ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
            int src_idx)
{
   nir_builder *b = &c->build;
   struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
   enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
   /* Infer the operand type from the opcode so modifiers below use the
    * right (float vs integer) flavor. */
   unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
   bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
                        tgsi_src_type == TGSI_TYPE_DOUBLE ||
                        tgsi_src_type == TGSI_TYPE_UNTYPED);
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   if (tgsi_src->File == TGSI_FILE_NULL) {
      return nir_imm_float(b, 0.0);
   } else if (tgsi_src->File == TGSI_FILE_SAMPLER ||
              tgsi_src->File == TGSI_FILE_IMAGE ||
              tgsi_src->File == TGSI_FILE_BUFFER) {
      /* Only the index of the resource gets used in texturing, and it will
       * handle looking that up on its own instead of using the nir_alu_src.
       */
      assert(!tgsi_src->Indirect);
      return NULL;
   } else {
      struct tgsi_ind_register *ind = NULL;
      struct tgsi_dimension *dim = NULL;
      struct tgsi_ind_register *dimind = NULL;
      if (tgsi_src->Indirect)
         ind = &tgsi_fsrc->Indirect;
      if (tgsi_src->Dimension) {
         dim = &tgsi_fsrc->Dimension;
         if (dim->Indirect)
            dimind = &tgsi_fsrc->DimIndirect;
      }
      src.src = ttn_src_for_file_and_index(c,
                                           tgsi_src->File,
                                           tgsi_src->Index,
                                           ind, dim, dimind,
                                           src_is_float);
   }

   src.swizzle[0] = tgsi_src->SwizzleX;
   src.swizzle[1] = tgsi_src->SwizzleY;
   src.swizzle[2] = tgsi_src->SwizzleZ;
   src.swizzle[3] = tgsi_src->SwizzleW;

   nir_def *def = nir_mov_alu(b, src, 4);

   /* 64-bit operands are carried as pairs of 32-bit channels; reinterpret
    * the swizzled vec4 as two 64-bit components. */
   if (tgsi_type_is_64bit(tgsi_src_type))
      def = nir_bitcast_vector(b, def, 64);

   if (tgsi_src->Absolute) {
      /* TGSI only defines |x| for float operands. */
      assert(src_is_float);
      def = nir_fabs(b, def);
   }

   if (tgsi_src->Negate) {
      if (src_is_float)
         def = nir_fneg(b, def);
      else
         def = nir_ineg(b, def);
   }

   return def;
}
868
869 static nir_def *
ttn_alu(nir_builder * b,nir_op op,unsigned dest_bitsize,nir_def ** src)870 ttn_alu(nir_builder *b, nir_op op, unsigned dest_bitsize, nir_def **src)
871 {
872 nir_def *def = nir_build_alu_src_arr(b, op, src);
873 if (def->bit_size == 1)
874 def = nir_ineg(b, nir_b2iN(b, def, dest_bitsize));
875 assert(def->bit_size == dest_bitsize);
876 if (dest_bitsize == 64) {
877 /* Replicate before bitcasting, so we end up with 4x32 at the end */
878 if (def->num_components == 1)
879 def = nir_replicate(b, def, 2);
880
881 if (def->num_components > 2) {
882 /* 32 -> 64 bit conversion ops are supposed to only convert the first
883 * two components, and we need to truncate here to avoid creating a
884 * vec8 after bitcasting the destination.
885 */
886 def = nir_trim_vector(b, def, 2);
887 }
888 def = nir_bitcast_vector(b, def, 32);
889 }
890 return def;
891 }
892
893 /* EXP - Approximate Exponential Base 2
894 * dst.x = 2^{\lfloor src.x\rfloor}
895 * dst.y = src.x - \lfloor src.x\rfloor
896 * dst.z = 2^{src.x}
897 * dst.w = 1.0
898 */
899 static nir_def *
ttn_exp(nir_builder * b,nir_def ** src)900 ttn_exp(nir_builder *b, nir_def **src)
901 {
902 nir_def *srcx = ttn_channel(b, src[0], X);
903
904 return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
905 nir_fsub(b, srcx, nir_ffloor(b, srcx)),
906 nir_fexp2(b, srcx),
907 nir_imm_float(b, 1.0));
908 }
909
910 /* LOG - Approximate Logarithm Base 2
911 * dst.x = \lfloor\log_2{|src.x|}\rfloor
912 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
913 * dst.z = \log_2{|src.x|}
914 * dst.w = 1.0
915 */
916 static nir_def *
ttn_log(nir_builder * b,nir_def ** src)917 ttn_log(nir_builder *b, nir_def **src)
918 {
919 nir_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
920 nir_def *log2 = nir_flog2(b, abs_srcx);
921
922 return nir_vec4(b, nir_ffloor(b, log2),
923 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
924 nir_flog2(b, abs_srcx),
925 nir_imm_float(b, 1.0));
926 }
927
928 /* DST - Distance Vector
929 * dst.x = 1.0
930 * dst.y = src0.y \times src1.y
931 * dst.z = src0.z
932 * dst.w = src1.w
933 */
934 static nir_def *
ttn_dst(nir_builder * b,nir_def ** src)935 ttn_dst(nir_builder *b, nir_def **src)
936 {
937 return nir_vec4(b, nir_imm_float(b, 1.0),
938 nir_fmul(b, ttn_channel(b, src[0], Y),
939 ttn_channel(b, src[1], Y)),
940 ttn_channel(b, src[0], Z),
941 ttn_channel(b, src[1], W));
942 }
943
944 /* LIT - Light Coefficients
945 * dst.x = 1.0
946 * dst.y = max(src.x, 0.0)
947 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
948 * dst.w = 1.0
949 */
950 static nir_def *
ttn_lit(nir_builder * b,nir_def ** src)951 ttn_lit(nir_builder *b, nir_def **src)
952 {
953 nir_def *src0_y = ttn_channel(b, src[0], Y);
954 nir_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
955 nir_imm_float(b, 128.0)),
956 nir_imm_float(b, -128.0));
957 nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
958 wclamp);
959 nir_def *z = nir_bcsel(b, nir_flt_imm(b, ttn_channel(b, src[0], X), 0.0),
960 nir_imm_float(b, 0.0), pow);
961
962 return nir_vec4(b, nir_imm_float(b, 1.0),
963 nir_fmax(b, ttn_channel(b, src[0], X),
964 nir_imm_float(b, 0.0)),
965 z, nir_imm_float(b, 1.0));
966 }
967
/* Translate TGSI BARRIER: a workgroup-scope execution barrier. */
static void
ttn_barrier(nir_builder *b)
{
   nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
}
973
/* Translate TGSI KILL: unconditionally discard the fragment and record
 * that the shader uses discard. */
static void
ttn_kill(nir_builder *b)
{
   nir_discard(b);
   b->shader->info.fs.uses_discard = true;
}
980
/* Translate TGSI KILL_IF: discard the fragment when any channel of src[0]
 * is negative. */
static void
ttn_kill_if(nir_builder *b, nir_def **src)
{
   /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
   b->exact = true;
   nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
   b->exact = false;

   nir_discard_if(b, cmp);
   b->shader->info.fs.uses_discard = true;
}
992
993 static void
get_texture_info(unsigned texture,enum glsl_sampler_dim * dim,bool * is_shadow,bool * is_array)994 get_texture_info(unsigned texture,
995 enum glsl_sampler_dim *dim,
996 bool *is_shadow,
997 bool *is_array)
998 {
999 assert(is_array);
1000 *is_array = false;
1001
1002 if (is_shadow)
1003 *is_shadow = false;
1004
1005 switch (texture) {
1006 case TGSI_TEXTURE_BUFFER:
1007 *dim = GLSL_SAMPLER_DIM_BUF;
1008 break;
1009 case TGSI_TEXTURE_1D:
1010 *dim = GLSL_SAMPLER_DIM_1D;
1011 break;
1012 case TGSI_TEXTURE_1D_ARRAY:
1013 *dim = GLSL_SAMPLER_DIM_1D;
1014 *is_array = true;
1015 break;
1016 case TGSI_TEXTURE_SHADOW1D:
1017 *dim = GLSL_SAMPLER_DIM_1D;
1018 *is_shadow = true;
1019 break;
1020 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1021 *dim = GLSL_SAMPLER_DIM_1D;
1022 *is_shadow = true;
1023 *is_array = true;
1024 break;
1025 case TGSI_TEXTURE_2D:
1026 *dim = GLSL_SAMPLER_DIM_2D;
1027 break;
1028 case TGSI_TEXTURE_2D_ARRAY:
1029 *dim = GLSL_SAMPLER_DIM_2D;
1030 *is_array = true;
1031 break;
1032 case TGSI_TEXTURE_2D_MSAA:
1033 *dim = GLSL_SAMPLER_DIM_MS;
1034 break;
1035 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1036 *dim = GLSL_SAMPLER_DIM_MS;
1037 *is_array = true;
1038 break;
1039 case TGSI_TEXTURE_SHADOW2D:
1040 *dim = GLSL_SAMPLER_DIM_2D;
1041 *is_shadow = true;
1042 break;
1043 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1044 *dim = GLSL_SAMPLER_DIM_2D;
1045 *is_shadow = true;
1046 *is_array = true;
1047 break;
1048 case TGSI_TEXTURE_3D:
1049 *dim = GLSL_SAMPLER_DIM_3D;
1050 break;
1051 case TGSI_TEXTURE_CUBE:
1052 *dim = GLSL_SAMPLER_DIM_CUBE;
1053 break;
1054 case TGSI_TEXTURE_CUBE_ARRAY:
1055 *dim = GLSL_SAMPLER_DIM_CUBE;
1056 *is_array = true;
1057 break;
1058 case TGSI_TEXTURE_SHADOWCUBE:
1059 *dim = GLSL_SAMPLER_DIM_CUBE;
1060 *is_shadow = true;
1061 break;
1062 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1063 *dim = GLSL_SAMPLER_DIM_CUBE;
1064 *is_shadow = true;
1065 *is_array = true;
1066 break;
1067 case TGSI_TEXTURE_RECT:
1068 *dim = GLSL_SAMPLER_DIM_RECT;
1069 break;
1070 case TGSI_TEXTURE_SHADOWRECT:
1071 *dim = GLSL_SAMPLER_DIM_RECT;
1072 *is_shadow = true;
1073 break;
1074 default:
1075 fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
1076 abort();
1077 }
1078 }
1079
1080 static enum glsl_base_type
base_type_for_alu_type(nir_alu_type type)1081 base_type_for_alu_type(nir_alu_type type)
1082 {
1083 type = nir_alu_type_get_base_type(type);
1084
1085 switch (type) {
1086 case nir_type_float:
1087 return GLSL_TYPE_FLOAT;
1088 case nir_type_int:
1089 return GLSL_TYPE_INT;
1090 case nir_type_uint:
1091 return GLSL_TYPE_UINT;
1092 default:
1093 unreachable("invalid type");
1094 }
1095 }
1096
1097 static nir_variable *
get_sampler_var(struct ttn_compile * c,int binding,enum glsl_sampler_dim dim,bool is_shadow,bool is_array,enum glsl_base_type base_type,nir_texop op)1098 get_sampler_var(struct ttn_compile *c, int binding,
1099 enum glsl_sampler_dim dim,
1100 bool is_shadow,
1101 bool is_array,
1102 enum glsl_base_type base_type,
1103 nir_texop op)
1104 {
1105 nir_variable *var = c->samplers[binding];
1106 if (!var) {
1107 const struct glsl_type *type =
1108 glsl_sampler_type(dim, is_shadow, is_array, base_type);
1109 var = nir_variable_create(c->build.shader, nir_var_uniform, type,
1110 "sampler");
1111 var->data.binding = binding;
1112 var->data.explicit_binding = true;
1113
1114 c->samplers[binding] = var;
1115 c->num_samplers = MAX2(c->num_samplers, binding + 1);
1116
1117 /* Record textures used */
1118 BITSET_SET(c->build.shader->info.textures_used, binding);
1119 if (op == nir_texop_txf || op == nir_texop_txf_ms)
1120 BITSET_SET(c->build.shader->info.textures_used_by_txf, binding);
1121 BITSET_SET(c->build.shader->info.samplers_used, binding);
1122 }
1123
1124 return var;
1125 }
1126
1127 static nir_variable *
get_image_var(struct ttn_compile * c,int binding,enum glsl_sampler_dim dim,bool is_array,enum glsl_base_type base_type,enum gl_access_qualifier access,enum pipe_format format)1128 get_image_var(struct ttn_compile *c, int binding,
1129 enum glsl_sampler_dim dim,
1130 bool is_array,
1131 enum glsl_base_type base_type,
1132 enum gl_access_qualifier access,
1133 enum pipe_format format)
1134 {
1135 nir_variable *var = c->images[binding];
1136
1137 if (!var) {
1138 const struct glsl_type *type = glsl_image_type(dim, is_array, base_type);
1139
1140 var = nir_variable_create(c->build.shader, nir_var_image, type, "image");
1141 var->data.binding = binding;
1142 var->data.explicit_binding = true;
1143 var->data.access = access;
1144 var->data.image.format = format;
1145
1146 c->images[binding] = var;
1147 c->num_images = MAX2(c->num_images, binding + 1);
1148 if (dim == GLSL_SAMPLER_DIM_MS)
1149 c->num_msaa_images = c->num_images;
1150 }
1151
1152 return var;
1153 }
1154
1155 static void
add_ssbo_var(struct ttn_compile * c,int binding)1156 add_ssbo_var(struct ttn_compile *c, int binding)
1157 {
1158 nir_variable *var = c->ssbo[binding];
1159
1160 if (!var) {
1161 /* A length of 0 is used to denote unsized arrays */
1162 const struct glsl_type *type = glsl_array_type(glsl_uint_type(), 0, 0);
1163
1164 struct glsl_struct_field field = {
1165 .type = type,
1166 .name = "data",
1167 .location = -1,
1168 };
1169
1170 var = nir_variable_create(c->build.shader, nir_var_mem_ssbo, type, "ssbo");
1171 var->data.binding = binding;
1172 var->interface_type =
1173 glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
1174 false, "data");
1175 c->ssbo[binding] = var;
1176 }
1177 }
1178
/* Translate the TGSI texture sampling opcodes (TEX, TXP, TXB, TXL, TXF,
 * TXD, TG4, LODQ and variants) into a single nir_tex_instr.
 *
 * Returns the sample result padded to a vec4, since TGSI destinations
 * are always four components wide.
 */
static nir_def *
ttn_tex(struct ttn_compile *c, nir_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs, samp = 1, sview, i;

   /* Pick the NIR texop and count the tex sources the opcode itself
    * contributes.  "samp" is the TGSI source-operand slot that holds the
    * sampler register (the two-operand variants move it to slot 2/3).
    */
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TEX_LZ:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
      /* Multisample targets fetch by sample index instead of LOD. */
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TG4:
      /* TODO: Shadow cube samplers unsupported. */
      assert(tgsi_inst->Texture.Texture != TGSI_TEXTURE_SHADOWCUBE_ARRAY);
      op = nir_texop_tg4;
      num_srcs = 1;
      samp = 2;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   /* Shadow targets add one more source for the comparator. */
   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   /* Deref sources (texture deref + sampler deref) */
   num_srcs += 2;

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   get_texture_info(tgsi_inst->Texture.Texture,
                    &instr->sampler_dim, &instr->is_shadow, &instr->is_array);

   instr->coord_components =
      glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);

   /* The array layer rides along as an extra coordinate component. */
   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is same as SAMP index:
    */
   sview = tgsi_inst->Src[samp].Register.Index;

   /* Default to float if no sampler-view type was declared for this slot. */
   nir_alu_type sampler_type =
      sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;

   /* LOD queries always return float regardless of the sampler type. */
   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float32;
   } else {
      instr->dest_type = sampler_type;
   }

   nir_variable *var =
      get_sampler_var(c, sview, instr->sampler_dim,
                      instr->is_shadow,
                      instr->is_array,
                      base_type_for_alu_type(sampler_type),
                      op);

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   /* The same deref serves as both the texture and the sampler. */
   instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                                &deref->def);
   src_number++;
   instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
                                                &deref->def);
   src_number++;

   instr->src[src_number] =
      nir_tex_src_for_ssa(nir_tex_src_coord,
                          nir_trim_vector(b, src[0], instr->coord_components));
   src_number++;

   /* TXP divides the coordinate by src0.w via a projector source. */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
                                                   ttn_channel(b, src[0], W));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
                                                   ttn_channel(b, src[0], W));
      src_number++;
   }

   /* Two-operand variants carry bias/LOD in src1.x instead of src0.w. */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
                                                   ttn_channel(b, src[1], X));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
      /* TEX_LZ is an implicit LOD-zero sample. */
      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ)
         instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                                   ttn_channel(b, src[1], X));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ) {
      /* src0.w is the sample index for MSAA fetches, the LOD otherwise. */
      if (op == nir_texop_txf_ms) {
         instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                                      ttn_channel(b, src[0], W));
      } else {
         if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ)
            instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
         else
            instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_lod;
      }
      src_number++;
   }

   /* Explicit derivatives: src1 is ddx, src2 is ddy. */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number] =
         nir_tex_src_for_ssa(nir_tex_src_ddx,
                   nir_trim_vector(b, src[1], nir_tex_instr_src_size(instr, src_number)));
      src_number++;
      instr->src[src_number] =
         nir_tex_src_for_ssa(nir_tex_src_ddy,
                   nir_trim_vector(b, src[2], nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   /* The gather component comes either from the sampler swizzle or from
    * an immediate in src1, depending on driver capability.
    */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
      if (c->cap_tg4_component_in_swizzle)
         instr->component = tgsi_inst->Src[samp].Register.SwizzleX;
      else
         instr->component = nir_scalar_as_uint(nir_scalar_resolved(src[1], 0));
   }

   /* The shadow comparator lives in the channel right after the
    * coordinate, or in src1.x when the coordinate already fills src0.
    */
   if (instr->is_shadow) {
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset ins't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL,
                                           true);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_offset,
                                                   nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);
   assert(src_number == instr->num_srcs);

   nir_def_init(&instr->instr, &instr->def,
                nir_tex_instr_dest_size(instr), 32);
   nir_builder_instr_insert(b, &instr->instr);
   return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
}
1432
/* TGSI_OPCODE_TXQ is actually two distinct operations:
 *
 *     dst.x = texture_width(unit, lod)
 *     dst.y = texture_height(unit, lod)
 *     dst.z = texture_depth(unit, lod)
 *     dst.w = texture_levels(unit)
 *
 * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
 */
static nir_def *
ttn_txq(struct ttn_compile *c, nir_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *txs, *qlv;

   /* txs produces the per-LOD size (dst.xyz). */
   txs = nir_tex_instr_create(b->shader, 2);
   txs->op = nir_texop_txs;
   txs->dest_type = nir_type_uint32;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &txs->sampler_dim, &txs->is_shadow, &txs->is_array);

   /* query_levels produces the mip level count (dst.w). */
   qlv = nir_tex_instr_create(b->shader, 1);
   qlv->op = nir_texop_query_levels;
   qlv->dest_type = nir_type_uint32;
   get_texture_info(tgsi_inst->Texture.Texture,
                    &qlv->sampler_dim, &qlv->is_shadow, &qlv->is_array);

   assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
   int sview = tgsi_inst->Src[1].Register.Index;

   /* Default to float if no sampler-view type was declared for this slot. */
   nir_alu_type sampler_type =
      sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;

   nir_variable *var =
      get_sampler_var(c, sview, txs->sampler_dim,
                      txs->is_shadow,
                      txs->is_array,
                      base_type_for_alu_type(sampler_type),
                      nir_texop_txs);

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   /* Both queries reference the same texture deref. */
   txs->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                     &deref->def);

   qlv->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                     &deref->def);

   /* lod: */
   txs->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                     ttn_channel(b, src[0], X));

   nir_def_init(&txs->instr, &txs->def, nir_tex_instr_dest_size(txs), 32);
   nir_builder_instr_insert(b, &txs->instr);

   nir_def_init(&qlv->instr, &qlv->def, 1, 32);
   nir_builder_instr_insert(b, &qlv->instr);

   /* Combine: size (zero-padded) in .xyz, level count inserted into .w. */
   return nir_vector_insert_imm(b,
                                nir_pad_vector_imm_int(b, &txs->def, 0, 4),
                                &qlv->def, 3);
}
1496
1497 static enum glsl_base_type
get_image_base_type(struct tgsi_full_instruction * tgsi_inst)1498 get_image_base_type(struct tgsi_full_instruction *tgsi_inst)
1499 {
1500 const struct util_format_description *desc =
1501 util_format_description(tgsi_inst->Memory.Format);
1502
1503 if (desc->channel[0].pure_integer) {
1504 if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
1505 return GLSL_TYPE_INT;
1506 else
1507 return GLSL_TYPE_UINT;
1508 }
1509 return GLSL_TYPE_FLOAT;
1510 }
1511
1512 static enum gl_access_qualifier
get_mem_qualifier(struct tgsi_full_instruction * tgsi_inst)1513 get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
1514 {
1515 enum gl_access_qualifier access = 0;
1516
1517 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_COHERENT)
1518 access |= ACCESS_COHERENT;
1519 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT)
1520 access |= ACCESS_RESTRICT;
1521 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
1522 access |= ACCESS_VOLATILE;
1523 if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
1524 access |= ACCESS_NON_TEMPORAL;
1525
1526 return access;
1527 }
1528
/* Translate TGSI LOAD/STORE on buffers (SSBOs) and images into the
 * corresponding NIR intrinsics.
 *
 * Returns the loaded value padded to a vec4 for LOAD, or NULL for STORE
 * (which produces no SSA value).
 */
static nir_def *
ttn_mem(struct ttn_compile *c, nir_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_intrinsic_instr *instr = NULL;
   unsigned resource_index, addr_src_index, file;

   /* For LOAD the resource is Src[0] and the address is Src[1]; for
    * STORE the resource is Dst[0] and the address is Src[0].
    */
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_LOAD:
      assert(!tgsi_inst->Src[0].Register.Indirect);
      resource_index = tgsi_inst->Src[0].Register.Index;
      file = tgsi_inst->Src[0].Register.File;
      addr_src_index = 1;
      break;
   case TGSI_OPCODE_STORE:
      assert(!tgsi_inst->Dst[0].Register.Indirect);
      resource_index = tgsi_inst->Dst[0].Register.Index;
      file = tgsi_inst->Dst[0].Register.File;
      addr_src_index = 0;
      break;
   default:
      unreachable("unexpected memory opcode");
   }

   if (file == TGSI_FILE_BUFFER) {
      nir_intrinsic_op op;

      switch (tgsi_inst->Instruction.Opcode) {
      case TGSI_OPCODE_LOAD:
         op = nir_intrinsic_load_ssbo;
         break;
      case TGSI_OPCODE_STORE:
         op = nir_intrinsic_store_ssbo;
         break;
      default:
         unreachable("unexpected buffer opcode");
      }

      add_ssbo_var(c, resource_index);

      instr = nir_intrinsic_instr_create(b->shader, op);
      instr->num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
      nir_intrinsic_set_access(instr, get_mem_qualifier(tgsi_inst));
      nir_intrinsic_set_align(instr, 4, 0);

      /* store_ssbo sources are (value, buffer, offset); load_ssbo's are
       * (buffer, offset).
       */
      unsigned i = 0;
      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
         instr->src[i++] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
                                                       instr->num_components));
      instr->src[i++] = nir_src_for_ssa(nir_imm_int(b, resource_index));
      instr->src[i++] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], X));

      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
         nir_intrinsic_set_write_mask(instr, tgsi_inst->Dst[0].Register.WriteMask);

   } else if (file == TGSI_FILE_IMAGE) {
      nir_intrinsic_op op;

      switch (tgsi_inst->Instruction.Opcode) {
      case TGSI_OPCODE_LOAD:
         op = nir_intrinsic_image_deref_load;
         break;
      case TGSI_OPCODE_STORE:
         op = nir_intrinsic_image_deref_store;
         break;
      default:
         unreachable("unexpected file opcode");
      }

      instr = nir_intrinsic_instr_create(b->shader, op);

      /* Set the image variable dereference. */
      enum glsl_sampler_dim dim;
      bool is_array;
      get_texture_info(tgsi_inst->Memory.Texture, &dim, NULL, &is_array);

      enum glsl_base_type base_type = get_image_base_type(tgsi_inst);
      enum gl_access_qualifier access = get_mem_qualifier(tgsi_inst);

      nir_variable *image =
         get_image_var(c, resource_index,
                       dim, is_array, base_type, access,
                       tgsi_inst->Memory.Format);
      nir_deref_instr *image_deref = nir_build_deref_var(b, image);
      const struct glsl_type *type = image_deref->type;

      nir_intrinsic_set_access(instr, image_deref->var->data.access);

      instr->src[0] = nir_src_for_ssa(&image_deref->def);
      instr->src[1] = nir_src_for_ssa(src[addr_src_index]);

      /* Set the sample argument, which is undefined for single-sample images. */
      if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
         instr->src[2] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], W));
      } else {
         instr->src[2] = nir_src_for_ssa(nir_undef(b, 1, 32));
      }

      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
         instr->src[3] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
      }

      unsigned num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);

      /* For stores, src[3] is the value and src[4] the LOD. */
      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
         instr->src[3] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
                                                     num_components));
         instr->src[4] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
      }

      instr->num_components = num_components;
   } else {
      unreachable("unexpected file");
   }


   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
      nir_def_init(&instr->instr, &instr->def, instr->num_components, 32);
      nir_builder_instr_insert(b, &instr->instr);
      return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
   } else {
      nir_builder_instr_insert(b, &instr->instr);
      return NULL;
   }
}
1655
/* Direct TGSI opcode -> NIR ALU opcode translation table, indexed by
 * TGSI opcode.  A 0 entry means the opcode has no direct ALU mapping:
 * it is either handled specially in ttn_emit_instruction()'s switch or
 * unsupported.  (MOV is special-cased there since its nir_op entry is
 * indistinguishable from 0 — TODO confirm nir_op_mov's enum value.)
 */
static const nir_op op_trans[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL] = 0,
   [TGSI_OPCODE_MOV] = nir_op_mov,
   [TGSI_OPCODE_FBFETCH] = nir_op_mov,
   [TGSI_OPCODE_LIT] = 0,
   [TGSI_OPCODE_RCP] = nir_op_frcp,
   [TGSI_OPCODE_RSQ] = nir_op_frsq,
   [TGSI_OPCODE_EXP] = 0,
   [TGSI_OPCODE_LOG] = 0,
   [TGSI_OPCODE_MUL] = nir_op_fmul,
   [TGSI_OPCODE_ADD] = nir_op_fadd,
   [TGSI_OPCODE_DP3] = 0,
   [TGSI_OPCODE_DP4] = 0,
   [TGSI_OPCODE_DST] = 0,
   [TGSI_OPCODE_MIN] = nir_op_fmin,
   [TGSI_OPCODE_MAX] = nir_op_fmax,
   [TGSI_OPCODE_SLT] = nir_op_slt,
   [TGSI_OPCODE_SGE] = nir_op_sge,
   [TGSI_OPCODE_MAD] = nir_op_ffma,
   [TGSI_OPCODE_TEX_LZ] = 0,
   [TGSI_OPCODE_LRP] = 0,
   [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
   [TGSI_OPCODE_FRC] = nir_op_ffract,
   [TGSI_OPCODE_TXF_LZ] = 0,
   [TGSI_OPCODE_FLR] = nir_op_ffloor,
   [TGSI_OPCODE_ROUND] = nir_op_fround_even,
   [TGSI_OPCODE_EX2] = nir_op_fexp2,
   [TGSI_OPCODE_LG2] = nir_op_flog2,
   [TGSI_OPCODE_POW] = nir_op_fpow,
   [TGSI_OPCODE_COS] = nir_op_fcos,
   [TGSI_OPCODE_KILL] = 0,
   [TGSI_OPCODE_PK2H] = 0, /* XXX */
   [TGSI_OPCODE_PK2US] = 0, /* XXX */
   [TGSI_OPCODE_PK4B] = 0, /* XXX */
   [TGSI_OPCODE_PK4UB] = 0, /* XXX */
   [TGSI_OPCODE_SEQ] = nir_op_seq,
   [TGSI_OPCODE_SGT] = 0,
   [TGSI_OPCODE_SIN] = nir_op_fsin,
   [TGSI_OPCODE_SNE] = nir_op_sne,
   [TGSI_OPCODE_SLE] = 0,
   [TGSI_OPCODE_TEX] = 0,
   [TGSI_OPCODE_TXD] = 0,
   [TGSI_OPCODE_TXP] = 0,
   [TGSI_OPCODE_UP2H] = 0, /* XXX */
   [TGSI_OPCODE_UP2US] = 0, /* XXX */
   [TGSI_OPCODE_UP4B] = 0, /* XXX */
   [TGSI_OPCODE_UP4UB] = 0, /* XXX */
   [TGSI_OPCODE_ARR] = 0,

   /* No function calls, yet. */
   [TGSI_OPCODE_CAL] = 0, /* XXX */
   [TGSI_OPCODE_RET] = 0, /* XXX */

   [TGSI_OPCODE_SSG] = nir_op_fsign,
   [TGSI_OPCODE_CMP] = 0,
   [TGSI_OPCODE_TXB] = 0,
   [TGSI_OPCODE_DIV] = nir_op_fdiv,
   [TGSI_OPCODE_DP2] = 0,
   [TGSI_OPCODE_TXL] = 0,

   [TGSI_OPCODE_BRK] = 0,
   [TGSI_OPCODE_IF] = 0,
   [TGSI_OPCODE_UIF] = 0,
   [TGSI_OPCODE_ELSE] = 0,
   [TGSI_OPCODE_ENDIF] = 0,

   [TGSI_OPCODE_CEIL] = nir_op_fceil,
   [TGSI_OPCODE_I2F] = nir_op_i2f32,
   [TGSI_OPCODE_NOT] = nir_op_inot,
   [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
   [TGSI_OPCODE_SHL] = nir_op_ishl,
   [TGSI_OPCODE_AND] = nir_op_iand,
   [TGSI_OPCODE_OR] = nir_op_ior,
   [TGSI_OPCODE_MOD] = nir_op_umod,
   [TGSI_OPCODE_XOR] = nir_op_ixor,
   [TGSI_OPCODE_TXF] = 0,
   [TGSI_OPCODE_TXQ] = 0,

   [TGSI_OPCODE_CONT] = 0,

   [TGSI_OPCODE_EMIT] = 0, /* XXX */
   [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */

   [TGSI_OPCODE_BGNLOOP] = 0,
   [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
   [TGSI_OPCODE_ENDLOOP] = 0,
   [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */

   [TGSI_OPCODE_NOP] = 0,
   [TGSI_OPCODE_FSEQ] = nir_op_feq,
   [TGSI_OPCODE_FSGE] = nir_op_fge,
   [TGSI_OPCODE_FSLT] = nir_op_flt,
   [TGSI_OPCODE_FSNE] = nir_op_fneu,

   [TGSI_OPCODE_KILL_IF] = 0,

   [TGSI_OPCODE_END] = 0,

   [TGSI_OPCODE_F2I] = nir_op_f2i32,
   [TGSI_OPCODE_IDIV] = nir_op_idiv,
   [TGSI_OPCODE_IMAX] = nir_op_imax,
   [TGSI_OPCODE_IMIN] = nir_op_imin,
   [TGSI_OPCODE_INEG] = nir_op_ineg,
   [TGSI_OPCODE_ISGE] = nir_op_ige,
   [TGSI_OPCODE_ISHR] = nir_op_ishr,
   [TGSI_OPCODE_ISLT] = nir_op_ilt,
   [TGSI_OPCODE_F2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2F] = nir_op_u2f32,
   [TGSI_OPCODE_UADD] = nir_op_iadd,
   [TGSI_OPCODE_UDIV] = nir_op_udiv,
   [TGSI_OPCODE_UMAD] = 0,
   [TGSI_OPCODE_UMAX] = nir_op_umax,
   [TGSI_OPCODE_UMIN] = nir_op_umin,
   [TGSI_OPCODE_UMOD] = nir_op_umod,
   [TGSI_OPCODE_UMUL] = nir_op_imul,
   [TGSI_OPCODE_USEQ] = nir_op_ieq,
   [TGSI_OPCODE_USGE] = nir_op_uge,
   [TGSI_OPCODE_USHR] = nir_op_ushr,
   [TGSI_OPCODE_USLT] = nir_op_ult,
   [TGSI_OPCODE_USNE] = nir_op_ine,

   [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
   [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */

   /* XXX: SAMPLE opcodes */

   [TGSI_OPCODE_UARL] = nir_op_mov,
   [TGSI_OPCODE_UCMP] = 0,
   [TGSI_OPCODE_IABS] = nir_op_iabs,
   [TGSI_OPCODE_ISSG] = nir_op_isign,

   [TGSI_OPCODE_LOAD] = 0,
   [TGSI_OPCODE_STORE] = 0,

   /* XXX: atomics */

   [TGSI_OPCODE_TEX2] = 0,
   [TGSI_OPCODE_TXB2] = 0,
   [TGSI_OPCODE_TXL2] = 0,

   [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
   [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,

   [TGSI_OPCODE_TG4] = 0,
   [TGSI_OPCODE_LODQ] = 0,

   [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
   [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
   [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
   [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
   [TGSI_OPCODE_POPC] = nir_op_bit_count,
   [TGSI_OPCODE_LSB] = nir_op_find_lsb,
   [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
   [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,

   [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
   [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */

   [TGSI_OPCODE_F2D] = nir_op_f2f64,
   [TGSI_OPCODE_D2F] = nir_op_f2f32,
   [TGSI_OPCODE_DMUL] = nir_op_fmul,
   [TGSI_OPCODE_D2U] = nir_op_f2u32,
   [TGSI_OPCODE_U2D] = nir_op_u2f64,

   [TGSI_OPCODE_U64ADD] = nir_op_iadd,
   [TGSI_OPCODE_U64MUL] = nir_op_imul,
   [TGSI_OPCODE_U64DIV] = nir_op_udiv,
   [TGSI_OPCODE_U64SNE] = nir_op_ine,
   [TGSI_OPCODE_I64NEG] = nir_op_ineg,
   [TGSI_OPCODE_I64ABS] = nir_op_iabs,
};
1830
/* Translate one TGSI instruction into NIR.
 *
 * Sources are fetched as vec4 SSA values, the opcode is dispatched either
 * to a dedicated lowering above, to the op_trans[] ALU table, or to
 * control-flow builder calls.  If the opcode produced a value, it is
 * saturated/padded to vec4 and stored to the destination register,
 * variable, or indirect array as appropriate.
 */
static void
ttn_emit_instruction(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   unsigned i;
   unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
   struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];

   if (tgsi_op == TGSI_OPCODE_END)
      return;

   nir_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
   }

   unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);

   /* The destination bitsize of the NIR opcode (not TGSI, where it's always
    * 32 bits).  This needs to be passed into ttn_alu() because it can't be
    * inferred for comparison opcodes.
    */
   unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;

   /* If this is non-NULL after the switch, it will be written to the
    * corresponding register/variable/etc after.
    */
   nir_def *dst = NULL;

   switch (tgsi_op) {
   /* Scalar opcodes operate on the .x channel only. */
   case TGSI_OPCODE_RSQ:
      dst = nir_frsq(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_SQRT:
      dst = nir_fsqrt(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_RCP:
      dst = nir_frcp(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_EX2:
      dst = nir_fexp2(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_LG2:
      dst = nir_flog2(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_POW:
      dst = nir_fpow(b, ttn_channel(b, src[0], X), ttn_channel(b, src[1], X));
      break;

   case TGSI_OPCODE_COS:
      dst = nir_fcos(b, ttn_channel(b, src[0], X));
      break;

   case TGSI_OPCODE_SIN:
      dst = nir_fsin(b, ttn_channel(b, src[0], X));
      break;

   /* ARL floors, ARR rounds, before converting to the integer address. */
   case TGSI_OPCODE_ARL:
      dst = nir_f2i32(b, nir_ffloor(b, src[0]));
      break;

   case TGSI_OPCODE_EXP:
      dst = ttn_exp(b, src);
      break;

   case TGSI_OPCODE_LOG:
      dst = ttn_log(b, src);
      break;

   case TGSI_OPCODE_DST:
      dst = ttn_dst(b, src);
      break;

   case TGSI_OPCODE_LIT:
      dst = ttn_lit(b, src);
      break;

   case TGSI_OPCODE_DP2:
      dst = nir_fdot2(b, src[0], src[1]);
      break;

   case TGSI_OPCODE_DP3:
      dst = nir_fdot3(b, src[0], src[1]);
      break;

   case TGSI_OPCODE_DP4:
      dst = nir_fdot4(b, src[0], src[1]);
      break;

   case TGSI_OPCODE_UMAD:
      dst = nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]);
      break;

   /* Note the TGSI operand order: LRP dst = lerp(src2, src1, src0). */
   case TGSI_OPCODE_LRP:
      dst = nir_flrp(b, src[2], src[1], src[0]);
      break;

   case TGSI_OPCODE_KILL:
      ttn_kill(b);
      break;

   case TGSI_OPCODE_ARR:
      dst = nir_f2i32(b, nir_fround_even(b, src[0]));
      break;

   /* CMP selects src1 where src0 < 0, else src2; UCMP on nonzero int. */
   case TGSI_OPCODE_CMP:
      dst = nir_bcsel(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)),
                      src[1], src[2]);
      break;

   case TGSI_OPCODE_UCMP:
      dst = nir_bcsel(b, nir_ine(b, src[0], nir_imm_int(b, 0)),
                      src[1], src[2]);
      break;

   /* SGT/SLE are lowered by swapping operands of SLT/SGE. */
   case TGSI_OPCODE_SGT:
      dst = nir_slt(b, src[1], src[0]);
      break;

   case TGSI_OPCODE_SLE:
      dst = nir_sge(b, src[1], src[0]);
      break;

   case TGSI_OPCODE_KILL_IF:
      ttn_kill_if(b, src);
      break;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TEX_LZ:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      dst = ttn_tex(c, src);
      break;

   case TGSI_OPCODE_TXQ:
      dst = ttn_txq(c, src);
      break;

   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_STORE:
      dst = ttn_mem(c, src);
      break;

   case TGSI_OPCODE_NOP:
      break;

   /* Control flow: IF tests float != 0, UIF tests int != 0. */
   case TGSI_OPCODE_IF:
      nir_push_if(b, nir_fneu_imm(b, nir_channel(b, src[0], 0), 0.0));
      break;

   case TGSI_OPCODE_UIF:
      nir_push_if(b, nir_ine_imm(b, nir_channel(b, src[0], 0), 0));
      break;

   case TGSI_OPCODE_ELSE:
      nir_push_else(&c->build, NULL);
      break;

   case TGSI_OPCODE_ENDIF:
      nir_pop_if(&c->build, NULL);
      break;

   case TGSI_OPCODE_BGNLOOP:
      nir_push_loop(&c->build);
      break;

   case TGSI_OPCODE_BRK:
      nir_jump(b, nir_jump_break);
      break;

   case TGSI_OPCODE_CONT:
      nir_jump(b, nir_jump_continue);
      break;

   case TGSI_OPCODE_ENDLOOP:
      nir_pop_loop(&c->build, NULL);
      break;

   case TGSI_OPCODE_BARRIER:
      ttn_barrier(b);
      break;

   case TGSI_OPCODE_DDX:
      dst = nir_ddx(b, src[0]);
      break;

   case TGSI_OPCODE_DDX_FINE:
      dst = nir_ddx_fine(b, src[0]);
      break;

   case TGSI_OPCODE_DDY:
      dst = nir_ddy(b, src[0]);
      break;

   case TGSI_OPCODE_DDY_FINE:
      dst = nir_ddy_fine(b, src[0]);
      break;

   default:
      /* Everything else goes through the direct ALU translation table;
       * MOV is special-cased since its table entry equals the 0 sentinel.
       */
      if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
         dst = ttn_alu(b, op_trans[tgsi_op], dst_bitsize, src);
      } else {
         fprintf(stderr, "unknown TGSI opcode: %s\n",
                 tgsi_get_opcode_name(tgsi_op));
         abort();
      }
      break;
   }

   if (dst == NULL)
      return;

   if (tgsi_inst->Instruction.Saturate)
      dst = nir_fsat(b, dst);

   /* TGSI destinations are always vec4; widen scalar/vec2 results. */
   if (dst->num_components == 1)
      dst = nir_replicate(b, dst, 4);
   else if (dst->num_components == 2)
      dst = nir_pad_vector_imm_int(b, dst, 0, 4); /* for 64->32 conversions */

   assert(dst->num_components == 4);

   /* Finally, copy the SSA def to the NIR variable/register */
   nir_variable *var = ttn_get_var(c, tgsi_dst);
   if (var) {
      unsigned index = tgsi_dst->Register.Index;
      unsigned offset = c->temp_regs[index].offset;
      struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
                                           &tgsi_dst->Indirect : NULL;
      nir_store_deref(b, ttn_array_deref(c, var, offset, indirect), dst,
                      tgsi_dst->Register.WriteMask);
   } else {
      unsigned index = tgsi_dst->Register.Index;
      nir_def *reg = NULL;
      unsigned base_offset = 0;

      if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) {
         assert(!c->temp_regs[index].var && "handled above");
         assert(!tgsi_dst->Register.Indirect);

         reg = c->temp_regs[index].reg;
         base_offset = c->temp_regs[index].offset;
      } else if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
         reg = c->output_regs[index].reg;
         base_offset = c->output_regs[index].offset;
      } else if (tgsi_dst->Register.File == TGSI_FILE_ADDRESS) {
         assert(index == 0);
         reg = c->addr_reg;
      }

      if (tgsi_dst->Register.Indirect) {
         nir_def *indirect = ttn_src_for_indirect(c, &tgsi_dst->Indirect);
         nir_store_reg_indirect(b, dst, reg, indirect, .base = base_offset,
                                .write_mask = tgsi_dst->Register.WriteMask);
      } else {
         nir_build_store_reg(b, dst, reg, .base = base_offset,
                             .write_mask = tgsi_dst->Register.WriteMask);
      }
   }
}
2106
2107 /**
2108 * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
2109 * variables at the end of the shader.
2110 *
2111 * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
2112 * written, because there's no output load intrinsic, which means we couldn't
2113 * handle writemasks.
2114 */
2115 static void
ttn_add_output_stores(struct ttn_compile * c)2116 ttn_add_output_stores(struct ttn_compile *c)
2117 {
2118 nir_builder *b = &c->build;
2119
2120 for (int i = 0; i < c->build.shader->num_outputs; i++) {
2121 nir_variable *var = c->outputs[i];
2122 if (!var)
2123 continue;
2124
2125 nir_def *store_value =
2126 nir_build_load_reg(b, 4, 32, c->output_regs[i].reg,
2127 .base = c->output_regs[i].offset);
2128
2129 uint32_t store_mask = BITFIELD_MASK(store_value->num_components);
2130 if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT) {
2131 /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output
2132 * and TGSI_SEMANTIC_STENCIL.y for the stencil output,
2133 * while NIR uses a single-component output.
2134 */
2135 if (var->data.location == FRAG_RESULT_DEPTH)
2136 store_value = nir_channel(b, store_value, 2);
2137 else if (var->data.location == FRAG_RESULT_STENCIL)
2138 store_value = nir_channel(b, store_value, 1);
2139 else if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2140 store_value = nir_channel(b, store_value, 0);
2141 } else {
2142 /* FOGC, LAYER, and PSIZ are scalar values */
2143 if (var->data.location == VARYING_SLOT_FOGC ||
2144 var->data.location == VARYING_SLOT_LAYER ||
2145 var->data.location == VARYING_SLOT_PSIZ) {
2146 store_value = nir_channel(b, store_value, 0);
2147 }
2148 if (var->data.location == VARYING_SLOT_CLIP_DIST0)
2149 store_mask = BITFIELD_MASK(MIN2(c->build.shader->info.clip_distance_array_size, 4));
2150 else if (var->data.location == VARYING_SLOT_CLIP_DIST1) {
2151 if (c->build.shader->info.clip_distance_array_size > 4)
2152 store_mask = BITFIELD_MASK(c->build.shader->info.clip_distance_array_size - 4);
2153 else
2154 store_mask = 0;
2155 }
2156 }
2157
2158 if (b->shader->options->compact_arrays &&
2159 (var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2160 var->data.location == VARYING_SLOT_CLIP_DIST1)) {
2161 if (!store_mask)
2162 continue;
2163
2164 nir_deref_instr *deref = nir_build_deref_var(b, c->clipdist);
2165 nir_def *zero = nir_imm_zero(b, 1, 32);
2166 unsigned offset = var->data.location == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
2167 unsigned size = var->data.location == VARYING_SLOT_CLIP_DIST1 ?
2168 b->shader->info.clip_distance_array_size :
2169 MIN2(4, b->shader->info.clip_distance_array_size);
2170 for (unsigned i = offset; i < size; i++) {
2171 /* deref the array member and store each component */
2172 nir_deref_instr *component_deref = nir_build_deref_array_imm(b, deref, i);
2173 nir_def *val = zero;
2174 if (store_mask & BITFIELD_BIT(i - offset))
2175 val = nir_channel(b, store_value, i - offset);
2176 nir_store_deref(b, component_deref, val, 0x1);
2177 }
2178 } else {
2179 nir_store_deref(b, nir_build_deref_var(b, var), store_value, store_mask);
2180 }
2181 }
2182 }
2183
2184 /**
2185 * Parses the given TGSI tokens.
2186 */
2187 static void
ttn_parse_tgsi(struct ttn_compile * c,const void * tgsi_tokens)2188 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
2189 {
2190 struct tgsi_parse_context parser;
2191 ASSERTED int ret;
2192
2193 ret = tgsi_parse_init(&parser, tgsi_tokens);
2194 assert(ret == TGSI_PARSE_OK);
2195
2196 while (!tgsi_parse_end_of_tokens(&parser)) {
2197 tgsi_parse_token(&parser);
2198 c->token = &parser.FullToken;
2199
2200 switch (parser.FullToken.Token.Type) {
2201 case TGSI_TOKEN_TYPE_DECLARATION:
2202 ttn_emit_declaration(c);
2203 break;
2204
2205 case TGSI_TOKEN_TYPE_INSTRUCTION:
2206 ttn_emit_instruction(c);
2207 break;
2208
2209 case TGSI_TOKEN_TYPE_IMMEDIATE:
2210 ttn_emit_immediate(c);
2211 break;
2212 }
2213 }
2214
2215 tgsi_parse_free(&parser);
2216 }
2217
2218 static void
ttn_read_pipe_caps(struct ttn_compile * c,struct pipe_screen * screen)2219 ttn_read_pipe_caps(struct ttn_compile *c,
2220 struct pipe_screen *screen)
2221 {
2222 c->cap_samplers_as_deref = screen->caps.nir_samplers_as_deref;
2223 c->cap_face_is_sysval = screen->caps.fs_face_is_integer_sysval;
2224 c->cap_position_is_sysval = screen->caps.fs_position_is_sysval;
2225 c->cap_point_is_sysval = screen->caps.fs_point_is_sysval;
2226 c->cap_integers = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_INTEGERS);
2227 c->cap_tg4_component_in_swizzle =
2228 screen->caps.tgsi_tg4_component_in_swizzle;
2229 }
2230
/* Zero an entire bitset, then seed its first word from a 32-bit mask.
 * The static assert guarantees the bitset's word type is wide enough for
 * the whole mask to land in element 0.
 */
#define BITSET_SET32(bitset, u32_mask) do { \
      STATIC_ASSERT(sizeof((bitset)[0]) >= sizeof(u32_mask)); \
      BITSET_ZERO(bitset); \
      (bitset)[0] = (u32_mask); \
   } while (0)
2236
2237 /**
2238 * Initializes a TGSI-to-NIR compiler.
2239 */
static struct ttn_compile *
ttn_compile_init(const void *tgsi_tokens,
                 const nir_shader_compiler_options *options,
                 struct pipe_screen *screen)
{
   struct ttn_compile *c;
   struct nir_shader *s;
   struct tgsi_shader_info scan;
   /* Monotonic counter; only used to give each shader a unique "TTN%d"
    * name below. */
   static int ttn_sh_counter = 0;

   /* Either explicit compiler options or a screen to query them from is
    * required. */
   assert(options || screen);
   c = rzalloc(NULL, struct ttn_compile);

   tgsi_scan_shader(tgsi_tokens, &scan);
   /* NOTE(review): c->scan points at this stack-local, so it is only valid
    * while ttn_compile_init() runs (it is read by ttn_read_pipe_caps() and
    * the parsing below).  It must not be dereferenced after this function
    * returns — confirm no caller does. */
   c->scan = &scan;

   if (!options) {
      options =
         screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
   }

   c->build = nir_builder_init_simple_shader(tgsi_processor_to_shader_stage(scan.processor),
                                             options, "TTN%d", (int)p_atomic_inc_return(&ttn_sh_counter));

   s = c->build.shader;
   /* Hash the scan results to serve as this shader's source checksum. */
   _mesa_blake3_compute(&scan, sizeof(scan), s->info.source_blake3);

   if (screen) {
      ttn_read_pipe_caps(c, screen);
   } else {
      /* TTN used to be hard coded to always make FACE a sysval,
       * so it makes sense to preserve that behavior so users don't break. */
      c->cap_face_is_sysval = true;
   }

   s->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;

   if (s->info.stage == MESA_SHADER_FRAGMENT)
      s->info.fs.untyped_color_outputs = true;

   /* Translate the scan's per-file max indices and declaration bitmasks
    * into NIR shader_info counts/bitsets. */
   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
   s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
   s->info.num_ssbos = util_last_bit(scan.shader_buffers_declared);
   /* Bit 0 of const_buffers_declared is the default uniform buffer, which
    * is counted by num_uniforms above — hence the shift. */
   s->info.num_ubos = util_last_bit(scan.const_buffers_declared >> 1);
   s->info.num_images = util_last_bit(scan.images_declared);
   BITSET_SET32(s->info.images_used, scan.images_declared);
   BITSET_SET32(s->info.image_buffers, scan.images_buffers);
   BITSET_SET32(s->info.msaa_images, scan.msaa_images_declared);
   s->info.num_textures = util_last_bit(scan.samplers_declared);
   BITSET_SET32(s->info.textures_used, scan.samplers_declared);
   BITSET_ZERO(s->info.textures_used_by_txf); /* No scan information yet */
   BITSET_SET32(s->info.samplers_used, scan.samplers_declared);
   s->info.internal = false;

   /* Default for TGSI is separate, this is assumed throughout the tree */
   s->info.separate_shader = true;

   /* Map each TGSI shader property onto the matching shader_info field. */
   for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) {
      unsigned value = scan.properties[i];

      switch (i) {
      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
         break; /* handled in ttn_emit_declaration */
      case TGSI_PROPERTY_FS_COORD_ORIGIN:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.origin_upper_left = value == TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
         break;
      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.pixel_center_integer = value == TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
         break;
      case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
         if (s->info.stage == MESA_SHADER_FRAGMENT)
            s->info.fs.depth_layout = ttn_get_depth_layout(value);
         break;
      case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
         if (s->info.stage == MESA_SHADER_VERTEX)
            s->info.vs.window_space_position = value;
         break;
      case TGSI_PROPERTY_NEXT_SHADER:
         s->info.next_stage = tgsi_processor_to_shader_stage(value);
         break;
      case TGSI_PROPERTY_VS_BLIT_SGPRS_AMD:
         if (s->info.stage == MESA_SHADER_VERTEX)
            s->info.vs.blit_sgprs_amd = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.workgroup_size[0] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.workgroup_size[1] = value;
         break;
      case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.workgroup_size[2] = value;
         break;
      case TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD:
         if (s->info.stage == MESA_SHADER_COMPUTE)
            s->info.cs.user_data_components_amd = value;
         break;
      case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
         s->info.clip_distance_array_size = value;
         break;
      case TGSI_PROPERTY_LEGACY_MATH_RULES:
         s->info.use_legacy_math_rules = value;
         break;
      default:
         /* Unhandled properties are only fatal when actually set. */
         if (value) {
            fprintf(stderr, "tgsi_to_nir: unhandled TGSI property %u = %u\n",
                    i, value);
            unreachable("unhandled TGSI property");
         }
      }
   }

   /* A compute shader with any zero workgroup dimension declares a
    * variable workgroup size. */
   if (s->info.stage == MESA_SHADER_COMPUTE &&
       (!s->info.workgroup_size[0] ||
        !s->info.workgroup_size[1] ||
        !s->info.workgroup_size[2]))
      s->info.workgroup_size_variable = true;

   /* Per-register bookkeeping arrays, all ralloc'd off the context. */
   c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
   c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);

   c->output_regs = rzalloc_array(c, struct ttn_reg_info,
                                  scan.file_max[TGSI_FILE_OUTPUT] + 1);
   c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
                                scan.file_max[TGSI_FILE_TEMPORARY] + 1);
   c->imm_defs = rzalloc_array(c, nir_def *,
                               scan.file_max[TGSI_FILE_IMMEDIATE] + 1);

   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

   /* Do the actual translation, then flush output registers to their
    * output variables. */
   ttn_parse_tgsi(c, tgsi_tokens);
   ttn_add_output_stores(c);

   nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");

   return c;
}
2384
/* Runs the standard NIR optimization loop until a full iteration makes no
 * progress.  Only used when the screen does not supply its own
 * finalize_nir hook (see ttn_finalize_nir).
 */
static void
ttn_optimize_nir(nir_shader *nir)
{
   bool progress;

   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      /* Linking deals with unused inputs/outputs, but here we can remove
       * things local to the shader in the hopes that we can cleanup other
       * things. This pass will also remove variables with only stores, so we
       * might be able to make progress after it.
       */
      NIR_PASS(progress, nir, nir_remove_dead_variables,
               nir_var_function_temp | nir_var_shader_temp |
               nir_var_mem_shared,
               NULL);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);

      /* Scalarize first if the backend wants scalar ALU ops. */
      if (nir->options->lower_to_scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar,
                    nir->options->lower_to_scalar_filter, NULL);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
      }

      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      /* Loop optimization opens up fresh copy-prop/DCE opportunities, so
       * run those immediately when it fires. */
      if (nir_opt_loop(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_phi_precision);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      if (!nir->info.flrp_lowered) {
         /* Bitmask of float widths whose flrp the backend wants lowered. */
         unsigned lower_flrp =
            (nir->options->lower_flrp16 ? 16 : 0) |
            (nir->options->lower_flrp32 ? 32 : 0) |
            (nir->options->lower_flrp64 ? 64 : 0);

         if (lower_flrp) {
            bool lower_flrp_progress = false;

            NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
                     lower_flrp,
                     false /* always_precise */);
            if (lower_flrp_progress) {
               NIR_PASS(progress, nir,
                        nir_opt_constant_folding);
               progress = true;
            }
         }

         /* Nothing should rematerialize any flrps, so we only need to do this
          * lowering once.
          */
         nir->info.flrp_lowered = true;
      }

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll);
      }
   } while (progress);
}
2465
2466 static bool
lower_clipdistance_to_array(nir_shader * nir)2467 lower_clipdistance_to_array(nir_shader *nir)
2468 {
2469 bool progress = false;
2470 nir_variable *dist0 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST0);
2471 nir_variable *dist1 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST1);
2472 /* resize VARYING_SLOT_CLIP_DIST0 to the full array size */
2473 dist0->type = glsl_array_type(glsl_float_type(), nir->info.clip_distance_array_size, sizeof(float));
2474 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2475 nir_foreach_function_impl(impl, nir) {
2476 bool func_progress = false;
2477 nir_builder b = nir_builder_at(nir_before_impl(impl));
2478 /* create a new deref for the arrayed clipdistance variable at the start of the function */
2479 nir_deref_instr *clipdist_deref = nir_build_deref_var(&b, dist0);
2480 nir_def *zero = nir_imm_zero(&b, 1, 32);
2481 nir_foreach_block(block, impl) {
2482 nir_foreach_instr_safe(instr, block) {
2483 /* filter through until a clipdistance store is reached */
2484 if (instr->type != nir_instr_type_intrinsic)
2485 continue;
2486 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2487 if (intr->intrinsic != nir_intrinsic_store_deref)
2488 continue;
2489 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2490 nir_variable *var = nir_deref_instr_get_variable(deref);
2491 if (var != dist0 && (!dist1 || var != dist1))
2492 continue;
2493 b.cursor = nir_before_instr(instr);
2494 uint32_t wrmask = nir_intrinsic_write_mask(intr);
2495 unsigned offset = var == dist1 ? 4 : 0;
2496 /* iterate over the store's writemask for components */
2497 for (unsigned i = 0; i < nir->info.clip_distance_array_size; i++) {
2498 /* deref the array member and store each component */
2499 nir_deref_instr *component_deref = nir_build_deref_array_imm(&b, clipdist_deref, i);
2500 nir_def *val = zero;
2501 if (wrmask & BITFIELD_BIT(i - offset))
2502 val = nir_channel(&b, intr->src[1].ssa, i - offset);
2503 nir_store_deref(&b, component_deref, val, 0x1);
2504 }
2505 func_progress = true;
2506 /* immediately remove the old store, save the original deref */
2507 nir_instr_remove(instr);
2508 _mesa_set_add(deletes, deref);
2509 }
2510 }
2511 if (func_progress)
2512 nir_metadata_preserve(impl, nir_metadata_none);
2513 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
2514 set_foreach_remove(deletes, he)
2515 nir_instr_remove((void*)he->key);
2516 }
2517 /* VARYING_SLOT_CLIP_DIST1 is no longer used and can be removed */
2518 if (dist1)
2519 exec_node_remove(&dist1->node);
2520 return progress;
2521 }
2522
2523 /**
2524 * Finalizes the NIR in a similar way as st_glsl_to_nir does.
2525 *
2526 * Drivers expect that these passes are already performed,
2527 * so we have to do it here too.
2528 */
static void
ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
{
   struct nir_shader *nir = c->build.shader;

   MESA_TRACE_FUNC();

   /* Get into SSA form first; the lowering passes below depend on it. */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_reg_intrinsics_to_ssa);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Lower RECT textures if the driver can't handle them natively. */
   if (!screen->caps.texrect) {
      const struct nir_lower_tex_options opts = { .lower_rect = true, };
      NIR_PASS_V(nir, nir_lower_tex, &opts);
   }

   /* driver needs clipdistance as array<float> */
   if ((nir->info.outputs_written &
        (BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) | BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))) &&
       nir->options->compact_arrays) {
      NIR_PASS_V(nir, lower_clipdistance_to_array);
   }

   if (nir->options->lower_uniforms_to_ubo)
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, !c->cap_integers);

   if (nir->options->lower_int64_options)
      NIR_PASS_V(nir, nir_lower_int64);

   if (!c->cap_samplers_as_deref)
      NIR_PASS_V(nir, nir_lower_samplers);

   /* Prefer the driver's own finalize hook; fall back to the generic
    * optimization loop otherwise. */
   if (screen->finalize_nir) {
      char *msg = screen->finalize_nir(screen, nir);
      free(msg);
   } else {
      ttn_optimize_nir(nir);
   }
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Keep TTN's own image/sampler counts authoritative over whatever
    * nir_shader_gather_info just computed. */
   nir->info.num_images = c->num_images;
   nir->info.num_textures = c->num_samplers;

   nir_validate_shader(nir, "TTN: after all optimizations");
}
2579
save_nir_to_disk_cache(struct disk_cache * cache,uint8_t key[CACHE_KEY_SIZE],const nir_shader * s)2580 static void save_nir_to_disk_cache(struct disk_cache *cache,
2581 uint8_t key[CACHE_KEY_SIZE],
2582 const nir_shader *s)
2583 {
2584 struct blob blob = {0};
2585
2586 blob_init(&blob);
2587 /* Because we cannot fully trust disk_cache_put
2588 * (EGL_ANDROID_blob_cache) we add the shader size,
2589 * which we'll check after disk_cache_get().
2590 */
2591 if (blob_reserve_uint32(&blob) != 0) {
2592 blob_finish(&blob);
2593 return;
2594 }
2595
2596 nir_serialize(&blob, s, true);
2597 *(uint32_t *)blob.data = blob.size;
2598
2599 disk_cache_put(cache, key, blob.data, blob.size, NULL);
2600 blob_finish(&blob);
2601 }
2602
2603 static nir_shader *
load_nir_from_disk_cache(struct disk_cache * cache,struct pipe_screen * screen,uint8_t key[CACHE_KEY_SIZE],unsigned processor)2604 load_nir_from_disk_cache(struct disk_cache *cache,
2605 struct pipe_screen *screen,
2606 uint8_t key[CACHE_KEY_SIZE],
2607 unsigned processor)
2608 {
2609 const nir_shader_compiler_options *options =
2610 screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, processor);
2611 struct blob_reader blob_reader;
2612 size_t size;
2613 nir_shader *s;
2614
2615 uint32_t *buffer = (uint32_t *)disk_cache_get(cache, key, &size);
2616 if (!buffer)
2617 return NULL;
2618
2619 /* Match found. No need to check crc32 or other things.
2620 * disk_cache_get is supposed to do that for us.
2621 * However we do still check if the first element is indeed the size,
2622 * as we cannot fully trust disk_cache_get (EGL_ANDROID_blob_cache) */
2623 if (buffer[0] != size) {
2624 free(buffer);
2625 return NULL;
2626 }
2627
2628 size -= 4;
2629 blob_reader_init(&blob_reader, buffer + 1, size);
2630 s = nir_deserialize(NULL, options, &blob_reader);
2631 free(buffer); /* buffer was malloc-ed */
2632 return s;
2633 }
2634
2635 struct nir_shader *
tgsi_to_nir(const void * tgsi_tokens,struct pipe_screen * screen,bool allow_disk_cache)2636 tgsi_to_nir(const void *tgsi_tokens,
2637 struct pipe_screen *screen,
2638 bool allow_disk_cache)
2639 {
2640 struct disk_cache *cache = NULL;
2641 struct ttn_compile *c;
2642 struct nir_shader *s = NULL;
2643 uint8_t key[CACHE_KEY_SIZE];
2644 unsigned processor;
2645
2646 if (allow_disk_cache)
2647 cache = screen->get_disk_shader_cache(screen);
2648
2649 /* Look first in the cache */
2650 if (cache) {
2651 disk_cache_compute_key(cache,
2652 tgsi_tokens,
2653 tgsi_num_tokens(tgsi_tokens) * sizeof(struct tgsi_token),
2654 key);
2655 processor = tgsi_get_processor_type(tgsi_tokens);
2656 s = load_nir_from_disk_cache(cache, screen, key, processor);
2657 }
2658
2659 if (s)
2660 return s;
2661
2662 #ifndef NDEBUG
2663 nir_process_debug_variable();
2664 #endif
2665
2666 if (NIR_DEBUG(TGSI)) {
2667 fprintf(stderr, "TGSI before translation to NIR:\n");
2668 tgsi_dump(tgsi_tokens, 0);
2669 }
2670
2671 /* Not in the cache */
2672
2673 c = ttn_compile_init(tgsi_tokens, NULL, screen);
2674 s = c->build.shader;
2675 ttn_finalize_nir(c, screen);
2676 ralloc_free(c);
2677
2678 if (NIR_DEBUG(TGSI)) {
2679 mesa_logi("NIR after translation from TGSI:\n");
2680 nir_log_shaderi(s);
2681 }
2682
2683 if (cache)
2684 save_nir_to_disk_cache(cache, key, s);
2685
2686 return s;
2687 }
2688
2689 struct nir_shader *
tgsi_to_nir_noscreen(const void * tgsi_tokens,const nir_shader_compiler_options * options)2690 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2691 const nir_shader_compiler_options *options)
2692 {
2693 struct ttn_compile *c;
2694 struct nir_shader *s;
2695
2696 c = ttn_compile_init(tgsi_tokens, options, NULL);
2697 s = c->build.shader;
2698 ralloc_free(c);
2699
2700 return s;
2701 }
2702