/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "float64_glsl.h"
#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_optimization.h"
#include "program.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "compiler/nir/nir_deref.h"
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "util/u_math.h"
#include "util/perf/cpu_trace.h"

/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

namespace {

class nir_visitor : public ir_visitor
{
public:
   nir_visitor(const struct gl_constants *consts, nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_def *evaluate_rvalue(ir_rvalue *ir);

   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
                       nir_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
                       nir_def *src2, nir_def *src3);

   bool supports_std430;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_def *result; /* result of the expression tree last visited */

   nir_deref_instr *evaluate_deref(ir_instruction *ir);

   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);

   /* most recent deref instruction created */
   nir_deref_instr *deref;

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   ir_function_signature *sig;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function */
   struct hash_table *overload_table;

   /* set of nir_variables holding sparse results */
   struct set *sparse_variable_set;

   void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                               nir_def *dest);

   const struct gl_constants *consts;
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

} /* end of anonymous namespace */

nir_shader *
glsl_to_nir(const struct gl_constants *consts,
            const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   MESA_TRACE_FUNC();

   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(consts, shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* The GLSL IR won't be needed anymore. */
   ralloc_free(sh->ir);
   sh->ir = NULL;

   nir_validate_shader(shader, "after glsl to nir, before function inline");
   if (should_print_nir(shader)) {
      printf("glsl_to_nir\n");
      nir_print_shader(shader, stdout);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS(_, shader, nir_lower_variable_initializers, nir_var_all);
   NIR_PASS(_, shader, nir_lower_returns);
   NIR_PASS(_, shader, nir_inline_functions);
   NIR_PASS(_, shader, nir_opt_deref);

   nir_validate_shader(shader, "after function inlining and return lowering");

   /* We set func->is_entrypoint after nir_function_create if the function
    * is named "main", so we can use nir_remove_non_entrypoints() here.
    * Now that we have inlined everything, remove all of the functions
    * except the entrypoint.
    */
   nir_remove_non_entrypoints(shader);

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   shader->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;

   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
      shader->info.fs.advanced_blend_modes = sh->Program->info.fs.advanced_blend_modes;
   }

   return shader;
}

nir_visitor::nir_visitor(const struct gl_constants *consts, nir_shader *shader)
{
   this->consts = consts;
   this->supports_std430 = consts->UseSTD430AsDefaultPacking;
   this->shader = shader;
   this->is_global = true;
   this->var_table = _mesa_pointer_hash_table_create(NULL);
   this->overload_table = _mesa_pointer_hash_table_create(NULL);
   this->sparse_variable_set = _mesa_pointer_set_create(NULL);
   this->result = NULL;
   this->impl = NULL;
   this->deref = NULL;
   this->sig = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
   _mesa_set_destroy(this->sparse_variable_set, NULL);
}

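/* Visit an IR instruction that yields a dereference and return the
 * nir_deref_instr the visit produced (stashed in this->deref).
 */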
nir_deref_instr *
nir_visitor::evaluate_deref(ir_instruction *ir)
{
   ir->accept(this);
   return this->deref;
}

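/* Deep-copy an ir_constant into a freshly allocated nir_constant.  Matrices
 * are stored as arrays of column constants; structs and arrays recurse into
 * their elements.
 */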
nir_constant *
nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u32 = ir->value.u[r];

      break;

   case GLSL_TYPE_UINT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u16 = ir->value.u16[r];
      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i32 = ir->value.i[r];

      break;

   case GLSL_TYPE_INT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i16 = ir->value.i16[r];
      break;

   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
      if (cols > 1) {
         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
         ret->num_elements = cols;
         for (unsigned c = 0; c < cols; c++) {
            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
            col_const->num_elements = 0;
            switch (ir->type->base_type) {
            case GLSL_TYPE_FLOAT:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f32 = ir->value.f[c * rows + r];
               break;

            case GLSL_TYPE_FLOAT16:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
               break;

            case GLSL_TYPE_DOUBLE:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f64 = ir->value.d[c * rows + r];
               break;

            default:
               unreachable("Cannot get here from the first level switch");
            }
            ret->elements[c] = col_const;
         }
      } else {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f32 = ir->value.f[r];
            break;

         case GLSL_TYPE_FLOAT16:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].u16 = ir->value.f16[r];
            break;

         case GLSL_TYPE_DOUBLE:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f64 = ir->value.d[r];
            break;

         default:
            unreachable("Cannot get here from the first level switch");
         }
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u64 = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i64 = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].b = ir->value.b[r];

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}

void
nir_visitor::adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                                    nir_def *dest)
{
   const glsl_type *texel_type = glsl_get_field_type(type, "texel");
   assert(texel_type);

   assert(var_deref->deref_type == nir_deref_type_var);
   nir_variable *var = var_deref->var;

   /* Adjust the nir_variable type to match the sparse NIR instructions: the
    * nir_variable is created with the struct type from the ir_variable, but
    * sparse NIR instructions write a plain vector destination.
    */
   var->type = glsl_simple_type(glsl_get_base_glsl_type(texel_type)->base_type,
                                dest->num_components, 1);

   var_deref->type = var->type;

   /* Record the adjusted variable. */
   _mesa_set_add(this->sparse_variable_set, var);
}

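/* Translate the GLSL IR how_declared enum to its NIR counterpart; anything
 * that is neither hidden nor implicit counts as a normal declaration.
 */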
static unsigned
get_nir_how_declared(unsigned how_declared)
{
   if (how_declared == ir_var_hidden)
      return nir_var_hidden;

   if (how_declared == ir_var_declared_implicitly)
      return nir_var_declared_implicitly;

   return nir_var_declared_normally;
}

void
nir_visitor::visit(ir_variable *ir)
{
   /* FINISHME: inout parameters */
   assert(ir->data.mode != ir_var_function_inout);

   if (ir->data.mode == ir_var_function_out)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.assigned = ir->data.assigned;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
   var->data.invariant = ir->data.invariant;
   var->data.explicit_invariant = ir->data.explicit_invariant;
   var->data.location = ir->data.location;
   var->data.must_be_shader_input = ir->data.must_be_shader_input;
   var->data.stream = ir->data.stream;
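   /* The high bit of the GLSL IR stream field marks a packed stream layout;
    * carry it over as NIR_STREAM_PACKED.
    */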
   if (ir->data.stream & (1u << 31))
      var->data.stream |= NIR_STREAM_PACKED;

   var->data.precision = ir->data.precision;
   var->data.explicit_location = ir->data.explicit_location;
   var->data.matrix_layout = ir->data.matrix_layout;
   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
   var->data.compact = false;
   var->data.used = ir->data.used;

   switch(ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_shader_temp;
      else
         var->data.mode = nir_var_function_temp;
      break;

   case ir_var_function_in:
   case ir_var_const_in:
      var->data.mode = nir_var_function_temp;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      break;

   case ir_var_uniform:
      if (ir->get_interface_type())
         var->data.mode = nir_var_mem_ubo;
      else if (glsl_type_contains_image(ir->type) && !ir->data.bindless)
         var->data.mode = nir_var_image;
      else
         var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_mem_ssbo;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   case ir_var_shader_shared:
      var->data.mode = nir_var_mem_shared;
      break;

   default:
      unreachable("not reached");
   }

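   /* Accumulate memory access qualifiers.  Note the inversion: readonly
    * means the variable may not be written, and writeonly means it may not
    * be read.
    */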
   unsigned mem_access = 0;
   if (ir->data.memory_read_only)
      mem_access |= ACCESS_NON_WRITEABLE;
   if (ir->data.memory_write_only)
      mem_access |= ACCESS_NON_READABLE;
   if (ir->data.memory_coherent)
      mem_access |= ACCESS_COHERENT;
   if (ir->data.memory_volatile)
      mem_access |= ACCESS_VOLATILE;
   if (ir->data.memory_restrict)
      mem_access |= ACCESS_RESTRICT;

   var->interface_type = ir->get_interface_type();

   /* For UBO and SSBO variables, we need explicit types */
   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
      const glsl_type *explicit_ifc_type =
         glsl_get_explicit_interface_type(ir->get_interface_type(), supports_std430);

      var->interface_type = explicit_ifc_type;

      if (glsl_type_is_interface(glsl_without_array(ir->type))) {
         /* If the type contains the interface, wrap the explicit type in the
          * right number of arrays.
          */
         var->type = glsl_type_wrap_in_arrays(explicit_ifc_type, ir->type);
      } else {
         /* Otherwise, this variable is one entry in the interface */
         UNUSED bool found = false;
         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
            const glsl_struct_field *field =
               &explicit_ifc_type->fields.structure[i];
            if (strcmp(ir->name, field->name) != 0)
               continue;

            var->type = field->type;
            if (field->memory_read_only)
               mem_access |= ACCESS_NON_WRITEABLE;
            if (field->memory_write_only)
               mem_access |= ACCESS_NON_READABLE;
            if (field->memory_coherent)
               mem_access |= ACCESS_COHERENT;
            if (field->memory_volatile)
               mem_access |= ACCESS_VOLATILE;
            if (field->memory_restrict)
               mem_access |= ACCESS_RESTRICT;

            found = true;
            break;
         }
         assert(found);
      }
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.explicit_binding = ir->data.explicit_binding;
   var->data.explicit_offset = ir->data.explicit_xfb_offset;
   var->data.bindless = ir->data.bindless;
   var->data.offset = ir->data.offset;
   var->data.access = (gl_access_qualifier)mem_access;

   if (glsl_type_is_image(glsl_without_array(var->type))) {
      var->data.image.format = ir->data.image_format;
   } else if (var->data.mode == nir_var_shader_out) {
      var->data.xfb.buffer = ir->data.xfb_buffer;
      var->data.xfb.stride = ir->data.xfb_stride;
   }

   var->data.fb_fetch_output = ir->data.fb_fetch_output;
   var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
   var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;

   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = rzalloc_array(var, nir_state_slot,
                                       var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 4; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
      }
   } else {
      var->state_slots = NULL;
   }

   /* Values declared const will have ir->constant_value instead of
    * ir->constant_initializer.
    */
   if (ir->constant_initializer)
      var->constant_initializer = constant_copy(ir->constant_initializer, var);
   else
      var->constant_initializer = constant_copy(ir->constant_value, var);

   if (var->data.mode == nir_var_function_temp)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
}

ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());
   if (strcmp(ir->function_name(), "main") == 0)
      func->is_entrypoint = true;

   func->num_params = ir->parameters.length() +
                      (ir->return_type != &glsl_type_builtin_void);
   func->params = ralloc_array(shader, nir_parameter, func->num_params);

   unsigned np = 0;

   if (ir->return_type != &glsl_type_builtin_void) {
      /* The return value is a variable deref (basically an out parameter) */
      func->params[np].num_components = 1;
      func->params[np].bit_size = 32;
      np++;
   }

   foreach_in_list(ir_variable, param, &ir->parameters) {
      /* FINISHME: pass arrays, structs, etc by reference? */
      assert(glsl_type_is_vector(param->type) || glsl_type_is_scalar(param->type));

      if (param->data.mode == ir_var_function_in) {
         func->params[np].num_components = param->type->vector_elements;
         func->params[np].bit_size = glsl_get_bit_size(param->type);
      } else {
         func->params[np].num_components = 1;
         func->params[np].bit_size = 32;
      }
      np++;
   }
   assert(np == func->num_params);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   this->sig = ir;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      this->is_global = false;

      b = nir_builder_at(nir_after_impl(impl));

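      /* Parameter 0 holds the return deref when the function returns a
       * value, so named parameters start at index 1 in that case.
       */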
      unsigned i = (ir->return_type != &glsl_type_builtin_void) ? 1 : 0;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         nir_variable *var =
            nir_local_variable_create(impl, param->type, param->name);

         if (param->data.mode == ir_var_function_in) {
            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
         }

         _mesa_hash_table_insert(var_table, param, var);
         i++;
      }

      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   if (ir->condition)
      nir_discard_if(&b, evaluate_rvalue(ir->condition));
   else
      nir_discard(&b);
}

void
nir_visitor::visit(ir_demote *ir)
{
   nir_demote(&b);
}

void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_emit_vertex(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_end_primitive(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_deref_instr *ret_deref =
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

      nir_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

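/* std430 aligns a vector to its component size times the component count
 * rounded up to a power of two, so e.g. a vec3 gets vec4 alignment.
 * Booleans are stored as 32-bit values.
 */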
static void
intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
{
   unsigned bit_size = glsl_type_is_boolean(type) ? 32 : glsl_get_bit_size(type);
   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
}

/* Accumulate any qualifiers along the deref chain to get the actual
 * load/store qualifier.
 */

static enum gl_access_qualifier
deref_get_qualifier(nir_deref_instr *deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned qualifiers = path.path[0]->var->data.access;

   const glsl_type *parent_type = path.path[0]->type;
   for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) {
      nir_deref_instr *cur = *cur_ptr;

      if (glsl_type_is_interface(parent_type)) {
         const struct glsl_struct_field *field =
            &parent_type->fields.structure[cur->strct.index];
         if (field->memory_read_only)
            qualifiers |= ACCESS_NON_WRITEABLE;
         if (field->memory_write_only)
            qualifiers |= ACCESS_NON_READABLE;
         if (field->memory_coherent)
            qualifiers |= ACCESS_COHERENT;
         if (field->memory_volatile)
            qualifiers |= ACCESS_VOLATILE;
         if (field->memory_restrict)
            qualifiers |= ACCESS_RESTRICT;
      }

      parent_type = cur->type;
   }

   nir_deref_path_finish(&path);

   return (gl_access_qualifier) qualifiers;
}

void
nir_visitor::visit(ir_call *ir)
{
   if (ir->callee->is_intrinsic()) {
      nir_intrinsic_op op;

      /* Initialize to something because gcc complains otherwise */
      nir_atomic_op atomic_op = nir_atomic_op_iadd;

      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_generic_atomic_add:
         op = nir_intrinsic_deref_atomic;
         atomic_op = glsl_type_is_integer_32_64(ir->return_deref->type)
                        ? nir_atomic_op_iadd : nir_atomic_op_fadd;
         break;
      case ir_intrinsic_generic_atomic_and:
         op = nir_intrinsic_deref_atomic;
         atomic_op = nir_atomic_op_iand;
         break;
      case ir_intrinsic_generic_atomic_or:
         op = nir_intrinsic_deref_atomic;
         atomic_op = nir_atomic_op_ior;
         break;
      case ir_intrinsic_generic_atomic_xor:
         op = nir_intrinsic_deref_atomic;
         atomic_op = nir_atomic_op_ixor;
         break;
      case ir_intrinsic_generic_atomic_min:
         assert(ir->return_deref);
         op = nir_intrinsic_deref_atomic;
         if (ir->return_deref->type == &glsl_type_builtin_int ||
             ir->return_deref->type == &glsl_type_builtin_int64_t)
            atomic_op = nir_atomic_op_imin;
         else if (ir->return_deref->type == &glsl_type_builtin_uint ||
                  ir->return_deref->type == &glsl_type_builtin_uint64_t)
            atomic_op = nir_atomic_op_umin;
         else if (ir->return_deref->type == &glsl_type_builtin_float)
            atomic_op = nir_atomic_op_fmin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_max:
         assert(ir->return_deref);
         op = nir_intrinsic_deref_atomic;
         if (ir->return_deref->type == &glsl_type_builtin_int ||
             ir->return_deref->type == &glsl_type_builtin_int64_t)
            atomic_op = nir_atomic_op_imax;
         else if (ir->return_deref->type == &glsl_type_builtin_uint ||
                  ir->return_deref->type == &glsl_type_builtin_uint64_t)
            atomic_op = nir_atomic_op_umax;
         else if (ir->return_deref->type == &glsl_type_builtin_float)
            atomic_op = nir_atomic_op_fmax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_exchange:
         op = nir_intrinsic_deref_atomic;
         atomic_op = nir_atomic_op_xchg;
         break;
      case ir_intrinsic_generic_atomic_comp_swap:
         op = nir_intrinsic_deref_atomic_swap;
         atomic_op = glsl_type_is_integer_32_64(ir->return_deref->type)
                        ? nir_atomic_op_cmpxchg
                        : nir_atomic_op_fcmpxchg;
         break;
      case ir_intrinsic_atomic_counter_read:
         op = nir_intrinsic_atomic_counter_read_deref;
         break;
      case ir_intrinsic_atomic_counter_increment:
         op = nir_intrinsic_atomic_counter_inc_deref;
         break;
      case ir_intrinsic_atomic_counter_predecrement:
         op = nir_intrinsic_atomic_counter_pre_dec_deref;
         break;
      case ir_intrinsic_atomic_counter_add:
         op = nir_intrinsic_atomic_counter_add_deref;
         break;
      case ir_intrinsic_atomic_counter_and:
         op = nir_intrinsic_atomic_counter_and_deref;
         break;
      case ir_intrinsic_atomic_counter_or:
         op = nir_intrinsic_atomic_counter_or_deref;
         break;
      case ir_intrinsic_atomic_counter_xor:
         op = nir_intrinsic_atomic_counter_xor_deref;
         break;
      case ir_intrinsic_atomic_counter_min:
         op = nir_intrinsic_atomic_counter_min_deref;
         break;
      case ir_intrinsic_atomic_counter_max:
         op = nir_intrinsic_atomic_counter_max_deref;
         break;
      case ir_intrinsic_atomic_counter_exchange:
         op = nir_intrinsic_atomic_counter_exchange_deref;
         break;
      case ir_intrinsic_atomic_counter_comp_swap:
         op = nir_intrinsic_atomic_counter_comp_swap_deref;
         break;
      case ir_intrinsic_image_load:
         op = nir_intrinsic_image_deref_load;
         break;
      case ir_intrinsic_image_store:
         op = nir_intrinsic_image_deref_store;
         break;
      case ir_intrinsic_image_atomic_add:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = glsl_type_is_integer_32_64(ir->return_deref->type)
                        ? nir_atomic_op_iadd
                        : nir_atomic_op_fadd;
         break;
      case ir_intrinsic_image_atomic_min:
         op = nir_intrinsic_image_deref_atomic;
         if (ir->return_deref->type == &glsl_type_builtin_int)
            atomic_op = nir_atomic_op_imin;
         else if (ir->return_deref->type == &glsl_type_builtin_uint)
            atomic_op = nir_atomic_op_umin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_max:
         op = nir_intrinsic_image_deref_atomic;
         if (ir->return_deref->type == &glsl_type_builtin_int)
            atomic_op = nir_atomic_op_imax;
         else if (ir->return_deref->type == &glsl_type_builtin_uint)
            atomic_op = nir_atomic_op_umax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_and:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_iand;
         break;
      case ir_intrinsic_image_atomic_or:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_ior;
         break;
      case ir_intrinsic_image_atomic_xor:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_ixor;
         break;
      case ir_intrinsic_image_atomic_exchange:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_xchg;
         break;
      case ir_intrinsic_image_atomic_comp_swap:
         op = nir_intrinsic_image_deref_atomic_swap;
         atomic_op = nir_atomic_op_cmpxchg;
         break;
      case ir_intrinsic_image_atomic_inc_wrap:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_inc_wrap;
         break;
      case ir_intrinsic_image_atomic_dec_wrap:
         op = nir_intrinsic_image_deref_atomic;
         atomic_op = nir_atomic_op_dec_wrap;
         break;
      case ir_intrinsic_memory_barrier:
      case ir_intrinsic_memory_barrier_buffer:
      case ir_intrinsic_memory_barrier_image:
      case ir_intrinsic_memory_barrier_shared:
      case ir_intrinsic_memory_barrier_atomic_counter:
      case ir_intrinsic_group_memory_barrier:
         op = nir_intrinsic_barrier;
         break;
      case ir_intrinsic_image_size:
         op = nir_intrinsic_image_deref_size;
         break;
      case ir_intrinsic_image_samples:
         op = nir_intrinsic_image_deref_samples;
         break;
      case ir_intrinsic_image_sparse_load:
         op = nir_intrinsic_image_deref_sparse_load;
         break;
      case ir_intrinsic_shader_clock:
         op = nir_intrinsic_shader_clock;
         break;
      case ir_intrinsic_begin_invocation_interlock:
         op = nir_intrinsic_begin_invocation_interlock;
         break;
      case ir_intrinsic_end_invocation_interlock:
         op = nir_intrinsic_end_invocation_interlock;
         break;
      case ir_intrinsic_vote_any:
         op = nir_intrinsic_vote_any;
         break;
      case ir_intrinsic_vote_all:
         op = nir_intrinsic_vote_all;
         break;
      case ir_intrinsic_vote_eq:
         op = nir_intrinsic_vote_ieq;
         break;
      case ir_intrinsic_ballot:
         op = nir_intrinsic_ballot;
         break;
      case ir_intrinsic_read_invocation:
         op = nir_intrinsic_read_invocation;
         break;
      case ir_intrinsic_read_first_invocation:
         op = nir_intrinsic_read_first_invocation;
         break;
      case ir_intrinsic_helper_invocation:
         op = nir_intrinsic_is_helper_invocation;
         break;
      case ir_intrinsic_is_sparse_texels_resident:
         op = nir_intrinsic_is_sparse_texels_resident;
         break;
      default:
         unreachable("not reached");
      }

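      /* Create the intrinsic; its sources, destination, and indices are
       * filled in per-op below.  "ret" defaults to the intrinsic's own def
       * and may be replaced, e.g. when a boolean shared load needs a b2b1
       * conversion.
       */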
      nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
      nir_def *ret = &instr->def;

      switch (op) {
      case nir_intrinsic_deref_atomic:
      case nir_intrinsic_deref_atomic_swap: {
         int param_count = ir->actual_parameters.length();
         assert(param_count == 2 || param_count == 3);

         /* Deref */
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *rvalue = (ir_rvalue *) param;
         ir_dereference *deref = rvalue->as_dereference();
         ir_swizzle *swizzle = NULL;
         if (!deref) {
            /* We may have a swizzle to pick off a single vec4 component */
            swizzle = rvalue->as_swizzle();
            assert(swizzle && swizzle->type->vector_elements == 1);
            deref = swizzle->val->as_dereference();
            assert(deref);
         }
         nir_deref_instr *nir_deref = evaluate_deref(deref);
         if (swizzle) {
            nir_deref = nir_build_deref_array_imm(&b, nir_deref,
                                                  swizzle->mask.x);
         }
         instr->src[0] = nir_src_for_ssa(&nir_deref->def);

         nir_intrinsic_set_atomic_op(instr, atomic_op);
         nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref));

         /* data1 parameter (this is always present) */
         param = param->get_next();
         ir_instruction *inst = (ir_instruction *) param;
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));

         /* data2 parameter (only with atomic_comp_swap) */
         if (param_count == 3) {
            assert(op == nir_intrinsic_deref_atomic_swap);
            param = param->get_next();
            inst = (ir_instruction *) param;
            instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
         }

         /* Atomic result */
         assert(ir->return_deref);
         if (glsl_type_is_integer_64(ir->return_deref->type)) {
            nir_def_init(&instr->instr, &instr->def,
                         ir->return_deref->type->vector_elements, 64);
         } else {
            nir_def_init(&instr->instr, &instr->def,
                         ir->return_deref->type->vector_elements, 32);
         }
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_atomic_counter_read_deref:
      case nir_intrinsic_atomic_counter_inc_deref:
      case nir_intrinsic_atomic_counter_pre_dec_deref:
      case nir_intrinsic_atomic_counter_add_deref:
      case nir_intrinsic_atomic_counter_min_deref:
      case nir_intrinsic_atomic_counter_max_deref:
      case nir_intrinsic_atomic_counter_and_deref:
      case nir_intrinsic_atomic_counter_or_deref:
      case nir_intrinsic_atomic_counter_xor_deref:
      case nir_intrinsic_atomic_counter_exchange_deref:
      case nir_intrinsic_atomic_counter_comp_swap_deref: {
         /* Set the counter variable dereference. */
         exec_node *param = ir->actual_parameters.get_head();
         ir_dereference *counter = (ir_dereference *)param;

         instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->def);
         param = param->get_next();

         /* Set the intrinsic destination. */
         if (ir->return_deref) {
            nir_def_init(&instr->instr, &instr->def, 1, 32);
         }

         /* Set the intrinsic parameters. */
         if (!param->is_tail_sentinel()) {
            instr->src[1] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         }

         if (!param->is_tail_sentinel()) {
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         }

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_sparse_load: {
         /* Set the image variable dereference. */
         exec_node *param = ir->actual_parameters.get_head();
         ir_dereference *image = (ir_dereference *)param;
         nir_deref_instr *deref = evaluate_deref(image);
         const glsl_type *type = deref->type;

         nir_intrinsic_set_access(instr, deref_get_qualifier(deref));

         if (op == nir_intrinsic_image_deref_atomic ||
             op == nir_intrinsic_image_deref_atomic_swap) {
            nir_intrinsic_set_atomic_op(instr, atomic_op);
         }

         instr->src[0] = nir_src_for_ssa(&deref->def);
         param = param->get_next();
         nir_intrinsic_set_image_dim(instr,
            (glsl_sampler_dim)type->sampler_dimensionality);
         nir_intrinsic_set_image_array(instr, type->sampler_array);

         /* Set the intrinsic destination. */
         if (ir->return_deref) {
            unsigned num_components;
            if (op == nir_intrinsic_image_deref_sparse_load) {
               const glsl_type *dest_type =
                  glsl_get_field_type(ir->return_deref->type, "texel");
               /* One extra component to hold residency code. */
               num_components = dest_type->vector_elements + 1;
            } else
               num_components = ir->return_deref->type->vector_elements;

            nir_def_init(&instr->instr, &instr->def, num_components, 32);
         }

         if (op == nir_intrinsic_image_deref_size) {
            instr->num_components = instr->def.num_components;
         } else if (op == nir_intrinsic_image_deref_load ||
                    op == nir_intrinsic_image_deref_sparse_load) {
            instr->num_components = instr->def.num_components;
            nir_intrinsic_set_dest_type(instr,
               nir_get_nir_type_for_glsl_base_type(type->sampled_type));
         } else if (op == nir_intrinsic_image_deref_store) {
            instr->num_components = 4;
            nir_intrinsic_set_src_type(instr,
               nir_get_nir_type_for_glsl_base_type(type->sampled_type));
         }

         if (op == nir_intrinsic_image_deref_size ||
             op == nir_intrinsic_image_deref_samples) {
            /* image_deref_size takes an LOD parameter which is always 0
             * coming from GLSL.
             */
            if (op == nir_intrinsic_image_deref_size)
               instr->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_builder_instr_insert(&b, &instr->instr);
            break;
         }

         /* Set the address argument, extending the coordinate vector to four
          * components.
          */
         nir_def *src_addr =
            evaluate_rvalue((ir_dereference *)param);
         nir_def *srcs[4];

         for (int i = 0; i < 4; i++) {
            if (i < glsl_get_sampler_coordinate_components(type))
               srcs[i] = nir_channel(&b, src_addr, i);
            else
               srcs[i] = nir_undef(&b, 1, 32);
         }

         instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
         param = param->get_next();

         /* Set the sample argument, which is undefined for single-sample
          * images.
          */
         if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else {
            instr->src[2] = nir_src_for_ssa(nir_undef(&b, 1, 32));
         }

         /* Set the intrinsic parameters. */
         if (!param->is_tail_sentinel()) {
            instr->src[3] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_load ||
                    op == nir_intrinsic_image_deref_sparse_load) {
            instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         if (!param->is_tail_sentinel()) {
            instr->src[4] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_store) {
            instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_barrier: {
         /* The nir_intrinsic_barrier follows the general
          * semantics of SPIR-V memory barriers, so this and other memory
          * barriers use the mapping based on GLSL->SPIR-V from
          *
          *   https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_gl_spirv.txt
          */
         mesa_scope scope;
         unsigned modes;
         switch (ir->callee->intrinsic_id) {
         case ir_intrinsic_memory_barrier:
            scope = SCOPE_DEVICE;
            modes = nir_var_image |
                    nir_var_mem_ssbo |
                    nir_var_mem_shared |
                    nir_var_mem_global;
            break;
         case ir_intrinsic_memory_barrier_buffer:
            scope = SCOPE_DEVICE;
            modes = nir_var_mem_ssbo |
                    nir_var_mem_global;
            break;
         case ir_intrinsic_memory_barrier_image:
            scope = SCOPE_DEVICE;
            modes = nir_var_image;
            break;
         case ir_intrinsic_memory_barrier_shared:
            /* Both ARB_gl_spirv and glslang lower this to Device scope, so
             * follow their lead.  Note GL_KHR_vulkan_glsl also does
             * something similar.
             */
            scope = SCOPE_DEVICE;
            modes = nir_var_mem_shared;
            break;
         case ir_intrinsic_group_memory_barrier:
            scope = SCOPE_WORKGROUP;
            modes = nir_var_image |
                    nir_var_mem_ssbo |
                    nir_var_mem_shared |
                    nir_var_mem_global;
            break;
         case ir_intrinsic_memory_barrier_atomic_counter:
            /* There's no nir_var_atomic_counter, but since atomic counters are lowered
             * to SSBOs, we use nir_var_mem_ssbo instead.
             */
            scope = SCOPE_DEVICE;
            modes = nir_var_mem_ssbo;
            break;
         default:
            unreachable("invalid intrinsic id for memory barrier");
         }

         nir_scoped_memory_barrier(&b, scope, NIR_MEMORY_ACQ_REL,
                                   (nir_variable_mode)modes);
         break;
      }
      case nir_intrinsic_shader_clock:
         nir_def_init(&instr->instr, &instr->def, 2, 32);
         nir_intrinsic_set_memory_scope(instr, SCOPE_SUBGROUP);
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_begin_invocation_interlock:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_end_invocation_interlock:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_store_ssbo: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
         assert(write_mask);

         nir_def *nir_val = evaluate_rvalue(val);
         if (glsl_type_is_boolean(val->type))
            nir_val = nir_b2i32(&b, nir_val);

         instr->src[0] = nir_src_for_ssa(nir_val);
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
         instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
         intrinsic_set_std430_align(instr, val->type);
         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
         instr->num_components = val->type->vector_elements;

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_load_shared: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         nir_intrinsic_set_base(instr, 0);
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));

         const glsl_type *type = ir->return_deref->var->type;
         instr->num_components = type->vector_elements;
         intrinsic_set_std430_align(instr, type);

         /* Set up the SSA destination. */
         unsigned bit_size = glsl_type_is_boolean(type) ? 32 : glsl_get_bit_size(type);
         nir_def_init(&instr->instr, &instr->def, type->vector_elements,
                      bit_size);

         nir_builder_instr_insert(&b, &instr->instr);

         /* The value in shared memory is a 32-bit value */
         if (glsl_type_is_boolean(type))
            ret = nir_b2b1(&b, &instr->def);
         break;
      }
      case nir_intrinsic_store_shared: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
         assert(write_mask);

         nir_intrinsic_set_base(instr, 0);
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));

         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);

         nir_def *nir_val = evaluate_rvalue(val);
         /* The value in shared memory is a 32-bit value */
         if (glsl_type_is_boolean(val->type))
            nir_val = nir_b2b32(&b, nir_val);

         instr->src[0] = nir_src_for_ssa(nir_val);
         instr->num_components = val->type->vector_elements;
         intrinsic_set_std430_align(instr, val->type);

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_vote_ieq:
         instr->num_components = 1;
         FALLTHROUGH;
      case nir_intrinsic_vote_any:
      case nir_intrinsic_vote_all: {
         nir_def_init(&instr->instr, &instr->def, 1, 1);

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }

      case nir_intrinsic_ballot: {
         nir_def_init(&instr->instr, &instr->def,
                      ir->return_deref->type->vector_elements, 64);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_read_invocation: {
         nir_def_init(&instr->instr, &instr->def,
                      ir->return_deref->type->vector_elements, 32);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next;
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_read_first_invocation: {
         nir_def_init(&instr->instr, &instr->def,
                      ir->return_deref->type->vector_elements, 32);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_is_helper_invocation: {
         nir_def_init(&instr->instr, &instr->def, 1, 1);
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_is_sparse_texels_resident: {
         nir_def_init(&instr->instr, &instr->def, 1, 1);

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      default:
         unreachable("not reached");
      }

      if (ir->return_deref) {
         nir_deref_instr *ret_deref = evaluate_deref(ir->return_deref);

         if (op == nir_intrinsic_image_deref_sparse_load)
            adjust_sparse_variable(ret_deref, ir->return_deref->type, ret);

         nir_store_deref(&b, ret_deref, ret, ~0);
      }

      return;
   }

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir->callee);
   assert(entry);
   nir_function *callee = (nir_function *) entry->data;

   nir_call_instr *call = nir_call_instr_create(this->shader, callee);

   unsigned i = 0;
   nir_deref_instr *ret_deref = NULL;
   if (ir->return_deref) {
      nir_variable *ret_tmp =
         nir_local_variable_create(this->impl, ir->return_deref->type,
                                   "return_tmp");
      ret_deref = nir_build_deref_var(&b, ret_tmp);
      call->params[i++] = nir_src_for_ssa(&ret_deref->def);
   }

   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;
      ir_variable *sig_param = (ir_variable *) formal_node;

      if (sig_param->data.mode == ir_var_function_out ||
          sig_param->data.mode == ir_var_function_inout) {
         nir_variable *out_param =
            nir_local_variable_create(this->impl, sig_param->type, "param");
         out_param->data.precision = sig_param->data.precision;
         nir_deref_instr *out_param_deref = nir_build_deref_var(&b, out_param);

         if (sig_param->data.mode == ir_var_function_inout) {
            nir_store_deref(&b, out_param_deref,
                            nir_load_deref(&b, evaluate_deref(param_rvalue)),
                            ~0);
         }

         call->params[i] = nir_src_for_ssa(&out_param_deref->def);
      } else if (sig_param->data.mode == ir_var_function_in) {
         nir_def *val = evaluate_rvalue(param_rvalue);
         call->params[i] = nir_src_for_ssa(val);
      }

      i++;
   }

   nir_builder_instr_insert(&b, &call->instr);

   /* Copy out params.  We must do this after the function call to ensure we
    * do not overwrite global variables prematurely.
    */
   i = ir->return_deref ? 1 : 0;
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;
      ir_variable *sig_param = (ir_variable *) formal_node;

      if (sig_param->data.mode == ir_var_function_out ||
          sig_param->data.mode == ir_var_function_inout) {
         nir_store_deref(&b, evaluate_deref(param_rvalue),
                         nir_load_deref(&b, nir_src_as_deref(call->params[i])),
                         ~0);
      }

      i++;
   }

   if (ir->return_deref)
      nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0);
}

void
nir_visitor::visit(ir_assignment *ir)
{
   unsigned num_components = ir->lhs->type->vector_elements;
   unsigned write_mask = ir->write_mask;

   b.exact = ir->lhs->variable_referenced()->data.invariant ||
             ir->lhs->variable_referenced()->data.precise;

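   /* A whole-object assignment from a dereference or constant can be emitted
    * as a single copy_deref instead of a load/store pair.
    */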
   if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
       (write_mask == BITFIELD_MASK(num_components) || write_mask == 0)) {
      nir_deref_instr *lhs = evaluate_deref(ir->lhs);
      nir_deref_instr *rhs = evaluate_deref(ir->rhs);
      enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs);
      enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs);

      nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
                                 rhs_qualifiers);
      return;
   }

   ir_texture *tex = ir->rhs->as_texture();
   bool is_sparse = tex && tex->is_sparse;

   if (!is_sparse)
      assert(glsl_type_is_scalar(ir->rhs->type) || glsl_type_is_vector(ir->rhs->type));

   ir->lhs->accept(this);
   nir_deref_instr *lhs_deref = this->deref;
   nir_def *src = evaluate_rvalue(ir->rhs);

   if (is_sparse) {
      adjust_sparse_variable(lhs_deref, tex->type, src);

      /* Correct the component count and write mask; they are 0 for the
       * struct type.
       */
      num_components = src->num_components;
      write_mask = BITFIELD_MASK(num_components);
   }

   if (write_mask != BITFIELD_MASK(num_components) && write_mask != 0) {
      /* GLSL IR will give us the input to the write-masked assignment in a
       * single packed vector.  So, for example, if the writemask is xzw, then
       * we have to swizzle x -> x, y -> z, and z -> w and get the y component
       * from the load.
       */
      unsigned swiz[4];
      unsigned component = 0;
      for (unsigned i = 0; i < 4; i++) {
         swiz[i] = write_mask & (1 << i) ? component++ : 0;
      }
      src = nir_swizzle(&b, src, swiz, num_components);
   }

   enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref);

   nir_store_deref_with_access(&b, lhs_deref, src, write_mask,
                               qualifiers);
}

/*
 * Given an instruction, returns a pointer to its destination or NULL if there
 * is no destination.
 *
 * Note that this only handles instructions we generate at this level.
 */
static nir_def *
get_instr_def(nir_instr *instr)
{
   nir_alu_instr *alu_instr;
   nir_intrinsic_instr *intrinsic_instr;
   nir_tex_instr *tex_instr;

   switch (instr->type) {
   case nir_instr_type_alu:
      alu_instr = nir_instr_as_alu(instr);
      return &alu_instr->def;

   case nir_instr_type_intrinsic:
      intrinsic_instr = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
         return &intrinsic_instr->def;
      else
         return NULL;

   case nir_instr_type_tex:
      tex_instr = nir_instr_as_tex(instr);
      return &tex_instr->def;

   default:
      unreachable("not reached");
   }

   return NULL;
}

void
nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
                       unsigned bit_size)
{
   nir_def *def = get_instr_def(instr);

   if (def)
      nir_def_init(instr, def, num_components, bit_size);

   nir_builder_instr_insert(&b, instr);

   if (def)
      this->result = def;
}

nir_def *
nir_visitor::evaluate_rvalue(ir_rvalue* ir)
{
   ir->accept(this);
   if (ir->as_dereference() || ir->as_constant()) {
      /*
       * A dereference is being used on the right hand side, which means we
       * must emit a variable load.
       */

      enum gl_access_qualifier access = deref_get_qualifier(this->deref);
      this->result = nir_load_deref_with_access(&b, this->deref, access);
   }

   return this->result;
}

static bool
type_is_float(glsl_base_type type)
{
   return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE ||
          type == GLSL_TYPE_FLOAT16;
}

static bool
type_is_signed(glsl_base_type type)
{
   return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 ||
          type == GLSL_TYPE_INT16;
}

void
nir_visitor::visit(ir_expression *ir)
{
   /* Some special cases */
   switch (ir->operation) {
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample: {
      ir_dereference *deref = ir->operands[0]->as_dereference();
      ir_swizzle *swizzle = NULL;
      if (!deref) {
1735 /* the api does not allow a swizzle here, but the varying packing code
1736 * may have pushed one into here.
1737 */
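         /* e.g. interpolateAtOffset(v, off) where packing has placed `v`
          * into components of a packed varying: operand 0 then arrives as
          * a swizzle of the packed variable rather than a plain deref.
          */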
         swizzle = ir->operands[0]->as_swizzle();
         assert(swizzle);
         deref = swizzle->val->as_dereference();
         assert(deref);
      }

      deref->accept(this);

      assert(nir_deref_mode_is(this->deref, nir_var_shader_in));
      nir_intrinsic_op op;
      switch (ir->operation) {
      case ir_unop_interpolate_at_centroid:
         op = nir_intrinsic_interp_deref_at_centroid;
         break;
      case ir_binop_interpolate_at_offset:
         op = nir_intrinsic_interp_deref_at_offset;
         break;
      case ir_binop_interpolate_at_sample:
         op = nir_intrinsic_interp_deref_at_sample;
         break;
      default:
         unreachable("Invalid interpolation intrinsic");
      }

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
      intrin->num_components = deref->type->vector_elements;
      intrin->src[0] = nir_src_for_ssa(&this->deref->def);

      if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
          intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
         intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));

      unsigned bit_size = glsl_get_bit_size(deref->type);
      add_instr(&intrin->instr, deref->type->vector_elements, bit_size);

      if (swizzle) {
         unsigned swiz[4] = {
            swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
         };

         result = nir_swizzle(&b, result, swiz,
                              swizzle->type->vector_elements);
      }

      return;
   }

   case ir_unop_ssbo_unsized_array_length: {
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b.shader,
                                    nir_intrinsic_deref_buffer_array_length);

      ir_dereference *deref = ir->operands[0]->as_dereference();
      intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->def);

      add_instr(&intrin->instr, 1, 32);
      return;
   }

   default:
      break;
   }

   nir_def *srcs[4];
   for (unsigned i = 0; i < ir->num_operands; i++)
      srcs[i] = evaluate_rvalue(ir->operands[i]);

   glsl_base_type types[4];
   for (unsigned i = 0; i < ir->num_operands; i++)
      types[i] = ir->operands[i]->type->base_type;

   glsl_base_type out_type = ir->type->base_type;

   switch (ir->operation) {
   case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
   case ir_unop_logic_not:
      result = nir_inot(&b, srcs[0]);
      break;
   case ir_unop_neg:
      result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
                                       : nir_ineg(&b, srcs[0]);
      break;
   case ir_unop_abs:
      result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
                                       : nir_iabs(&b, srcs[0]);
      break;
   case ir_unop_clz:
      result = nir_uclz(&b, srcs[0]);
      break;
   case ir_unop_saturate:
      assert(type_is_float(types[0]));
      result = nir_fsat(&b, srcs[0]);
      break;
   case ir_unop_sign:
      result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
                                       : nir_isign(&b, srcs[0]);
      break;
   case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;

   case ir_unop_rsq:
      if (consts->ForceGLSLAbsSqrt)
         srcs[0] = nir_fabs(&b, srcs[0]);
      result = nir_frsq(&b, srcs[0]);
      break;

   case ir_unop_sqrt:
      if (consts->ForceGLSLAbsSqrt)
         srcs[0] = nir_fabs(&b, srcs[0]);
      result = nir_fsqrt(&b, srcs[0]);
      break;

   case ir_unop_exp: result = nir_fexp2(&b, nir_fmul_imm(&b, srcs[0], M_LOG2E)); break;
   case ir_unop_log: result = nir_fmul_imm(&b, nir_flog2(&b, srcs[0]), 1.0 / M_LOG2E); break;
   case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
   case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_f2b:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_b2i64:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_f162u:
   case ir_unop_u2f16:
   case ir_unop_f162i:
   case ir_unop_i2f16:
   case ir_unop_f162f:
   case ir_unop_f2f16:
   case ir_unop_f162b:
   case ir_unop_b2f16:
   case ir_unop_f162d:
   case ir_unop_d2f16:
   case ir_unop_f162u64:
   case ir_unop_u642f16:
   case ir_unop_f162i64:
   case ir_unop_i642f16:
   case ir_unop_i2i:
   case ir_unop_u2u:
   case ir_unop_d2i:
   case ir_unop_d2u:
   case ir_unop_d2b:
   case ir_unop_i2d:
   case ir_unop_u2d:
   case ir_unop_i642i:
   case ir_unop_i642u:
   case ir_unop_i642f:
   case ir_unop_i642b:
   case ir_unop_i642d:
   case ir_unop_u642i:
   case ir_unop_u642u:
   case ir_unop_u642f:
   case ir_unop_u642d:
   case ir_unop_i2i64:
   case ir_unop_u2i64:
   case ir_unop_f2i64:
   case ir_unop_d2i64:
   case ir_unop_i2u64:
   case ir_unop_u2u64:
   case ir_unop_f2u64:
   case ir_unop_d2u64:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_i642u64:
   case ir_unop_u642i64: {
      nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]);
      nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
      result = nir_type_convert(&b, srcs[0], src_type, dst_type,
                                nir_rounding_mode_undef);
      /* b2i and b2f don't have fixed bit-size versions so the builder will
       * just assume 32 and we have to fix it up here.
       */
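      /* e.g. ir_unop_b2i64: the conversion comes back as a 32-bit def, so
       * the bit size is patched to 64 here to match the GLSL int64_t
       * result type.
       */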
      result->bit_size = nir_alu_type_get_type_size(dst_type);
      break;
   }

   case ir_unop_f2fmp: {
      result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_i2imp: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

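   /* The unsigned variant below reuses nir_op_i2imp: the mediump narrowing
    * only changes the bit size, so the same opcode is valid for both
    * signednesses.
    */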
   case ir_unop_u2ump: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_u2f:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i642d:
   case ir_unop_bitcast_d2i64:
   case ir_unop_bitcast_u642d:
   case ir_unop_bitcast_d2u64:
   case ir_unop_subroutine_to_int:
      /* no-op */
      result = nir_mov(&b, srcs[0]);
      break;
   case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
   case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break;
   case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
   case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
   case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break;
   case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break;
   case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
   case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break;
   case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break;
   case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break;
   case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break;
   case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break;
   case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break;
   case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
   case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
   case ir_unop_pack_snorm_2x16:
      result = nir_pack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_snorm_4x8:
      result = nir_pack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_2x16:
      result = nir_pack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_4x8:
      result = nir_pack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_half_2x16:
      result = nir_pack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_2x16:
      result = nir_unpack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_4x8:
      result = nir_unpack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_2x16:
      result = nir_unpack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_4x8:
      result = nir_unpack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_half_2x16:
      result = nir_unpack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_sampler_2x32:
   case ir_unop_pack_image_2x32:
   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      result = nir_pack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_unpack_sampler_2x32:
   case ir_unop_unpack_image_2x32:
   case ir_unop_unpack_double_2x32:
   case ir_unop_unpack_int_2x32:
   case ir_unop_unpack_uint_2x32:
      result = nir_unpack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_bitfield_reverse:
      result = nir_bitfield_reverse(&b, srcs[0]);
      break;
   case ir_unop_bit_count:
      result = nir_bit_count(&b, srcs[0]);
      break;
   case ir_unop_find_msb:
      switch (types[0]) {
      case GLSL_TYPE_UINT:
         result = nir_ufind_msb(&b, srcs[0]);
         break;
      case GLSL_TYPE_INT:
         result = nir_ifind_msb(&b, srcs[0]);
         break;
      default:
         unreachable("Invalid type for findMSB()");
      }
      break;
   case ir_unop_find_lsb:
      result = nir_find_lsb(&b, srcs[0]);
      break;

   case ir_unop_get_buffer_size: {
      nir_intrinsic_instr *load = nir_intrinsic_instr_create(
         this->shader,
         nir_intrinsic_get_ssbo_size);
      load->num_components = ir->type->vector_elements;
      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
      unsigned bit_size = glsl_get_bit_size(ir->type);
      add_instr(&load->instr, ir->type->vector_elements, bit_size);
      return;
   }

   case ir_unop_atan:
      result = nir_atan(&b, srcs[0]);
      break;

   case ir_binop_add:
      result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
                                       : nir_iadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_add_sat:
      result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1])
                                        : nir_uadd_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub:
      result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
                                       : nir_isub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub_sat:
      result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1])
                                        : nir_usub_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_abs_sub:
      /* out_type is always unsigned for ir_binop_abs_sub, so we have to key
       * on the type of the sources.
       */
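      /* e.g. absoluteDifference(int, int) from
       * INTEL_shader_integer_functions2 returns uint, so types[0] rather
       * than out_type picks between uabs_isub and uabs_usub.
       */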
      result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1])
                                        : nir_uabs_usub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg:
      result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1])
                                        : nir_uhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg_round:
      result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1])
                                        : nir_urhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mul_32x16:
      result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1])
                                        : nir_umul_32x16(&b, srcs[0], srcs[1]);
      break;
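   /* For ir_binop_mul below, a multiply that widens two 32-bit integer
    * sources into a 64-bit result is emitted as a single [iu]mul_2x32_64
    * instead of a full 64-bit imul.
    */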
   case ir_binop_mul:
      if (type_is_float(out_type))
         result = nir_fmul(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_INT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_INT))
         result = nir_imul_2x32_64(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_UINT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_UINT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
         result = nir_umul_2x32_64(&b, srcs[0], srcs[1]);
      else
         result = nir_imul(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_div:
      if (type_is_float(out_type))
         result = nir_fdiv(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_idiv(&b, srcs[0], srcs[1]);
      else
         result = nir_udiv(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mod:
      result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
                                       : nir_umod(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_min:
      if (type_is_float(out_type))
         result = nir_fmin(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imin(&b, srcs[0], srcs[1]);
      else
         result = nir_umin(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_max:
      if (type_is_float(out_type))
         result = nir_fmax(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imax(&b, srcs[0], srcs[1]);
      else
         result = nir_umax(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
   case ir_binop_logic_and:
      result = nir_iand(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_or:
      result = nir_ior(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_xor:
      result = nir_ixor(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_lshift: result = nir_ishl(&b, srcs[0], nir_u2u32(&b, srcs[1])); break;
   case ir_binop_rshift:
      result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], nir_u2u32(&b, srcs[1]))
                                          : nir_ushr(&b, srcs[0], nir_u2u32(&b, srcs[1]));
      break;
   case ir_binop_imul_high:
      result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
                                           : nir_umul_high(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break;
   case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
   case ir_binop_less:
      if (type_is_float(types[0]))
         result = nir_flt(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ilt(&b, srcs[0], srcs[1]);
      else
         result = nir_ult(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_gequal:
      if (type_is_float(types[0]))
         result = nir_fge(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ige(&b, srcs[0], srcs[1]);
      else
         result = nir_uge(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_equal:
      if (type_is_float(types[0]))
         result = nir_feq(&b, srcs[0], srcs[1]);
      else
         result = nir_ieq(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_nequal:
      if (type_is_float(types[0]))
         result = nir_fneu(&b, srcs[0], srcs[1]);
      else
         result = nir_ine(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_all_equal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
         case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
         case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
         case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
         case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
         default:
            unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
         case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
         case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
         case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
         case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
         default:
            unreachable("not reached");
         }
      }
      break;
   case ir_binop_any_nequal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
         case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break;
         case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
         case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
         case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
         default:
            unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
         case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
         case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
         case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
         case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
         default:
            unreachable("not reached");
         }
      }
      break;
   case ir_binop_dot:
      result = nir_fdot(&b, srcs[0], srcs[1]);
      break;

   case ir_binop_vector_extract:
      result = nir_vector_extract(&b, srcs[0], srcs[1]);
      break;
   case ir_triop_vector_insert:
      result = nir_vector_insert(&b, srcs[0], srcs[1], srcs[2]);
      break;

   case ir_binop_atan2:
      result = nir_atan2(&b, srcs[0], srcs[1]);
      break;

   case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
   case ir_triop_fma:
      result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_lrp:
      result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_csel:
      result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
      break;
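   /* The bitfield cases below operate on 32-bit values in NIR, so 16-bit
    * operands are widened first and the result is narrowed back to the
    * 16-bit GLSL result type afterwards.
    */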
   case ir_triop_bitfield_extract:
      result = glsl_type_is_int_16_32(ir->type) ?
         nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) :
         nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2]));

      if (ir->type->base_type == GLSL_TYPE_INT16) {
         result = nir_i2i16(&b, result);
      } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
         result = nir_u2u16(&b, result);
      }

      break;
   case ir_quadop_bitfield_insert:
      result = nir_bitfield_insert(&b,
                                   nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]),
                                   nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3]));

      if (ir->type->base_type == GLSL_TYPE_INT16) {
         result = nir_i2i16(&b, result);
      } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
         result = nir_u2u16(&b, result);
      }

      break;
   case ir_quadop_vector:
      result = nir_vec(&b, srcs, ir->type->vector_elements);
      break;

   default:
      unreachable("not reached");
   }

   /* The bit-size of the NIR SSA value must match the bit-size of the
    * original GLSL IR expression.
    */
   assert(result->bit_size == glsl_base_type_get_bit_size(ir->type->base_type));
}

void
nir_visitor::visit(ir_swizzle *ir)
{
   unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
   result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
                        ir->type->vector_elements);
}

void
nir_visitor::visit(ir_texture *ir)
{
   unsigned num_srcs;
   nir_texop op;
   switch (ir->op) {
   case ir_tex:
      op = nir_texop_tex;
      num_srcs = 1; /* coordinate */
      break;

   case ir_txb:
   case ir_txl:
      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
      num_srcs = 2; /* coordinate, bias/lod */
      break;

   case ir_txd:
      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
      num_srcs = 3;
      break;

   case ir_txf:
      op = nir_texop_txf;
      if (ir->lod_info.lod != NULL)
         num_srcs = 2; /* coordinate, lod */
      else
         num_srcs = 1; /* coordinate */
      break;

   case ir_txf_ms:
      op = nir_texop_txf_ms;
      num_srcs = 2; /* coordinate, sample_index */
      break;

   case ir_txs:
      op = nir_texop_txs;
      if (ir->lod_info.lod != NULL)
         num_srcs = 1; /* lod */
      else
         num_srcs = 0;
      break;

   case ir_lod:
      op = nir_texop_lod;
      num_srcs = 1; /* coordinate */
      break;

   case ir_tg4:
      op = nir_texop_tg4;
      num_srcs = 1; /* coordinate */
      break;

   case ir_query_levels:
      op = nir_texop_query_levels;
      num_srcs = 0;
      break;

   case ir_texture_samples:
      op = nir_texop_texture_samples;
      num_srcs = 0;
      break;

   case ir_samples_identical:
      op = nir_texop_samples_identical;
      num_srcs = 1; /* coordinate */
      break;

   default:
      unreachable("not reached");
   }

   if (ir->projector != NULL)
      num_srcs++;
   if (ir->shadow_comparator != NULL)
      num_srcs++;
   /* offsets are constants we store inside nir_tex_instr::tg4_offsets */
   if (ir->offset != NULL && !glsl_type_is_array(ir->offset->type))
      num_srcs++;
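   /* e.g. textureGatherOffsets() supplies a constant array of offsets;
    * those are copied into tg4_offsets further down instead of occupying
    * a texture source slot.
    */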
   if (ir->clamp != NULL)
      num_srcs++;

   /* Add two for the texture and sampler derefs */
   num_srcs += 2;

   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);

   instr->op = op;
   instr->sampler_dim =
      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
   instr->is_array = ir->sampler->type->sampler_array;
   instr->is_shadow = ir->sampler->type->sampler_shadow;

   const glsl_type *dest_type
      = ir->is_sparse ? glsl_get_field_type(ir->type, "texel") : ir->type;
   assert(dest_type != &glsl_type_builtin_error);
   if (instr->is_shadow)
      instr->is_new_style_shadow = (dest_type->vector_elements == 1);
   instr->dest_type = nir_get_nir_type_for_glsl_type(dest_type);
   instr->is_sparse = ir->is_sparse;

   nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler);

   /* check for bindless handles */
   if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
       nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
      nir_def *load = nir_load_deref(&b, sampler_deref);
      instr->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_handle, load);
      instr->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_handle, load);
   } else {
      instr->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                          &sampler_deref->def);
      instr->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
                                          &sampler_deref->def);
   }

   unsigned src_number = 2;

   if (ir->coordinate != NULL) {
      instr->coord_components = ir->coordinate->type->vector_elements;
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_coord,
                                                   evaluate_rvalue(ir->coordinate));
      src_number++;
   }

   if (ir->projector != NULL) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
                                                   evaluate_rvalue(ir->projector));
      src_number++;
   }

   if (ir->shadow_comparator != NULL) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_comparator,
                                                   evaluate_rvalue(ir->shadow_comparator));
      src_number++;
   }

   if (ir->offset != NULL) {
      if (glsl_type_is_array(ir->offset->type)) {
         for (int i = 0; i < glsl_array_size(ir->offset->type); i++) {
            const ir_constant *c =
               ir->offset->as_constant()->get_array_element(i);

            for (unsigned j = 0; j < 2; ++j) {
               int val = c->get_int_component(j);
               instr->tg4_offsets[i][j] = val;
            }
         }
      } else {
         assert(glsl_type_is_vector(ir->offset->type) || glsl_type_is_scalar(ir->offset->type));

         instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_offset,
                                                      evaluate_rvalue(ir->offset));
         src_number++;
      }
   }

   if (ir->clamp) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_min_lod,
                                                   evaluate_rvalue(ir->clamp));
      src_number++;
   }

   switch (ir->op) {
   case ir_txb:
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
                                                   evaluate_rvalue(ir->lod_info.bias));
      src_number++;
      break;

   case ir_txl:
   case ir_txf:
   case ir_txs:
      if (ir->lod_info.lod != NULL) {
         instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                                      evaluate_rvalue(ir->lod_info.lod));
         src_number++;
      }
      break;

   case ir_txd:
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ddx,
                                                   evaluate_rvalue(ir->lod_info.grad.dPdx));
      src_number++;
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ddy,
                                                   evaluate_rvalue(ir->lod_info.grad.dPdy));
      src_number++;
      break;

   case ir_txf_ms:
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                                   evaluate_rvalue(ir->lod_info.sample_index));
      src_number++;
      break;

   case ir_tg4:
      instr->component = ir->lod_info.component->as_constant()->value.u[0];
      break;

   default:
      break;
   }

   assert(src_number == num_srcs);

   unsigned bit_size = glsl_get_bit_size(dest_type);
   add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}

void
nir_visitor::visit(ir_constant *ir)
{
   /*
    * We don't know if this variable is an array or struct that gets
    * dereferenced, so do the safe thing and make it a variable with a
    * constant initializer and return a dereference.
    */
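   /* e.g. a constant mat4 indexed as `m[i]` with a dynamic `i` needs a
    * real variable to dereference, hence the read-only "const_temp" local
    * created below.
    */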

   nir_variable *var =
      nir_local_variable_create(this->impl, ir->type, "const_temp");
   var->data.read_only = true;
   var->constant_initializer = constant_copy(ir, var);

   this->deref = nir_build_deref_var(&b, var);
}

void
nir_visitor::visit(ir_dereference_variable *ir)
{
   if (ir->variable_referenced()->data.mode == ir_var_function_out ||
       ir->variable_referenced()->data.mode == ir_var_function_inout) {
      unsigned i = (sig->return_type != &glsl_type_builtin_void) ? 1 : 0;

      foreach_in_list(ir_variable, param, &sig->parameters) {
         if (param == ir->variable_referenced()) {
            break;
         }
         i++;
      }

      this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
                                         nir_var_function_temp, ir->type, 0);
      return;
   }

   struct hash_entry *entry =
      _mesa_hash_table_search(this->var_table, ir->var);
   assert(entry);
   nir_variable *var = (nir_variable *) entry->data;

   this->deref = nir_build_deref_var(&b, var);
}

void
nir_visitor::visit(ir_dereference_record *ir)
{
   ir->record->accept(this);

   int field_index = ir->field_idx;
   assert(field_index >= 0);

   /* sparse texture variable is a struct for ir_variable, but it has been
    * converted to a vector for nir_variable.
    */
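   /* e.g. a sparse fetch result struct with "texel" and "code" fields is
    * stored as one vector whose last channel holds the residency code, so
    * each field access becomes a channel selection on the loaded vector.
    */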
   if (this->deref->deref_type == nir_deref_type_var &&
       _mesa_set_search(this->sparse_variable_set, this->deref->var)) {
      nir_def *load = nir_load_deref(&b, this->deref);
      assert(load->num_components >= 2);

      nir_def *ssa;
      const glsl_type *type = ir->record->type;
      if (field_index == glsl_get_field_index(type, "code")) {
         /* last channel holds residency code */
         ssa = nir_channel(&b, load, load->num_components - 1);
      } else {
         assert(field_index == glsl_get_field_index(type, "texel"));

         unsigned mask = BITFIELD_MASK(load->num_components - 1);
         ssa = nir_channels(&b, load, mask);
      }

      /* still need to create a deref for return */
      nir_variable *tmp =
         nir_local_variable_create(this->impl, ir->type, "deref_tmp");
      this->deref = nir_build_deref_var(&b, tmp);
      nir_store_deref(&b, this->deref, ssa, ~0);
   } else
      this->deref = nir_build_deref_struct(&b, this->deref, field_index);
}

void
nir_visitor::visit(ir_dereference_array *ir)
{
   nir_def *index = evaluate_rvalue(ir->array_index);

   ir->array->accept(this);

   this->deref = nir_build_deref_array(&b, this->deref, index);
}

void
nir_visitor::visit(ir_barrier *)
{
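   /* barrier() synchronizes a compute workgroup's shared memory; in a
    * tessellation control shader it instead orders per-patch output access
    * across invocations, hence the different memory modes below.
    */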
   if (shader->info.stage == MESA_SHADER_COMPUTE) {
      nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP,
                  NIR_MEMORY_ACQ_REL, nir_var_mem_shared);
   } else if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
      nir_barrier(&b, SCOPE_WORKGROUP, SCOPE_WORKGROUP,
                  NIR_MEMORY_ACQ_REL, nir_var_shader_out);
   }
}

nir_shader *
glsl_float64_funcs_to_nir(struct gl_context *ctx,
                          const nir_shader_compiler_options *options)
{
   /* We pretend it's a vertex shader. Ultimately, the stage shouldn't
    * matter because we're not optimizing anything here.
    */
   struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX);
   sh->Source = float64_source;
   sh->CompileStatus = COMPILE_FAILURE;
   _mesa_glsl_compile_shader(ctx, sh, false, false, true);

   if (!sh->CompileStatus) {
      if (sh->InfoLog) {
         _mesa_problem(ctx,
                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
                       sh->InfoLog, float64_source);
      }
      return NULL;
   }

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL);

   nir_visitor v1(&ctx->Const, nir);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* _mesa_delete_shader will try to free sh->Source but it's static const */
   sh->Source = NULL;
   _mesa_delete_shader(ctx, sh);

   nir_validate_shader(nir, "float64_funcs_to_nir");

   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);
   NIR_PASS(_, nir, nir_inline_functions);
   NIR_PASS(_, nir, nir_opt_deref);

   /* Do some optimizations to clean up the shader now. By optimizing the
    * functions in the library, we avoid having to re-do that work every
    * time we inline a copy of a function. Reducing basic blocks also helps
    * with compile times.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_dce);
   NIR_PASS(_, nir, nir_opt_cse);
   NIR_PASS(_, nir, nir_opt_gcm, true);
   NIR_PASS(_, nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS(_, nir, nir_opt_dce);

   return nir;
}