/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "float64_glsl.h"
#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_optimization.h"
#include "program.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "compiler/nir/nir_deref.h"
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "util/u_math.h"

/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

namespace {

class nir_visitor : public ir_visitor
{
public:
   nir_visitor(gl_context *ctx, nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);

   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2, nir_ssa_def *src3);

   bool supports_std430;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_ssa_def *result; /* result of the expression tree last visited */

   nir_deref_instr *evaluate_deref(ir_instruction *ir);

   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);

   /* most recent deref instruction created */
   nir_deref_instr *deref;

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   ir_function_signature *sig;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function_overload */
   struct hash_table *overload_table;
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

/* glsl_to_nir can only handle converting certain function parameters
 * to NIR. This visitor checks for parameters it can't currently handle.
 */
class ir_function_param_visitor : public ir_hierarchical_visitor
{
public:
   ir_function_param_visitor()
      : unsupported(false)
   {
   }

   virtual ir_visitor_status visit_enter(ir_function_signature *ir)
   {
      if (ir->is_intrinsic())
         return visit_continue;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         if (!glsl_type_is_vector_or_scalar(param->type)) {
            unsupported = true;
            return visit_stop;
         }

         if (param->data.mode == ir_var_function_inout) {
            unsupported = true;
            return visit_stop;
         }
      }

      if (!glsl_type_is_vector_or_scalar(ir->return_type) &&
          !ir->return_type->is_void()) {
         unsupported = true;
         return visit_stop;
      }

      return visit_continue;
   }

   bool unsupported;
};

} /* end of anonymous namespace */


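/* Returns true if any function signature in the IR list has a parameter or
 * return type that glsl_to_nir cannot convert directly: only vector and
 * scalar parameters are supported, and inout parameters are not.
 */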
static bool
has_unsupported_function_param(exec_list *ir)
{
   ir_function_param_visitor visitor;
   visit_list_elements(&visitor, ir);
   return visitor.unsupported;
}

nir_shader *
glsl_to_nir(struct gl_context *ctx,
            const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   const struct gl_shader_compiler_options *gl_options =
      &ctx->Const.ShaderCompilerOptions[stage];

   /* glsl_to_nir can only handle converting certain function parameters
    * to NIR. If we find something we can't handle then we get the GLSL IR
    * opts to remove it before we continue on.
    *
    * TODO: add missing glsl ir to nir support and remove this loop.
    */
   while (has_unsupported_function_param(sh->ir)) {
      do_common_optimization(sh->ir, true, true, gl_options,
                             ctx->Const.NativeIntegers);
   }

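   /* nir_shader_create() copies the provided shader_info, so the new NIR
    * shader starts out seeded with the linked GL program's info.
    */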
   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(ctx, shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   nir_validate_shader(shader, "after glsl to nir, before function inline");

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   nir_lower_variable_initializers(shader, nir_var_all);
   nir_lower_returns(shader);
   nir_inline_functions(shader);
   nir_opt_deref(shader);

   nir_validate_shader(shader, "after function inlining and return lowering");

   /* Now that we have inlined everything, remove all of the functions except
    * main().
    */
   foreach_list_typed_safe(nir_function, function, node, &(shader)->functions) {
      if (strcmp("main", function->name) != 0) {
         exec_node_remove(&function->node);
      }
   }

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   /* Check for transform feedback varyings specified via the API */
   shader->info.has_transform_feedback_varyings =
      shader_prog->TransformFeedback.NumVarying > 0;

   /* Check for transform feedback varyings specified in the Shader */
   if (shader_prog->last_vert_prog)
      shader->info.has_transform_feedback_varyings |=
         shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;

   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
   }

   return shader;
}

nir_visitor::nir_visitor(gl_context *ctx, nir_shader *shader)
{
   this->supports_std430 = ctx->Const.UseSTD430AsDefaultPacking;
   this->shader = shader;
   this->is_global = true;
   this->var_table = _mesa_pointer_hash_table_create(NULL);
   this->overload_table = _mesa_pointer_hash_table_create(NULL);
   this->result = NULL;
   this->impl = NULL;
   this->deref = NULL;
   this->sig = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
}

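/* Visit an IR instruction that is known to produce a dereference and return
 * the nir_deref_instr that the deref visit methods leave in this->deref.
 */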
nir_deref_instr *
nir_visitor::evaluate_deref(ir_instruction *ir)
{
   ir->accept(this);
   return this->deref;
}

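/* Recursively translate an ir_constant tree into a freshly allocated
 * nir_constant ralloc'd against mem_ctx.
 */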
nir_constant *
nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u32 = ir->value.u[r];

      break;

   case GLSL_TYPE_UINT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u16 = ir->value.u16[r];
      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i32 = ir->value.i[r];

      break;

   case GLSL_TYPE_INT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i16 = ir->value.i16[r];
      break;

   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
      if (cols > 1) {
         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
         ret->num_elements = cols;
         for (unsigned c = 0; c < cols; c++) {
            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
            col_const->num_elements = 0;
            switch (ir->type->base_type) {
            case GLSL_TYPE_FLOAT:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f32 = ir->value.f[c * rows + r];
               break;

            case GLSL_TYPE_FLOAT16:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
               break;

            case GLSL_TYPE_DOUBLE:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f64 = ir->value.d[c * rows + r];
               break;

            default:
               unreachable("Cannot get here from the first level switch");
            }
            ret->elements[c] = col_const;
         }
      } else {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f32 = ir->value.f[r];
            break;

         case GLSL_TYPE_FLOAT16:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].u16 = ir->value.f16[r];
            break;

         case GLSL_TYPE_DOUBLE:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f64 = ir->value.d[r];
            break;

         default:
            unreachable("Cannot get here from the first level switch");
         }
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u64 = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i64 = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].b = ir->value.b[r];

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}

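/* Re-wrap elem_type in arrays matching each dimension of array_type,
 * preserving the array lengths.
 */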
static const glsl_type *
wrap_type_in_array(const glsl_type *elem_type, const glsl_type *array_type)
{
   if (!array_type->is_array())
      return elem_type;

   elem_type = wrap_type_in_array(elem_type, array_type->fields.array);

   return glsl_type::get_array_instance(elem_type, array_type->length);
}

static unsigned
get_nir_how_declared(unsigned how_declared)
{
   if (how_declared == ir_var_hidden)
      return nir_var_hidden;

   return nir_var_declared_normally;
}

void
nir_visitor::visit(ir_variable *ir)
{
   /* TODO: In the future we should switch to using the NIR lowering pass,
    * but for now just ignore these variables as GLSL IR should have lowered
    * them. Any remaining ones are just dead vars that weren't cleaned up.
    */
   if (ir->data.mode == ir_var_shader_shared)
      return;

   /* FINISHME: inout parameters */
   assert(ir->data.mode != ir_var_function_inout);

   if (ir->data.mode == ir_var_function_out)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.always_active_io = ir->data.always_active_io;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
   var->data.invariant = ir->data.invariant;
   var->data.location = ir->data.location;
   var->data.stream = ir->data.stream;
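   /* GLSL IR sets bit 31 of the stream value when the variable carries
    * components belonging to different streams packed together; mirror
    * that with NIR_STREAM_PACKED.
    */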
   if (ir->data.stream & (1u << 31))
      var->data.stream |= NIR_STREAM_PACKED;

   var->data.precision = ir->data.precision;
   var->data.explicit_location = ir->data.explicit_location;
   var->data.matrix_layout = ir->data.matrix_layout;
   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
   var->data.compact = false;

   switch (ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_shader_temp;
      else
         var->data.mode = nir_var_function_temp;
      break;

   case ir_var_function_in:
   case ir_var_const_in:
      var->data.mode = nir_var_function_temp;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;

         if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
             (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
              ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }

         if (shader->info.stage > MESA_SHADER_VERTEX &&
             ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
             ir->data.location <= VARYING_SLOT_CULL_DIST1) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
           ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }

      if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
          ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
          ir->data.location <= VARYING_SLOT_CULL_DIST1) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }
      break;

   case ir_var_uniform:
      if (ir->get_interface_type())
         var->data.mode = nir_var_mem_ubo;
      else
         var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_mem_ssbo;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   default:
      unreachable("not reached");
   }

   unsigned mem_access = 0;
   if (ir->data.memory_read_only)
      mem_access |= ACCESS_NON_WRITEABLE;
   if (ir->data.memory_write_only)
      mem_access |= ACCESS_NON_READABLE;
   if (ir->data.memory_coherent)
      mem_access |= ACCESS_COHERENT;
   if (ir->data.memory_volatile)
      mem_access |= ACCESS_VOLATILE;
   if (ir->data.memory_restrict)
      mem_access |= ACCESS_RESTRICT;

   var->interface_type = ir->get_interface_type();

   /* For UBO and SSBO variables, we need explicit types */
   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
      const glsl_type *explicit_ifc_type =
         ir->get_interface_type()->get_explicit_interface_type(supports_std430);

      var->interface_type = explicit_ifc_type;

      if (ir->type->without_array()->is_interface()) {
         /* If the type contains the interface, wrap the explicit type in the
          * right number of arrays.
          */
         var->type = wrap_type_in_array(explicit_ifc_type, ir->type);
      } else {
         /* Otherwise, this variable is one entry in the interface */
         UNUSED bool found = false;
         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
            const glsl_struct_field *field =
               &explicit_ifc_type->fields.structure[i];
            if (strcmp(ir->name, field->name) != 0)
               continue;

            var->type = field->type;
            if (field->memory_read_only)
               mem_access |= ACCESS_NON_WRITEABLE;
            if (field->memory_write_only)
               mem_access |= ACCESS_NON_READABLE;
            if (field->memory_coherent)
               mem_access |= ACCESS_COHERENT;
            if (field->memory_volatile)
               mem_access |= ACCESS_VOLATILE;
            if (field->memory_restrict)
               mem_access |= ACCESS_RESTRICT;

            found = true;
            break;
         }
         assert(found);
      }
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.explicit_binding = ir->data.explicit_binding;
   var->data.bindless = ir->data.bindless;
   var->data.offset = ir->data.offset;
   var->data.access = (gl_access_qualifier)mem_access;

   if (var->type->without_array()->is_image()) {
      var->data.image.format = ir->data.image_format;
   } else if (var->data.mode == nir_var_shader_out) {
      var->data.xfb.buffer = ir->data.xfb_buffer;
      var->data.xfb.stride = ir->data.xfb_stride;
   }

   var->data.fb_fetch_output = ir->data.fb_fetch_output;
   var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
   var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;

   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = rzalloc_array(var, nir_state_slot,
                                       var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 5; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
         var->state_slots[i].swizzle = state_slots[i].swizzle;
      }
   } else {
      var->state_slots = NULL;
   }

   var->constant_initializer = constant_copy(ir->constant_initializer, var);

   if (var->data.mode == nir_var_function_temp)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
}

ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());
   if (strcmp(ir->function_name(), "main") == 0)
      func->is_entrypoint = true;

   func->num_params = ir->parameters.length() +
                      (ir->return_type != glsl_type::void_type);
   func->params = ralloc_array(shader, nir_parameter, func->num_params);

   unsigned np = 0;

   if (ir->return_type != glsl_type::void_type) {
      /* The return value is a variable deref (basically an out parameter) */
      func->params[np].num_components = 1;
      func->params[np].bit_size = 32;
      np++;
   }

   foreach_in_list(ir_variable, param, &ir->parameters) {
      /* FINISHME: pass arrays, structs, etc by reference? */
      assert(param->type->is_vector() || param->type->is_scalar());

      if (param->data.mode == ir_var_function_in) {
         func->params[np].num_components = param->type->vector_elements;
         func->params[np].bit_size = glsl_get_bit_size(param->type);
      } else {
         func->params[np].num_components = 1;
         func->params[np].bit_size = 32;
      }
      np++;
   }
   assert(np == func->num_params);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   this->sig = ir;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      this->is_global = false;

      nir_builder_init(&b, impl);
      b.cursor = nir_after_cf_list(&impl->body);

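      /* When the function returns a value, parameter 0 holds the return
       * deref, so the actual parameters start at index 1.
       */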
      unsigned i = (ir->return_type != glsl_type::void_type) ? 1 : 0;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         nir_variable *var =
            nir_local_variable_create(impl, param->type, param->name);

         if (param->data.mode == ir_var_function_in) {
            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
         }

         _mesa_hash_table_insert(var_table, param, var);
         i++;
      }

      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   nir_intrinsic_instr *discard;
   if (ir->condition) {
      discard = nir_intrinsic_instr_create(this->shader,
                                           nir_intrinsic_discard_if);
      discard->src[0] =
         nir_src_for_ssa(evaluate_rvalue(ir->condition));
   } else {
      discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
   }

   nir_builder_instr_insert(&b, &discard->instr);
}

void
nir_visitor::visit(ir_demote *ir)
{
   nir_intrinsic_instr *demote =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_demote);

   nir_builder_instr_insert(&b, &demote->instr);
}

void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
   nir_intrinsic_set_stream_id(instr, ir->stream_id());
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
   nir_intrinsic_set_stream_id(instr, ir->stream_id());
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_deref_instr *ret_deref =
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

      nir_ssa_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

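/* std430 aligns a vector to its component size times the component count
 * rounded up to the next power of two (so a vec3 aligns like a vec4).
 */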
static void
intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
{
   unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
}

/* Accumulate any qualifiers along the deref chain to get the actual
 * load/store qualifier.
 */

static enum gl_access_qualifier
deref_get_qualifier(nir_deref_instr *deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned qualifiers = path.path[0]->var->data.access;

   const glsl_type *parent_type = path.path[0]->type;
   for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) {
      nir_deref_instr *cur = *cur_ptr;

      if (parent_type->is_interface()) {
         const struct glsl_struct_field *field =
            &parent_type->fields.structure[cur->strct.index];
         if (field->memory_read_only)
            qualifiers |= ACCESS_NON_WRITEABLE;
         if (field->memory_write_only)
            qualifiers |= ACCESS_NON_READABLE;
         if (field->memory_coherent)
            qualifiers |= ACCESS_COHERENT;
         if (field->memory_volatile)
            qualifiers |= ACCESS_VOLATILE;
         if (field->memory_restrict)
            qualifiers |= ACCESS_RESTRICT;
      }

      parent_type = cur->type;
   }

   nir_deref_path_finish(&path);

   return (gl_access_qualifier) qualifiers;
}

void
nir_visitor::visit(ir_call *ir)
{
   if (ir->callee->is_intrinsic()) {
      nir_intrinsic_op op;

      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_generic_atomic_add:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd;
         break;
      case ir_intrinsic_generic_atomic_and:
         op = nir_intrinsic_deref_atomic_and;
         break;
      case ir_intrinsic_generic_atomic_or:
         op = nir_intrinsic_deref_atomic_or;
         break;
      case ir_intrinsic_generic_atomic_xor:
         op = nir_intrinsic_deref_atomic_xor;
         break;
      case ir_intrinsic_generic_atomic_min:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_deref_atomic_imin;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_deref_atomic_umin;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_deref_atomic_fmin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_max:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_deref_atomic_imax;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_deref_atomic_umax;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_deref_atomic_fmax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_exchange:
         op = nir_intrinsic_deref_atomic_exchange;
         break;
      case ir_intrinsic_generic_atomic_comp_swap:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_deref_atomic_comp_swap
            : nir_intrinsic_deref_atomic_fcomp_swap;
         break;
      case ir_intrinsic_atomic_counter_read:
         op = nir_intrinsic_atomic_counter_read_deref;
         break;
      case ir_intrinsic_atomic_counter_increment:
         op = nir_intrinsic_atomic_counter_inc_deref;
         break;
      case ir_intrinsic_atomic_counter_predecrement:
         op = nir_intrinsic_atomic_counter_pre_dec_deref;
         break;
      case ir_intrinsic_atomic_counter_add:
         op = nir_intrinsic_atomic_counter_add_deref;
         break;
      case ir_intrinsic_atomic_counter_and:
         op = nir_intrinsic_atomic_counter_and_deref;
         break;
      case ir_intrinsic_atomic_counter_or:
         op = nir_intrinsic_atomic_counter_or_deref;
         break;
      case ir_intrinsic_atomic_counter_xor:
         op = nir_intrinsic_atomic_counter_xor_deref;
         break;
      case ir_intrinsic_atomic_counter_min:
         op = nir_intrinsic_atomic_counter_min_deref;
         break;
      case ir_intrinsic_atomic_counter_max:
         op = nir_intrinsic_atomic_counter_max_deref;
         break;
      case ir_intrinsic_atomic_counter_exchange:
         op = nir_intrinsic_atomic_counter_exchange_deref;
         break;
      case ir_intrinsic_atomic_counter_comp_swap:
         op = nir_intrinsic_atomic_counter_comp_swap_deref;
         break;
      case ir_intrinsic_image_load:
         op = nir_intrinsic_image_deref_load;
         break;
      case ir_intrinsic_image_store:
         op = nir_intrinsic_image_deref_store;
         break;
      case ir_intrinsic_image_atomic_add:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_image_deref_atomic_add
            : nir_intrinsic_image_deref_atomic_fadd;
         break;
      case ir_intrinsic_image_atomic_min:
         if (ir->return_deref->type == glsl_type::int_type)
            op = nir_intrinsic_image_deref_atomic_imin;
         else if (ir->return_deref->type == glsl_type::uint_type)
            op = nir_intrinsic_image_deref_atomic_umin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_max:
         if (ir->return_deref->type == glsl_type::int_type)
            op = nir_intrinsic_image_deref_atomic_imax;
         else if (ir->return_deref->type == glsl_type::uint_type)
            op = nir_intrinsic_image_deref_atomic_umax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_and:
         op = nir_intrinsic_image_deref_atomic_and;
         break;
      case ir_intrinsic_image_atomic_or:
         op = nir_intrinsic_image_deref_atomic_or;
         break;
      case ir_intrinsic_image_atomic_xor:
         op = nir_intrinsic_image_deref_atomic_xor;
         break;
      case ir_intrinsic_image_atomic_exchange:
         op = nir_intrinsic_image_deref_atomic_exchange;
         break;
      case ir_intrinsic_image_atomic_comp_swap:
         op = nir_intrinsic_image_deref_atomic_comp_swap;
         break;
      case ir_intrinsic_image_atomic_inc_wrap:
         op = nir_intrinsic_image_deref_atomic_inc_wrap;
         break;
      case ir_intrinsic_image_atomic_dec_wrap:
         op = nir_intrinsic_image_deref_atomic_dec_wrap;
         break;
      case ir_intrinsic_memory_barrier:
         op = nir_intrinsic_memory_barrier;
         break;
      case ir_intrinsic_image_size:
         op = nir_intrinsic_image_deref_size;
         break;
      case ir_intrinsic_image_samples:
         op = nir_intrinsic_image_deref_samples;
         break;
      case ir_intrinsic_ssbo_store:
      case ir_intrinsic_ssbo_load:
      case ir_intrinsic_ssbo_atomic_add:
      case ir_intrinsic_ssbo_atomic_and:
      case ir_intrinsic_ssbo_atomic_or:
      case ir_intrinsic_ssbo_atomic_xor:
      case ir_intrinsic_ssbo_atomic_min:
      case ir_intrinsic_ssbo_atomic_max:
      case ir_intrinsic_ssbo_atomic_exchange:
      case ir_intrinsic_ssbo_atomic_comp_swap:
         /* SSBO stores/loads should only have been lowered in GLSL IR for
          * non-NIR drivers; NIR drivers use gl_nir_lower_buffers() instead.
          */
         unreachable("Invalid operation nir doesn't want lowered ssbo "
                     "store/loads");
      case ir_intrinsic_shader_clock:
         op = nir_intrinsic_shader_clock;
         break;
      case ir_intrinsic_begin_invocation_interlock:
         op = nir_intrinsic_begin_invocation_interlock;
         break;
      case ir_intrinsic_end_invocation_interlock:
         op = nir_intrinsic_end_invocation_interlock;
         break;
      case ir_intrinsic_group_memory_barrier:
         op = nir_intrinsic_group_memory_barrier;
         break;
      case ir_intrinsic_memory_barrier_atomic_counter:
         op = nir_intrinsic_memory_barrier_atomic_counter;
         break;
      case ir_intrinsic_memory_barrier_buffer:
         op = nir_intrinsic_memory_barrier_buffer;
         break;
      case ir_intrinsic_memory_barrier_image:
         op = nir_intrinsic_memory_barrier_image;
         break;
      case ir_intrinsic_memory_barrier_shared:
         op = nir_intrinsic_memory_barrier_shared;
         break;
      case ir_intrinsic_shared_load:
         op = nir_intrinsic_load_shared;
         break;
      case ir_intrinsic_shared_store:
         op = nir_intrinsic_store_shared;
         break;
      case ir_intrinsic_shared_atomic_add:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_shared_atomic_add
            : nir_intrinsic_shared_atomic_fadd;
         break;
      case ir_intrinsic_shared_atomic_and:
         op = nir_intrinsic_shared_atomic_and;
         break;
      case ir_intrinsic_shared_atomic_or:
         op = nir_intrinsic_shared_atomic_or;
         break;
      case ir_intrinsic_shared_atomic_xor:
         op = nir_intrinsic_shared_atomic_xor;
         break;
      case ir_intrinsic_shared_atomic_min:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_shared_atomic_imin;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_shared_atomic_umin;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_shared_atomic_fmin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_shared_atomic_max:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_shared_atomic_imax;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_shared_atomic_umax;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_shared_atomic_fmax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_shared_atomic_exchange:
         op = nir_intrinsic_shared_atomic_exchange;
         break;
      case ir_intrinsic_shared_atomic_comp_swap:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_shared_atomic_comp_swap
            : nir_intrinsic_shared_atomic_fcomp_swap;
         break;
      case ir_intrinsic_vote_any:
         op = nir_intrinsic_vote_any;
         break;
      case ir_intrinsic_vote_all:
         op = nir_intrinsic_vote_all;
         break;
      case ir_intrinsic_vote_eq:
         op = nir_intrinsic_vote_ieq;
         break;
      case ir_intrinsic_ballot:
         op = nir_intrinsic_ballot;
         break;
      case ir_intrinsic_read_invocation:
         op = nir_intrinsic_read_invocation;
         break;
      case ir_intrinsic_read_first_invocation:
         op = nir_intrinsic_read_first_invocation;
         break;
      case ir_intrinsic_helper_invocation:
         op = nir_intrinsic_is_helper_invocation;
         break;
      default:
         unreachable("not reached");
      }

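      /* Create the intrinsic and fill in its sources and destination
       * according to the op chosen above.
       */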
      nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
      nir_ssa_def *ret = &instr->dest.ssa;

      switch (op) {
      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap:
      case nir_intrinsic_deref_atomic_fadd:
      case nir_intrinsic_deref_atomic_fmin:
      case nir_intrinsic_deref_atomic_fmax:
      case nir_intrinsic_deref_atomic_fcomp_swap: {
         int param_count = ir->actual_parameters.length();
         assert(param_count == 2 || param_count == 3);

         /* Deref */
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *rvalue = (ir_rvalue *) param;
         ir_dereference *deref = rvalue->as_dereference();
         ir_swizzle *swizzle = NULL;
         if (!deref) {
            /* We may have a swizzle to pick off a single vec4 component */
            swizzle = rvalue->as_swizzle();
            assert(swizzle && swizzle->type->vector_elements == 1);
            deref = swizzle->val->as_dereference();
            assert(deref);
         }
         nir_deref_instr *nir_deref = evaluate_deref(deref);
         if (swizzle) {
            nir_deref = nir_build_deref_array_imm(&b, nir_deref,
                                                  swizzle->mask.x);
         }
         instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa);

         nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref));

         /* data1 parameter (this is always present) */
         param = param->get_next();
         ir_instruction *inst = (ir_instruction *) param;
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));

         /* data2 parameter (only with atomic_comp_swap) */
         if (param_count == 3) {
            assert(op == nir_intrinsic_deref_atomic_comp_swap ||
                   op == nir_intrinsic_deref_atomic_fcomp_swap);
            param = param->get_next();
            inst = (ir_instruction *) param;
            instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
         }

         /* Atomic result */
         assert(ir->return_deref);
         if (ir->return_deref->type->is_integer_64()) {
            nir_ssa_dest_init(&instr->instr, &instr->dest,
                              ir->return_deref->type->vector_elements, 64, NULL);
         } else {
            nir_ssa_dest_init(&instr->instr, &instr->dest,
                              ir->return_deref->type->vector_elements, 32, NULL);
         }
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_atomic_counter_read_deref:
      case nir_intrinsic_atomic_counter_inc_deref:
      case nir_intrinsic_atomic_counter_pre_dec_deref:
      case nir_intrinsic_atomic_counter_add_deref:
      case nir_intrinsic_atomic_counter_min_deref:
      case nir_intrinsic_atomic_counter_max_deref:
      case nir_intrinsic_atomic_counter_and_deref:
      case nir_intrinsic_atomic_counter_or_deref:
      case nir_intrinsic_atomic_counter_xor_deref:
      case nir_intrinsic_atomic_counter_exchange_deref:
      case nir_intrinsic_atomic_counter_comp_swap_deref: {
         /* Set the counter variable dereference. */
         exec_node *param = ir->actual_parameters.get_head();
         ir_dereference *counter = (ir_dereference *)param;

         instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->dest.ssa);
         param = param->get_next();

         /* Set the intrinsic destination. */
         if (ir->return_deref) {
            nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
         }

         /* Set the intrinsic parameters. */
         if (!param->is_tail_sentinel()) {
            instr->src[1] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         }

         if (!param->is_tail_sentinel()) {
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         }

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic_add:
      case nir_intrinsic_image_deref_atomic_imin:
      case nir_intrinsic_image_deref_atomic_umin:
      case nir_intrinsic_image_deref_atomic_imax:
      case nir_intrinsic_image_deref_atomic_umax:
      case nir_intrinsic_image_deref_atomic_and:
      case nir_intrinsic_image_deref_atomic_or:
      case nir_intrinsic_image_deref_atomic_xor:
      case nir_intrinsic_image_deref_atomic_exchange:
      case nir_intrinsic_image_deref_atomic_comp_swap:
      case nir_intrinsic_image_deref_atomic_fadd:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_atomic_inc_wrap:
      case nir_intrinsic_image_deref_atomic_dec_wrap: {
         nir_ssa_undef_instr *instr_undef =
            nir_ssa_undef_instr_create(shader, 1, 32);
         nir_builder_instr_insert(&b, &instr_undef->instr);

         /* Set the image variable dereference. */
         exec_node *param = ir->actual_parameters.get_head();
         ir_dereference *image = (ir_dereference *)param;
         nir_deref_instr *deref = evaluate_deref(image);
         const glsl_type *type = deref->type;

         nir_intrinsic_set_access(instr, deref_get_qualifier(deref));

         instr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
         param = param->get_next();

         /* Set the intrinsic destination. */
         if (ir->return_deref) {
            unsigned num_components = ir->return_deref->type->vector_elements;
            nir_ssa_dest_init(&instr->instr, &instr->dest,
                              num_components, 32, NULL);
         }

         if (op == nir_intrinsic_image_deref_size) {
            instr->num_components = instr->dest.ssa.num_components;
         } else if (op == nir_intrinsic_image_deref_load) {
            instr->num_components = 4;
            nir_intrinsic_set_dest_type(instr,
               nir_get_nir_type_for_glsl_base_type(type->sampled_type));
         } else if (op == nir_intrinsic_image_deref_store) {
            instr->num_components = 4;
            nir_intrinsic_set_src_type(instr,
               nir_get_nir_type_for_glsl_base_type(type->sampled_type));
         }

         if (op == nir_intrinsic_image_deref_size ||
             op == nir_intrinsic_image_deref_samples) {
            /* image_deref_size takes an LOD parameter which is always 0
             * coming from GLSL.
             */
            if (op == nir_intrinsic_image_deref_size)
               instr->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_builder_instr_insert(&b, &instr->instr);
            break;
         }

         /* Set the address argument, extending the coordinate vector to four
          * components.
          */
         nir_ssa_def *src_addr =
            evaluate_rvalue((ir_dereference *)param);
         nir_ssa_def *srcs[4];

         for (int i = 0; i < 4; i++) {
            if (i < type->coordinate_components())
               srcs[i] = nir_channel(&b, src_addr, i);
            else
               srcs[i] = &instr_undef->def;
         }

         instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
         param = param->get_next();

         /* Set the sample argument, which is undefined for single-sample
          * images.
          */
         if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else {
            instr->src[2] = nir_src_for_ssa(&instr_undef->def);
         }

         /* Set the intrinsic parameters. */
         if (!param->is_tail_sentinel()) {
            instr->src[3] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_load) {
            instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         if (!param->is_tail_sentinel()) {
            instr->src[4] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_store) {
            instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_memory_barrier:
      case nir_intrinsic_group_memory_barrier:
      case nir_intrinsic_memory_barrier_atomic_counter:
      case nir_intrinsic_memory_barrier_buffer:
      case nir_intrinsic_memory_barrier_image:
      case nir_intrinsic_memory_barrier_shared:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_shader_clock:
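         /* The shader clock is a 64-bit counter exposed as a uvec2 of two
          * 32-bit halves, hence the two-component, 32-bit destination.
          */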
1425          nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL);
1426          nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP);
1427          nir_builder_instr_insert(&b, &instr->instr);
1428          break;
1429       case nir_intrinsic_begin_invocation_interlock:
1430          nir_builder_instr_insert(&b, &instr->instr);
1431          break;
1432       case nir_intrinsic_end_invocation_interlock:
1433          nir_builder_instr_insert(&b, &instr->instr);
1434          break;
1435       case nir_intrinsic_store_ssbo: {
1436          exec_node *param = ir->actual_parameters.get_head();
1437          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
1438 
1439          param = param->get_next();
1440          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
1441 
1442          param = param->get_next();
1443          ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
1444 
1445          param = param->get_next();
1446          ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
1447          assert(write_mask);
1448 
1449          nir_ssa_def *nir_val = evaluate_rvalue(val);
1450          if (val->type->is_boolean())
1451             nir_val = nir_b2i32(&b, nir_val);
1452 
1453          instr->src[0] = nir_src_for_ssa(nir_val);
1454          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
1455          instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
1456          intrinsic_set_std430_align(instr, val->type);
1457          nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
1458          instr->num_components = val->type->vector_elements;
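         /* Final operand layout: src[0] = value (booleans widened to
          * 32 bits, since SSBO storage has no 1-bit type), src[1] = block
          * index, src[2] = byte offset, with the write mask and std430
          * alignment attached as constant indices.
          */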
1459 
1460          nir_builder_instr_insert(&b, &instr->instr);
1461          break;
1462       }
1463       case nir_intrinsic_load_shared: {
1464          exec_node *param = ir->actual_parameters.get_head();
1465          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
1466 
1467          nir_intrinsic_set_base(instr, 0);
1468          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
1469 
1470          const glsl_type *type = ir->return_deref->var->type;
1471          instr->num_components = type->vector_elements;
1472          intrinsic_set_std430_align(instr, type);
1473 
1474          /* Setup destination register */
1475          unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
1476          nir_ssa_dest_init(&instr->instr, &instr->dest,
1477                            type->vector_elements, bit_size, NULL);
1478 
1479          nir_builder_instr_insert(&b, &instr->instr);
1480 
1481          /* The value in shared memory is a 32-bit value */
1482          if (type->is_boolean())
1483             ret = nir_b2b1(&b, &instr->dest.ssa);
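         /* E.g. loading "shared bool flag;" emits a 32-bit load_shared
          * whose result is narrowed back to NIR's 1-bit bool here.
          */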
1484          break;
1485       }
1486       case nir_intrinsic_store_shared: {
1487          exec_node *param = ir->actual_parameters.get_head();
1488          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
1489 
1490          param = param->get_next();
1491          ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
1492 
1493          param = param->get_next();
1494          ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
1495          assert(write_mask);
1496 
1497          nir_intrinsic_set_base(instr, 0);
1498          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
1499 
1500          nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
1501 
1502          nir_ssa_def *nir_val = evaluate_rvalue(val);
1503          /* The value in shared memory is a 32-bit value */
1504          if (val->type->is_boolean())
1505             nir_val = nir_b2b32(&b, nir_val);
1506 
1507          instr->src[0] = nir_src_for_ssa(nir_val);
1508          instr->num_components = val->type->vector_elements;
1509          intrinsic_set_std430_align(instr, val->type);
1510 
1511          nir_builder_instr_insert(&b, &instr->instr);
1512          break;
1513       }
1514       case nir_intrinsic_shared_atomic_add:
1515       case nir_intrinsic_shared_atomic_imin:
1516       case nir_intrinsic_shared_atomic_umin:
1517       case nir_intrinsic_shared_atomic_imax:
1518       case nir_intrinsic_shared_atomic_umax:
1519       case nir_intrinsic_shared_atomic_and:
1520       case nir_intrinsic_shared_atomic_or:
1521       case nir_intrinsic_shared_atomic_xor:
1522       case nir_intrinsic_shared_atomic_exchange:
1523       case nir_intrinsic_shared_atomic_comp_swap:
1524       case nir_intrinsic_shared_atomic_fadd:
1525       case nir_intrinsic_shared_atomic_fmin:
1526       case nir_intrinsic_shared_atomic_fmax:
1527       case nir_intrinsic_shared_atomic_fcomp_swap:  {
1528          int param_count = ir->actual_parameters.length();
1529          assert(param_count == 2 || param_count == 3);
1530 
1531          /* Offset */
1532          exec_node *param = ir->actual_parameters.get_head();
1533          ir_instruction *inst = (ir_instruction *) param;
1534          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
1535 
1536          /* data1 parameter (this is always present) */
1537          param = param->get_next();
1538          inst = (ir_instruction *) param;
1539          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
1540 
1541          /* data2 parameter (only with atomic_comp_swap) */
1542          if (param_count == 3) {
1543             assert(op == nir_intrinsic_shared_atomic_comp_swap ||
1544                    op == nir_intrinsic_shared_atomic_fcomp_swap);
1545             param = param->get_next();
1546             inst = (ir_instruction *) param;
1547             instr->src[2] =
1548                nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
1549          }
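         /* E.g. GLSL's atomicCompSwap(shared_var, compare, data) lands
          * here with src[0] = offset of shared_var, src[1] = compare
          * (data1) and src[2] = data (data2); the pre-op value comes back
          * in the destination initialized below.
          */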
1550 
1551          /* Atomic result */
1552          assert(ir->return_deref);
1553          unsigned bit_size = glsl_get_bit_size(ir->return_deref->type);
1554          nir_ssa_dest_init(&instr->instr, &instr->dest,
1555                            ir->return_deref->type->vector_elements,
1556                            bit_size, NULL);
1557          nir_builder_instr_insert(&b, &instr->instr);
1558          break;
1559       }
1560       case nir_intrinsic_vote_ieq:
1561          instr->num_components = 1;
1562          /* fall-through */
1563       case nir_intrinsic_vote_any:
1564       case nir_intrinsic_vote_all: {
1565          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
1566 
1567          ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
1568          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
1569 
1570          nir_builder_instr_insert(&b, &instr->instr);
1571          break;
1572       }
1573 
1574       case nir_intrinsic_ballot: {
1575          nir_ssa_dest_init(&instr->instr, &instr->dest,
1576                            ir->return_deref->type->vector_elements, 64, NULL);
1577          instr->num_components = ir->return_deref->type->vector_elements;
1578 
1579          ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
1580          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
1581 
1582          nir_builder_instr_insert(&b, &instr->instr);
1583          break;
1584       }
1585       case nir_intrinsic_read_invocation: {
1586          nir_ssa_dest_init(&instr->instr, &instr->dest,
1587                            ir->return_deref->type->vector_elements, 32, NULL);
1588          instr->num_components = ir->return_deref->type->vector_elements;
1589 
1590          ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
1591          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
1592 
1593          ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next;
1594          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation));
1595 
1596          nir_builder_instr_insert(&b, &instr->instr);
1597          break;
1598       }
1599       case nir_intrinsic_read_first_invocation: {
1600          nir_ssa_dest_init(&instr->instr, &instr->dest,
1601                            ir->return_deref->type->vector_elements, 32, NULL);
1602          instr->num_components = ir->return_deref->type->vector_elements;
1603 
1604          ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
1605          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
1606 
1607          nir_builder_instr_insert(&b, &instr->instr);
1608          break;
1609       }
1610       case nir_intrinsic_is_helper_invocation: {
1611          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
1612          nir_builder_instr_insert(&b, &instr->instr);
1613          break;
1614       }
1615       default:
1616          unreachable("not reached");
1617       }
1618 
1619       if (ir->return_deref)
1620          nir_store_deref(&b, evaluate_deref(ir->return_deref), ret, ~0);
1621 
1622       return;
1623    }
1624 
1625    struct hash_entry *entry =
1626       _mesa_hash_table_search(this->overload_table, ir->callee);
1627    assert(entry);
1628    nir_function *callee = (nir_function *) entry->data;
1629 
1630    nir_call_instr *call = nir_call_instr_create(this->shader, callee);
1631 
1632    unsigned i = 0;
1633    nir_deref_instr *ret_deref = NULL;
1634    if (ir->return_deref) {
1635       nir_variable *ret_tmp =
1636          nir_local_variable_create(this->impl, ir->return_deref->type,
1637                                    "return_tmp");
1638       ret_deref = nir_build_deref_var(&b, ret_tmp);
1639       call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa);
1640    }
1641 
1642    foreach_two_lists(formal_node, &ir->callee->parameters,
1643                      actual_node, &ir->actual_parameters) {
1644       ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;
1645       ir_variable *sig_param = (ir_variable *) formal_node;
1646 
1647       if (sig_param->data.mode == ir_var_function_out) {
1648          nir_deref_instr *out_deref = evaluate_deref(param_rvalue);
1649          call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);
1650       } else if (sig_param->data.mode == ir_var_function_in) {
1651          nir_ssa_def *val = evaluate_rvalue(param_rvalue);
1652          nir_src src = nir_src_for_ssa(val);
1653 
1654          nir_src_copy(&call->params[i], &src, call);
1655       } else if (sig_param->data.mode == ir_var_function_inout) {
1656          unreachable("unimplemented: inout parameters");
1657       }
1658 
1659       i++;
1660    }
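   /* Resulting calling convention, e.g. for "float f(int x, out int y)":
    * params[0] = deref of the local return_tmp, params[1] = the SSA value
    * of x (in), params[2] = deref of the caller's y (out).  inout
    * parameters are not handled at this level.
    */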
1661 
1662    nir_builder_instr_insert(&b, &call->instr);
1663 
1664    if (ir->return_deref)
1665       nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0);
1666 }
1667 
1668 void
1669 nir_visitor::visit(ir_assignment *ir)
1670 {
1671    unsigned num_components = ir->lhs->type->vector_elements;
1672 
1673    b.exact = ir->lhs->variable_referenced()->data.invariant ||
1674              ir->lhs->variable_referenced()->data.precise;
1675 
1676    if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
1677        (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
1678       nir_deref_instr *lhs = evaluate_deref(ir->lhs);
1679       nir_deref_instr *rhs = evaluate_deref(ir->rhs);
1680       enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs);
1681       enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs);
1682       if (ir->condition) {
1683          nir_push_if(&b, evaluate_rvalue(ir->condition));
1684          nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
1685                                     rhs_qualifiers);
1686          nir_pop_if(&b, NULL);
1687       } else {
1688          nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
1689                                     rhs_qualifiers);
1690       }
1691       return;
1692    }
1693 
1694    assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
1695 
1696    ir->lhs->accept(this);
1697    nir_deref_instr *lhs_deref = this->deref;
1698    nir_ssa_def *src = evaluate_rvalue(ir->rhs);
1699 
1700    if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
1701       /* GLSL IR will give us the input to the write-masked assignment in a
1702        * single packed vector.  So, for example, if the writemask is xzw, then
1703        * we have to swizzle x -> x, y -> z, and z -> w and get the y component
1704        * from the load.
1705        */
1706       unsigned swiz[4];
1707       unsigned component = 0;
1708       for (unsigned i = 0; i < 4; i++) {
1709          swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
1710       }
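      /* E.g. write_mask = xzw (0b1101) yields swiz = {0, 0, 1, 2}: src.x
       * feeds the x slot, src.y the z slot and src.z the w slot, while the
       * y entry is a don't-care since its mask bit is clear.
       */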
1711       src = nir_swizzle(&b, src, swiz, num_components);
1712    }
1713 
1714    enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref);
1715    if (ir->condition) {
1716       nir_push_if(&b, evaluate_rvalue(ir->condition));
1717       nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,
1718                                   qualifiers);
1719       nir_pop_if(&b, NULL);
1720    } else {
1721       nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,
1722                                   qualifiers);
1723    }
1724 }
1725 
1726 /*
1727  * Given an instruction, returns a pointer to its destination or NULL if there
1728  * is no destination.
1729  *
1730  * Note that this only handles instructions we generate at this level.
1731  */
1732 static nir_dest *
1733 get_instr_dest(nir_instr *instr)
1734 {
1735    nir_alu_instr *alu_instr;
1736    nir_intrinsic_instr *intrinsic_instr;
1737    nir_tex_instr *tex_instr;
1738 
1739    switch (instr->type) {
1740       case nir_instr_type_alu:
1741          alu_instr = nir_instr_as_alu(instr);
1742          return &alu_instr->dest.dest;
1743 
1744       case nir_instr_type_intrinsic:
1745          intrinsic_instr = nir_instr_as_intrinsic(instr);
1746          if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
1747             return &intrinsic_instr->dest;
1748          else
1749             return NULL;
1750 
1751       case nir_instr_type_tex:
1752          tex_instr = nir_instr_as_tex(instr);
1753          return &tex_instr->dest;
1754 
1755       default:
1756          unreachable("not reached");
1757    }
1758 
1759    return NULL;
1760 }
1761 
1762 void
1763 nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
1764                        unsigned bit_size)
1765 {
1766    nir_dest *dest = get_instr_dest(instr);
1767 
1768    if (dest)
1769       nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);
1770 
1771    nir_builder_instr_insert(&b, instr);
1772 
1773    if (dest) {
1774       assert(dest->is_ssa);
1775       this->result = &dest->ssa;
1776    }
1777 }
1778 
1779 nir_ssa_def *
1780 nir_visitor::evaluate_rvalue(ir_rvalue* ir)
1781 {
1782    ir->accept(this);
1783    if (ir->as_dereference() || ir->as_constant()) {
1784       /*
1785        * A dereference is being used on the right hand side, which means we
1786        * must emit a variable load.
1787        */
1788 
1789       enum gl_access_qualifier access = deref_get_qualifier(this->deref);
1790       this->result = nir_load_deref_with_access(&b, this->deref, access);
1791    }
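   /* E.g. a bare variable reference takes the load path above, while an
    * rvalue like "a + b" arrives with this->result already set by
    * visit(ir_expression).
    */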
1792 
1793    return this->result;
1794 }
1795 
1796 static bool
1797 type_is_float(glsl_base_type type)
1798 {
1799    return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE ||
1800       type == GLSL_TYPE_FLOAT16;
1801 }
1802 
1803 static bool
1804 type_is_signed(glsl_base_type type)
1805 {
1806    return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 ||
1807       type == GLSL_TYPE_INT16;
1808 }
1809 
1810 void
1811 nir_visitor::visit(ir_expression *ir)
1812 {
1813    /* Some special cases */
1814    switch (ir->operation) {
1815    case ir_unop_interpolate_at_centroid:
1816    case ir_binop_interpolate_at_offset:
1817    case ir_binop_interpolate_at_sample: {
1818       ir_dereference *deref = ir->operands[0]->as_dereference();
1819       ir_swizzle *swizzle = NULL;
1820       if (!deref) {
1821          /* The API does not allow a swizzle here, but the varying packing
1822           * code may have pushed one in.
1823           */
1824          swizzle = ir->operands[0]->as_swizzle();
1825          assert(swizzle);
1826          deref = swizzle->val->as_dereference();
1827          assert(deref);
1828       }
1829 
1830       deref->accept(this);
1831 
1832       nir_intrinsic_op op;
1833       if (nir_deref_mode_is(this->deref, nir_var_shader_in)) {
1834          switch (ir->operation) {
1835          case ir_unop_interpolate_at_centroid:
1836             op = nir_intrinsic_interp_deref_at_centroid;
1837             break;
1838          case ir_binop_interpolate_at_offset:
1839             op = nir_intrinsic_interp_deref_at_offset;
1840             break;
1841          case ir_binop_interpolate_at_sample:
1842             op = nir_intrinsic_interp_deref_at_sample;
1843             break;
1844          default:
1845             unreachable("Invalid interpolation intrinsic");
1846          }
1847       } else {
1848          /* This case can happen if the vertex shader does not write the
1849           * given varying.  In this case, the linker will lower it to a
1850           * global variable.  Since interpolating a variable makes no
1851           * sense, we'll just turn it into a load which will probably
1852           * eventually end up as an SSA definition.
1853           */
1854          assert(nir_deref_mode_is(this->deref, nir_var_shader_temp));
1855          op = nir_intrinsic_load_deref;
1856       }
1857 
1858       nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
1859       intrin->num_components = deref->type->vector_elements;
1860       intrin->src[0] = nir_src_for_ssa(&this->deref->dest.ssa);
1861 
1862       if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
1863           intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
1864          intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
1865 
1866          unsigned bit_size = glsl_get_bit_size(deref->type);
1867       add_instr(&intrin->instr, deref->type->vector_elements, bit_size);
1868 
1869       if (swizzle) {
1870          unsigned swiz[4] = {
1871             swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
1872          };
1873 
1874          result = nir_swizzle(&b, result, swiz,
1875                               swizzle->type->vector_elements);
1876       }
1877 
1878       return;
1879    }
1880 
1881    case ir_unop_ssbo_unsized_array_length: {
1882       nir_intrinsic_instr *intrin =
1883          nir_intrinsic_instr_create(b.shader,
1884                                     nir_intrinsic_deref_buffer_array_length);
1885 
1886       ir_dereference *deref = ir->operands[0]->as_dereference();
1887       intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa);
1888 
1889       add_instr(&intrin->instr, 1, 32);
1890       return;
1891    }
1892 
1893    case ir_binop_ubo_load:
1894       /* UBO loads should only have been lowered in GLSL IR for non-NIR
1895        * drivers; NIR drivers use gl_nir_lower_buffers() instead.
1896        */
1897       unreachable("Invalid operation nir doesn't want lowered ubo loads");
1898    default:
1899       break;
1900    }
1901 
1902    nir_ssa_def *srcs[4];
1903    for (unsigned i = 0; i < ir->num_operands; i++)
1904       srcs[i] = evaluate_rvalue(ir->operands[i]);
1905 
1906    glsl_base_type types[4];
1907    for (unsigned i = 0; i < ir->num_operands; i++)
1908       types[i] = ir->operands[i]->type->base_type;
1909 
1910    glsl_base_type out_type = ir->type->base_type;
1911 
1912    switch (ir->operation) {
1913    case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
1914    case ir_unop_logic_not:
1915       result = nir_inot(&b, srcs[0]);
1916       break;
1917    case ir_unop_neg:
1918       result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
1919                                        : nir_ineg(&b, srcs[0]);
1920       break;
1921    case ir_unop_abs:
1922       result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
1923                                        : nir_iabs(&b, srcs[0]);
1924       break;
1925    case ir_unop_clz:
1926       result = nir_uclz(&b, srcs[0]);
1927       break;
1928    case ir_unop_saturate:
1929       assert(type_is_float(types[0]));
1930       result = nir_fsat(&b, srcs[0]);
1931       break;
1932    case ir_unop_sign:
1933       result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
1934                                        : nir_isign(&b, srcs[0]);
1935       break;
1936    case ir_unop_rcp:  result = nir_frcp(&b, srcs[0]);  break;
1937    case ir_unop_rsq:  result = nir_frsq(&b, srcs[0]);  break;
1938    case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
1939    case ir_unop_exp:  unreachable("ir_unop_exp should have been lowered");
1940    case ir_unop_log:  unreachable("ir_unop_log should have been lowered");
1941    case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
1942    case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
1943    case ir_unop_i2f:
1944    case ir_unop_u2f:
1945    case ir_unop_b2f:
1946    case ir_unop_f2i:
1947    case ir_unop_f2u:
1948    case ir_unop_f2b:
1949    case ir_unop_i2b:
1950    case ir_unop_b2i:
1951    case ir_unop_b2i64:
1952    case ir_unop_d2f:
1953    case ir_unop_f2d:
1954    case ir_unop_f162f:
1955    case ir_unop_f2f16:
1956    case ir_unop_f162b:
1957    case ir_unop_b2f16:
1958    case ir_unop_i2i:
1959    case ir_unop_u2u:
1960    case ir_unop_d2i:
1961    case ir_unop_d2u:
1962    case ir_unop_d2b:
1963    case ir_unop_i2d:
1964    case ir_unop_u2d:
1965    case ir_unop_i642i:
1966    case ir_unop_i642u:
1967    case ir_unop_i642f:
1968    case ir_unop_i642b:
1969    case ir_unop_i642d:
1970    case ir_unop_u642i:
1971    case ir_unop_u642u:
1972    case ir_unop_u642f:
1973    case ir_unop_u642d:
1974    case ir_unop_i2i64:
1975    case ir_unop_u2i64:
1976    case ir_unop_f2i64:
1977    case ir_unop_d2i64:
1978    case ir_unop_i2u64:
1979    case ir_unop_u2u64:
1980    case ir_unop_f2u64:
1981    case ir_unop_d2u64:
1982    case ir_unop_i2u:
1983    case ir_unop_u2i:
1984    case ir_unop_i642u64:
1985    case ir_unop_u642i64: {
1986       nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]);
1987       nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
1988       result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type,
1989                                  nir_rounding_mode_undef),
1990                                  srcs[0], NULL, NULL, NULL);
1991       /* b2i and b2f don't have fixed bit-size versions so the builder will
1992        * just assume 32 and we have to fix it up here.
1993        */
1994       result->bit_size = nir_alu_type_get_type_size(dst_type);
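      /* E.g. for ir_unop_b2i64 the chosen op is a b2i whose destination
       * has no fixed size; the builder defaulted it to 32 bits, and the
       * override to nir_alu_type_get_type_size(int64) = 64 above produces
       * the intended 64-bit result.
       */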
1995       break;
1996    }
1997 
1998    case ir_unop_f2fmp: {
1999       result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL);
2000       break;
2001    }
2002 
2003    case ir_unop_i2imp: {
2004       result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
2005       break;
2006    }
2007 
2008    case ir_unop_u2ump: {
2009       result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
2010       break;
2011    }
2012 
2013    case ir_unop_bitcast_i2f:
2014    case ir_unop_bitcast_f2i:
2015    case ir_unop_bitcast_u2f:
2016    case ir_unop_bitcast_f2u:
2017    case ir_unop_bitcast_i642d:
2018    case ir_unop_bitcast_d2i64:
2019    case ir_unop_bitcast_u642d:
2020    case ir_unop_bitcast_d2u64:
2021    case ir_unop_subroutine_to_int:
2022       /* no-op */
2023       result = nir_mov(&b, srcs[0]);
2024       break;
2025    case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
2026    case ir_unop_ceil:  result = nir_fceil(&b, srcs[0]); break;
2027    case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
2028    case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
2029    case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break;
2030    case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break;
2031    case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
2032    case ir_unop_sin:   result = nir_fsin(&b, srcs[0]); break;
2033    case ir_unop_cos:   result = nir_fcos(&b, srcs[0]); break;
2034    case ir_unop_dFdx:        result = nir_fddx(&b, srcs[0]); break;
2035    case ir_unop_dFdy:        result = nir_fddy(&b, srcs[0]); break;
2036    case ir_unop_dFdx_fine:   result = nir_fddx_fine(&b, srcs[0]); break;
2037    case ir_unop_dFdy_fine:   result = nir_fddy_fine(&b, srcs[0]); break;
2038    case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
2039    case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
2040    case ir_unop_pack_snorm_2x16:
2041       result = nir_pack_snorm_2x16(&b, srcs[0]);
2042       break;
2043    case ir_unop_pack_snorm_4x8:
2044       result = nir_pack_snorm_4x8(&b, srcs[0]);
2045       break;
2046    case ir_unop_pack_unorm_2x16:
2047       result = nir_pack_unorm_2x16(&b, srcs[0]);
2048       break;
2049    case ir_unop_pack_unorm_4x8:
2050       result = nir_pack_unorm_4x8(&b, srcs[0]);
2051       break;
2052    case ir_unop_pack_half_2x16:
2053       result = nir_pack_half_2x16(&b, srcs[0]);
2054       break;
2055    case ir_unop_unpack_snorm_2x16:
2056       result = nir_unpack_snorm_2x16(&b, srcs[0]);
2057       break;
2058    case ir_unop_unpack_snorm_4x8:
2059       result = nir_unpack_snorm_4x8(&b, srcs[0]);
2060       break;
2061    case ir_unop_unpack_unorm_2x16:
2062       result = nir_unpack_unorm_2x16(&b, srcs[0]);
2063       break;
2064    case ir_unop_unpack_unorm_4x8:
2065       result = nir_unpack_unorm_4x8(&b, srcs[0]);
2066       break;
2067    case ir_unop_unpack_half_2x16:
2068       result = nir_unpack_half_2x16(&b, srcs[0]);
2069       break;
2070    case ir_unop_pack_sampler_2x32:
2071    case ir_unop_pack_image_2x32:
2072    case ir_unop_pack_double_2x32:
2073    case ir_unop_pack_int_2x32:
2074    case ir_unop_pack_uint_2x32:
2075       result = nir_pack_64_2x32(&b, srcs[0]);
2076       break;
2077    case ir_unop_unpack_sampler_2x32:
2078    case ir_unop_unpack_image_2x32:
2079    case ir_unop_unpack_double_2x32:
2080    case ir_unop_unpack_int_2x32:
2081    case ir_unop_unpack_uint_2x32:
2082       result = nir_unpack_64_2x32(&b, srcs[0]);
2083       break;
2084    case ir_unop_bitfield_reverse:
2085       result = nir_bitfield_reverse(&b, srcs[0]);
2086       break;
2087    case ir_unop_bit_count:
2088       result = nir_bit_count(&b, srcs[0]);
2089       break;
2090    case ir_unop_find_msb:
2091       switch (types[0]) {
2092       case GLSL_TYPE_UINT:
2093          result = nir_ufind_msb(&b, srcs[0]);
2094          break;
2095       case GLSL_TYPE_INT:
2096          result = nir_ifind_msb(&b, srcs[0]);
2097          break;
2098       default:
2099          unreachable("Invalid type for findMSB()");
2100       }
2101       break;
2102    case ir_unop_find_lsb:
2103       result = nir_find_lsb(&b, srcs[0]);
2104       break;
2105 
2106    case ir_unop_get_buffer_size: {
2107       nir_intrinsic_instr *load = nir_intrinsic_instr_create(
2108          this->shader,
2109          nir_intrinsic_get_ssbo_size);
2110       load->num_components = ir->type->vector_elements;
2111       load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
2112       unsigned bit_size = glsl_get_bit_size(ir->type);
2113       add_instr(&load->instr, ir->type->vector_elements, bit_size);
2114       return;
2115    }
2116 
2117    case ir_unop_atan:
2118       result = nir_atan(&b, srcs[0]);
2119       break;
2120 
2121    case ir_binop_add:
2122       result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
2123                                        : nir_iadd(&b, srcs[0], srcs[1]);
2124       break;
2125    case ir_binop_add_sat:
2126       result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1])
2127                                         : nir_uadd_sat(&b, srcs[0], srcs[1]);
2128       break;
2129    case ir_binop_sub:
2130       result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
2131                                        : nir_isub(&b, srcs[0], srcs[1]);
2132       break;
2133    case ir_binop_sub_sat:
2134       result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1])
2135                                         : nir_usub_sat(&b, srcs[0], srcs[1]);
2136       break;
2137    case ir_binop_abs_sub:
2138       /* out_type is always unsigned for ir_binop_abs_sub, so we have to key
2139        * on the type of the sources.
2140        */
2141       result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1])
2142                                         : nir_uabs_usub(&b, srcs[0], srcs[1]);
2143       break;
2144    case ir_binop_avg:
2145       result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1])
2146                                         : nir_uhadd(&b, srcs[0], srcs[1]);
2147       break;
2148    case ir_binop_avg_round:
2149       result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1])
2150                                         : nir_urhadd(&b, srcs[0], srcs[1]);
2151       break;
2152    case ir_binop_mul_32x16:
2153       result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1])
2154                                         : nir_umul_32x16(&b, srcs[0], srcs[1]);
2155       break;
2156    case ir_binop_mul:
2157       if (type_is_float(out_type))
2158          result = nir_fmul(&b, srcs[0], srcs[1]);
2159       else if (out_type == GLSL_TYPE_INT64 &&
2160                (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
2161                 ir->operands[1]->type->base_type == GLSL_TYPE_INT))
2162          result = nir_imul_2x32_64(&b, srcs[0], srcs[1]);
2163       else if (out_type == GLSL_TYPE_UINT64 &&
2164                (ir->operands[0]->type->base_type == GLSL_TYPE_UINT ||
2165                 ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
2166          result = nir_umul_2x32_64(&b, srcs[0], srcs[1]);
2167       else
2168          result = nir_imul(&b, srcs[0], srcs[1]);
2169       break;
2170    case ir_binop_div:
2171       if (type_is_float(out_type))
2172          result = nir_fdiv(&b, srcs[0], srcs[1]);
2173       else if (type_is_signed(out_type))
2174          result = nir_idiv(&b, srcs[0], srcs[1]);
2175       else
2176          result = nir_udiv(&b, srcs[0], srcs[1]);
2177       break;
2178    case ir_binop_mod:
2179       result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
2180                                        : nir_umod(&b, srcs[0], srcs[1]);
2181       break;
2182    case ir_binop_min:
2183       if (type_is_float(out_type))
2184          result = nir_fmin(&b, srcs[0], srcs[1]);
2185       else if (type_is_signed(out_type))
2186          result = nir_imin(&b, srcs[0], srcs[1]);
2187       else
2188          result = nir_umin(&b, srcs[0], srcs[1]);
2189       break;
2190    case ir_binop_max:
2191       if (type_is_float(out_type))
2192          result = nir_fmax(&b, srcs[0], srcs[1]);
2193       else if (type_is_signed(out_type))
2194          result = nir_imax(&b, srcs[0], srcs[1]);
2195       else
2196          result = nir_umax(&b, srcs[0], srcs[1]);
2197       break;
2198    case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
2199    case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
2200    case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
2201    case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
2202    case ir_binop_logic_and:
2203       result = nir_iand(&b, srcs[0], srcs[1]);
2204       break;
2205    case ir_binop_logic_or:
2206       result = nir_ior(&b, srcs[0], srcs[1]);
2207       break;
2208    case ir_binop_logic_xor:
2209       result = nir_ixor(&b, srcs[0], srcs[1]);
2210       break;
2211    case ir_binop_lshift: result = nir_ishl(&b, srcs[0], nir_u2u32(&b, srcs[1])); break;
2212    case ir_binop_rshift:
2213       result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], nir_u2u32(&b, srcs[1]))
2214                                           : nir_ushr(&b, srcs[0], nir_u2u32(&b, srcs[1]));
2215       break;
2216    case ir_binop_imul_high:
2217       result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
2218                                            : nir_umul_high(&b, srcs[0], srcs[1]);
2219       break;
2220    case ir_binop_carry:  result = nir_uadd_carry(&b, srcs[0], srcs[1]);  break;
2221    case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
2222    case ir_binop_less:
2223       if (type_is_float(types[0]))
2224          result = nir_flt(&b, srcs[0], srcs[1]);
2225       else if (type_is_signed(types[0]))
2226          result = nir_ilt(&b, srcs[0], srcs[1]);
2227       else
2228          result = nir_ult(&b, srcs[0], srcs[1]);
2229       break;
2230    case ir_binop_gequal:
2231       if (type_is_float(types[0]))
2232          result = nir_fge(&b, srcs[0], srcs[1]);
2233       else if (type_is_signed(types[0]))
2234          result = nir_ige(&b, srcs[0], srcs[1]);
2235       else
2236          result = nir_uge(&b, srcs[0], srcs[1]);
2237       break;
2238    case ir_binop_equal:
2239       if (type_is_float(types[0]))
2240          result = nir_feq(&b, srcs[0], srcs[1]);
2241       else
2242          result = nir_ieq(&b, srcs[0], srcs[1]);
2243       break;
2244    case ir_binop_nequal:
2245       if (type_is_float(types[0]))
2246          result = nir_fneu(&b, srcs[0], srcs[1]);
2247       else
2248          result = nir_ine(&b, srcs[0], srcs[1]);
2249       break;
2250    case ir_binop_all_equal:
2251       if (type_is_float(types[0])) {
2252          switch (ir->operands[0]->type->vector_elements) {
2253             case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
2254             case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
2255             case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
2256             case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
2257             default:
2258                unreachable("not reached");
2259          }
2260       } else {
2261          switch (ir->operands[0]->type->vector_elements) {
2262             case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
2263             case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
2264             case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
2265             case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
2266             default:
2267                unreachable("not reached");
2268          }
2269       }
2270       break;
2271    case ir_binop_any_nequal:
2272       if (type_is_float(types[0])) {
2273          switch (ir->operands[0]->type->vector_elements) {
2274             case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break;
2275             case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
2276             case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
2277             case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
2278             default:
2279                unreachable("not reached");
2280          }
2281       } else {
2282          switch (ir->operands[0]->type->vector_elements) {
2283             case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
2284             case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
2285             case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
2286             case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
2287             default:
2288                unreachable("not reached");
2289          }
2290       }
2291       break;
2292    case ir_binop_dot:
2293       switch (ir->operands[0]->type->vector_elements) {
2294          case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
2295          case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
2296          case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
2297          default:
2298             unreachable("not reached");
2299       }
2300       break;
2301    case ir_binop_vector_extract: {
2302       result = nir_channel(&b, srcs[0], 0);
2303       for (unsigned i = 1; i < ir->operands[0]->type->vector_elements; i++) {
2304          nir_ssa_def *swizzled = nir_channel(&b, srcs[0], i);
2305          result = nir_bcsel(&b, nir_ieq_imm(&b, srcs[1], i),
2306                             swizzled, result);
2307       }
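      /* E.g. for a vec3 "v" indexed by "i" this builds
       * bcsel(i == 2, v.z, bcsel(i == 1, v.y, v.x)); an out-of-range index
       * simply falls through to component x.
       */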
2308       break;
2309    }
2310 
2311    case ir_binop_atan2:
2312       result = nir_atan2(&b, srcs[0], srcs[1]);
2313       break;
2314 
2315    case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
2316    case ir_triop_fma:
2317       result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
2318       break;
2319    case ir_triop_lrp:
2320       result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
2321       break;
2322    case ir_triop_csel:
2323       result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
2324       break;
2325    case ir_triop_bitfield_extract:
2326       result = ir->type->is_int_16_32() ?
2327          nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) :
2328          nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2]));
2329       break;
2330    case ir_quadop_bitfield_insert:
2331       result = nir_bitfield_insert(&b,
2332                                    nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]),
2333                                    nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3]));
2334       break;
2335    case ir_quadop_vector:
2336       result = nir_vec(&b, srcs, ir->type->vector_elements);
2337       break;
2338 
2339    default:
2340       unreachable("not reached");
2341    }
2342 }
2343 
2344 void
2345 nir_visitor::visit(ir_swizzle *ir)
2346 {
2347    unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
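   /* E.g. for "v.zzxy" the mask is {2, 2, 0, 1}; entries beyond
    * ir->type->vector_elements are ignored by nir_swizzle below.
    */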
2348    result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
2349                         ir->type->vector_elements);
2350 }
2351 
2352 void
2353 nir_visitor::visit(ir_texture *ir)
2354 {
2355    unsigned num_srcs;
2356    nir_texop op;
2357    switch (ir->op) {
2358    case ir_tex:
2359       op = nir_texop_tex;
2360       num_srcs = 1; /* coordinate */
2361       break;
2362 
2363    case ir_txb:
2364    case ir_txl:
2365       op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
2366       num_srcs = 2; /* coordinate, bias/lod */
2367       break;
2368 
2369    case ir_txd:
2370       op = nir_texop_txd; /* coordinate, dPdx, dPdy */
2371       num_srcs = 3;
2372       break;
2373 
2374    case ir_txf:
2375       op = nir_texop_txf;
2376       if (ir->lod_info.lod != NULL)
2377          num_srcs = 2; /* coordinate, lod */
2378       else
2379          num_srcs = 1; /* coordinate */
2380       break;
2381 
2382    case ir_txf_ms:
2383       op = nir_texop_txf_ms;
2384       num_srcs = 2; /* coordinate, sample_index */
2385       break;
2386 
2387    case ir_txs:
2388       op = nir_texop_txs;
2389       if (ir->lod_info.lod != NULL)
2390          num_srcs = 1; /* lod */
2391       else
2392          num_srcs = 0;
2393       break;
2394 
2395    case ir_lod:
2396       op = nir_texop_lod;
2397       num_srcs = 1; /* coordinate */
2398       break;
2399 
2400    case ir_tg4:
2401       op = nir_texop_tg4;
2402       num_srcs = 1; /* coordinate */
2403       break;
2404 
2405    case ir_query_levels:
2406       op = nir_texop_query_levels;
2407       num_srcs = 0;
2408       break;
2409 
2410    case ir_texture_samples:
2411       op = nir_texop_texture_samples;
2412       num_srcs = 0;
2413       break;
2414 
2415    case ir_samples_identical:
2416       op = nir_texop_samples_identical;
2417       num_srcs = 1; /* coordinate */
2418       break;
2419 
2420    default:
2421       unreachable("not reached");
2422    }
2423 
2424    if (ir->projector != NULL)
2425       num_srcs++;
2426    if (ir->shadow_comparator != NULL)
2427       num_srcs++;
2428    /* offsets are constants we store inside nir_tex_instr::tg4_offsets */
2429    if (ir->offset != NULL && !ir->offset->type->is_array())
2430       num_srcs++;
2431 
2432    /* Add two for the texture and sampler derefs */
2433    num_srcs += 2;
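   /* E.g. a textureLod() call on a shadow sampler ends up with
    * num_srcs = 5: coordinate plus lod from the ir_txl case above, one for
    * the shadow comparator, and two for the texture and sampler derefs.
    */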
2434 
2435    nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
2436 
2437    instr->op = op;
2438    instr->sampler_dim =
2439       (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
2440    instr->is_array = ir->sampler->type->sampler_array;
2441    instr->is_shadow = ir->sampler->type->sampler_shadow;
2442    if (instr->is_shadow)
2443       instr->is_new_style_shadow = (ir->type->vector_elements == 1);
2444    switch (ir->type->base_type) {
2445    case GLSL_TYPE_FLOAT:
2446       instr->dest_type = nir_type_float;
2447       break;
2448    case GLSL_TYPE_FLOAT16:
2449       instr->dest_type = nir_type_float16;
2450       break;
2451    case GLSL_TYPE_INT16:
2452       instr->dest_type = nir_type_int16;
2453       break;
2454    case GLSL_TYPE_UINT16:
2455       instr->dest_type = nir_type_uint16;
2456       break;
2457    case GLSL_TYPE_INT:
2458       instr->dest_type = nir_type_int;
2459       break;
2460    case GLSL_TYPE_BOOL:
2461    case GLSL_TYPE_UINT:
2462       instr->dest_type = nir_type_uint;
2463       break;
2464    default:
2465       unreachable("not reached");
2466    }
2467 
2468    nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler);
2469 
2470    /* check for bindless handles */
2471    if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
2472        nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
2473       nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
2474       instr->src[0].src = nir_src_for_ssa(load);
2475       instr->src[0].src_type = nir_tex_src_texture_handle;
2476       instr->src[1].src = nir_src_for_ssa(load);
2477       instr->src[1].src_type = nir_tex_src_sampler_handle;
2478    } else {
2479       instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
2480       instr->src[0].src_type = nir_tex_src_texture_deref;
2481       instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
2482       instr->src[1].src_type = nir_tex_src_sampler_deref;
2483    }
2484 
2485    unsigned src_number = 2;
2486 
2487    if (ir->coordinate != NULL) {
2488       instr->coord_components = ir->coordinate->type->vector_elements;
2489       instr->src[src_number].src =
2490          nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
2491       instr->src[src_number].src_type = nir_tex_src_coord;
2492       src_number++;
2493    }
2494 
2495    if (ir->projector != NULL) {
2496       instr->src[src_number].src =
2497          nir_src_for_ssa(evaluate_rvalue(ir->projector));
2498       instr->src[src_number].src_type = nir_tex_src_projector;
2499       src_number++;
2500    }
2501 
2502    if (ir->shadow_comparator != NULL) {
2503       instr->src[src_number].src =
2504          nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator));
2505       instr->src[src_number].src_type = nir_tex_src_comparator;
2506       src_number++;
2507    }
2508 
2509    if (ir->offset != NULL) {
2510       if (ir->offset->type->is_array()) {
2511          for (int i = 0; i < ir->offset->type->array_size(); i++) {
2512             const ir_constant *c =
2513                ir->offset->as_constant()->get_array_element(i);
2514 
2515             for (unsigned j = 0; j < 2; ++j) {
2516                int val = c->get_int_component(j);
2517                assert(val <= 31 && val >= -32);
2518                instr->tg4_offsets[i][j] = val;
2519             }
2520          }
2521       } else {
2522          assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
2523 
2524          instr->src[src_number].src =
2525             nir_src_for_ssa(evaluate_rvalue(ir->offset));
2526          instr->src[src_number].src_type = nir_tex_src_offset;
2527          src_number++;
2528       }
2529    }
2530 
2531    switch (ir->op) {
2532    case ir_txb:
2533       instr->src[src_number].src =
2534          nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
2535       instr->src[src_number].src_type = nir_tex_src_bias;
2536       src_number++;
2537       break;
2538 
2539    case ir_txl:
2540    case ir_txf:
2541    case ir_txs:
2542       if (ir->lod_info.lod != NULL) {
2543          instr->src[src_number].src =
2544             nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
2545          instr->src[src_number].src_type = nir_tex_src_lod;
2546          src_number++;
2547       }
2548       break;
2549 
2550    case ir_txd:
2551       instr->src[src_number].src =
2552          nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
2553       instr->src[src_number].src_type = nir_tex_src_ddx;
2554       src_number++;
2555       instr->src[src_number].src =
2556          nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
2557       instr->src[src_number].src_type = nir_tex_src_ddy;
2558       src_number++;
2559       break;
2560 
2561    case ir_txf_ms:
2562       instr->src[src_number].src =
2563          nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
2564       instr->src[src_number].src_type = nir_tex_src_ms_index;
2565       src_number++;
2566       break;
2567 
2568    case ir_tg4:
2569       instr->component = ir->lod_info.component->as_constant()->value.u[0];
2570       break;
2571 
2572    default:
2573       break;
2574    }
2575 
2576    assert(src_number == num_srcs);
2577 
2578    unsigned bit_size = glsl_get_bit_size(ir->type);
2579    add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
2580 }
2581 
2582 void
2583 nir_visitor::visit(ir_constant *ir)
2584 {
2585    /*
2586     * We don't know if this variable is an array or struct that gets
2587     * dereferenced, so do the safe thing and make it a variable with a
2588     * constant initializer and return a dereference.
2589     */
2590 
2591    nir_variable *var =
2592       nir_local_variable_create(this->impl, ir->type, "const_temp");
2593    var->data.read_only = true;
2594    var->constant_initializer = constant_copy(ir, var);
2595 
2596    this->deref = nir_build_deref_var(&b, var);
2597 }
2598 
2599 void
2600 nir_visitor::visit(ir_dereference_variable *ir)
2601 {
2602    if (ir->variable_referenced()->data.mode == ir_var_function_out) {
2603       unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0;
2604 
2605       foreach_in_list(ir_variable, param, &sig->parameters) {
2606          if (param == ir->variable_referenced()) {
2607             break;
2608          }
2609          i++;
2610       }
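      /* E.g. for "float f(out int y)", param 0 is the return deref and
       * param 1 is y, so a reference to y inside f loads param index 1 and
       * casts it back to a deref; for a void return the count starts at 0.
       */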
2611 
2612       this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
2613                                          nir_var_function_temp, ir->type, 0);
2614       return;
2615    }
2616 
2617    assert(ir->variable_referenced()->data.mode != ir_var_function_inout);
2618 
2619    struct hash_entry *entry =
2620       _mesa_hash_table_search(this->var_table, ir->var);
2621    assert(entry);
2622    nir_variable *var = (nir_variable *) entry->data;
2623 
2624    this->deref = nir_build_deref_var(&b, var);
2625 }
2626 
2627 void
2628 nir_visitor::visit(ir_dereference_record *ir)
2629 {
2630    ir->record->accept(this);
2631 
2632    int field_index = ir->field_idx;
2633    assert(field_index >= 0);
2634 
2635    this->deref = nir_build_deref_struct(&b, this->deref, field_index);
2636 }
2637 
2638 void
2639 nir_visitor::visit(ir_dereference_array *ir)
2640 {
2641    nir_ssa_def *index = evaluate_rvalue(ir->array_index);
2642 
2643    ir->array->accept(this);
2644 
2645    this->deref = nir_build_deref_array(&b, this->deref, index);
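   /* The three dereference visitors compose: an l-value like "s.arr[i]"
    * produces deref_var(s) -> deref_struct(.arr) -> deref_array(i), each
    * step consuming the this->deref left behind by the previous accept().
    */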
2646 }
2647 
2648 void
2649 nir_visitor::visit(ir_barrier *)
2650 {
2651    if (shader->info.stage == MESA_SHADER_COMPUTE) {
2652       nir_intrinsic_instr *shared_barrier =
2653          nir_intrinsic_instr_create(this->shader,
2654                                     nir_intrinsic_memory_barrier_shared);
2655       nir_builder_instr_insert(&b, &shared_barrier->instr);
2656    } else if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
2657       nir_intrinsic_instr *patch_barrier =
2658          nir_intrinsic_instr_create(this->shader,
2659                                     nir_intrinsic_memory_barrier_tcs_patch);
2660       nir_builder_instr_insert(&b, &patch_barrier->instr);
2661    }
2662 
2663    nir_intrinsic_instr *instr =
2664       nir_intrinsic_instr_create(this->shader, nir_intrinsic_control_barrier);
2665    nir_builder_instr_insert(&b, &instr->instr);
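   /* Net effect: a compute-shader barrier() emits memory_barrier_shared
    * followed by control_barrier, and a TCS barrier() emits
    * memory_barrier_tcs_patch plus control_barrier, matching GLSL's rule
    * that barrier() also orders accesses to shared/patch storage.
    */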
2666 }
2667 
2668 nir_shader *
2669 glsl_float64_funcs_to_nir(struct gl_context *ctx,
2670                           const nir_shader_compiler_options *options)
2671 {
2672    /* We pretend it's a vertex shader.  Ultimately, the stage shouldn't
2673     * matter because we're not optimizing anything here.
2674     */
2675    struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX);
2676    sh->Source = float64_source;
2677    sh->CompileStatus = COMPILE_FAILURE;
2678    _mesa_glsl_compile_shader(ctx, sh, false, false, true);
2679 
2680    if (!sh->CompileStatus) {
2681       if (sh->InfoLog) {
2682          _mesa_problem(ctx,
2683                        "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
2684                        sh->InfoLog, float64_source);
2685       }
2686       return NULL;
2687    }
2688 
2689    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL);
2690 
2691    nir_visitor v1(ctx, nir);
2692    nir_function_visitor v2(&v1);
2693    v2.run(sh->ir);
2694    visit_exec_list(sh->ir, &v1);
2695 
2696    /* _mesa_delete_shader will try to free sh->Source but it's static const */
2697    sh->Source = NULL;
2698    _mesa_delete_shader(ctx, sh);
2699 
2700    nir_validate_shader(nir, "float64_funcs_to_nir");
2701 
2702    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
2703    NIR_PASS_V(nir, nir_lower_returns);
2704    NIR_PASS_V(nir, nir_inline_functions);
2705    NIR_PASS_V(nir, nir_opt_deref);
2706 
2707    /* Do some optimizations to clean up the shader now.  By optimizing the
2708     * functions in the library, we avoid having to re-do that work every
2709     * time we inline a copy of a function.  Reducing basic blocks also helps
2710     * with compile times.
2711     */
2712    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2713    NIR_PASS_V(nir, nir_copy_prop);
2714    NIR_PASS_V(nir, nir_opt_dce);
2715    NIR_PASS_V(nir, nir_opt_cse);
2716    NIR_PASS_V(nir, nir_opt_gcm, true);
2717    NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
2718    NIR_PASS_V(nir, nir_opt_dce);
2719 
2720    return nir;
2721 }
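/* The returned shader is a library of fp64 helper functions; in Mesa it is
 * passed to nir_lower_doubles() as the "softfp64" shader, which inlines
 * calls to these functions wherever double ops need the software fallback,
 * hence the return-lowering and pre-optimization above.
 */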
2722