/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "float64_glsl.h"
#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_optimization.h"
#include "program.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "compiler/nir/nir_deref.h"
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "main/context.h"
#include "util/u_math.h"
/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

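/* Editor's illustration (not part of the upstream file): a GLSL statement
 * such as
 *
 *    tmp = a + b;
 *
 * arrives here as an ir_assignment whose RHS is an ir_expression, and is
 * emitted as roughly the following NIR, still in deref form:
 *
 *    ssa_1 = load_deref &a
 *    ssa_3 = load_deref &b
 *    ssa_4 = fadd ssa_1, ssa_3
 *    store_deref &tmp, ssa_4
 */
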
namespace {

class nir_visitor : public ir_visitor
{
public:
   nir_visitor(const struct gl_constants *consts, nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

   /* True if we have any output rvalues */
   bool has_output_rvalue;

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);

   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2, nir_ssa_def *src3);

   bool supports_std430;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_ssa_def *result; /* result of the expression tree last visited */

   nir_deref_instr *evaluate_deref(ir_instruction *ir);

   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);

   /* most recent deref instruction created */
   nir_deref_instr *deref;

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   ir_function_signature *sig;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function_overload */
   struct hash_table *overload_table;

   /* set of nir_variables that hold sparse results */
   struct set *sparse_variable_set;

   void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                               nir_ssa_def *dest);
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

/* glsl_to_nir can only handle converting certain function parameters
 * to NIR. This visitor checks for parameters it can't currently handle.
 */
class ir_function_param_visitor : public ir_hierarchical_visitor
{
public:
   ir_function_param_visitor()
      : unsupported(false)
   {
   }

   virtual ir_visitor_status visit_enter(ir_function_signature *ir)
   {

      if (ir->is_intrinsic())
         return visit_continue;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         if (!param->type->is_vector() && !param->type->is_scalar()) {
            unsupported = true;
            return visit_stop;
         }

         if (param->data.mode == ir_var_function_inout) {
            unsupported = true;
            return visit_stop;
         }
      }

      if (!glsl_type_is_vector_or_scalar(ir->return_type) &&
          !ir->return_type->is_void()) {
         unsupported = true;
         return visit_stop;
      }

      return visit_continue;
   }

   bool unsupported;
};

} /* end of anonymous namespace */


static bool
has_unsupported_function_param(exec_list *ir)
{
   ir_function_param_visitor visitor;
   visit_list_elements(&visitor, ir);
   return visitor.unsupported;
}

nir_shader *
glsl_to_nir(const struct gl_constants *consts,
            const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   const struct gl_shader_compiler_options *gl_options =
      &consts->ShaderCompilerOptions[stage];

   /* glsl_to_nir can only handle converting certain function parameters
    * to NIR. If we find something we can't handle then we get the GLSL IR
    * opts to remove it before we continue on.
    *
    * TODO: add missing GLSL IR to NIR support and remove this loop.
    */
   while (has_unsupported_function_param(sh->ir)) {
      do_common_optimization(sh->ir, true, gl_options, consts->NativeIntegers);
   }

   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(consts, shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* The GLSL IR won't be needed anymore. */
   ralloc_free(sh->ir);
   sh->ir = NULL;

   nir_validate_shader(shader, "after glsl to nir, before function inline");

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   nir_lower_variable_initializers(shader, nir_var_all);
   nir_lower_returns(shader);
   nir_inline_functions(shader);
   nir_opt_deref(shader);

   nir_validate_shader(shader, "after function inlining and return lowering");

   /* Now that we have inlined everything remove all of the functions except
    * main().
    */
   foreach_list_typed_safe(nir_function, function, node, &shader->functions) {
      if (strcmp("main", function->name) != 0) {
         exec_node_remove(&function->node);
      }
   }

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   shader->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;

   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
      shader->info.fs.advanced_blend_modes = sh->Program->info.fs.advanced_blend_modes;

      nir_foreach_variable_with_modes(var, shader,
                                      nir_var_shader_in |
                                      nir_var_system_value) {
         if (var->data.mode == nir_var_system_value &&
             (var->data.location == SYSTEM_VALUE_SAMPLE_ID ||
              var->data.location == SYSTEM_VALUE_SAMPLE_POS))
            shader->info.fs.uses_sample_shading = true;

         if (var->data.mode == nir_var_shader_in && var->data.sample)
            shader->info.fs.uses_sample_shading = true;
      }

      if (v1.has_output_rvalue)
         shader->info.fs.uses_sample_shading = true;
   }

   return shader;
}
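
/* Editor's note: a hypothetical call site, to sketch how the entry point
 * above is used (the surrounding names are illustrative only):
 *
 *    nir_shader *nir = glsl_to_nir(consts, shader_prog, MESA_SHADER_FRAGMENT,
 *                                  options);
 *
 * On return, sh->ir has been freed, and every function except main() has
 * been inlined and removed from the shader.
 */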

nir_visitor::nir_visitor(const struct gl_constants *consts, nir_shader *shader)
{
   this->supports_std430 = consts->UseSTD430AsDefaultPacking;
   this->shader = shader;
   this->is_global = true;
   this->has_output_rvalue = false;
   this->var_table = _mesa_pointer_hash_table_create(NULL);
   this->overload_table = _mesa_pointer_hash_table_create(NULL);
   this->sparse_variable_set = _mesa_pointer_set_create(NULL);
   this->result = NULL;
   this->impl = NULL;
   this->deref = NULL;
   this->sig = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
   _mesa_set_destroy(this->sparse_variable_set, NULL);
}

nir_deref_instr *
nir_visitor::evaluate_deref(ir_instruction *ir)
{
   ir->accept(this);
   return this->deref;
}

nir_constant *
nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u32 = ir->value.u[r];

      break;

   case GLSL_TYPE_UINT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u16 = ir->value.u16[r];
      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i32 = ir->value.i[r];

      break;

   case GLSL_TYPE_INT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i16 = ir->value.i16[r];
      break;

   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
      if (cols > 1) {
         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
         ret->num_elements = cols;
         for (unsigned c = 0; c < cols; c++) {
            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
            col_const->num_elements = 0;
            switch (ir->type->base_type) {
            case GLSL_TYPE_FLOAT:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f32 = ir->value.f[c * rows + r];
               break;

            case GLSL_TYPE_FLOAT16:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
               break;

            case GLSL_TYPE_DOUBLE:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f64 = ir->value.d[c * rows + r];
               break;

            default:
               unreachable("Cannot get here from the first level switch");
            }
            ret->elements[c] = col_const;
         }
      } else {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f32 = ir->value.f[r];
            break;

         case GLSL_TYPE_FLOAT16:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].u16 = ir->value.f16[r];
            break;

         case GLSL_TYPE_DOUBLE:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f64 = ir->value.d[r];
            break;

         default:
            unreachable("Cannot get here from the first level switch");
         }
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u64 = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i64 = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].b = ir->value.b[r];

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}
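
/* Editor's illustration: for a GLSL constant such as
 *
 *    mat2 m = mat2(1.0, 2.0, 3.0, 4.0);
 *
 * constant_copy() above yields a nir_constant with num_elements == 2, where
 * elements[c]->values[r].f32 holds column c, row r, i.e. the column-major
 * layout of ir->value.f[].
 */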

void
nir_visitor::adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                                    nir_ssa_def *dest)
{
   const glsl_type *texel_type = type->field_type("texel");
   assert(texel_type);

   assert(var_deref->deref_type == nir_deref_type_var);
   nir_variable *var = var_deref->var;

   /* Adjust the nir_variable's type to match sparse NIR instructions: the
    * nir_variable is created with the struct type from the ir_variable, but
    * sparse NIR instructions write a plain vector destination.
    */
   var->type = glsl_type::get_instance(texel_type->get_base_type()->base_type,
                                       dest->num_components, 1);

   var_deref->type = var->type;

   /* Record the adjusted variable. */
   _mesa_set_add(this->sparse_variable_set, var);
}
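
/* Editor's illustration: GLSL IR gives sparse fetches a struct result along
 * the lines of
 *
 *    struct { int code; vec4 texel; }
 *
 * while the sparse NIR instruction produces one vector whose extra trailing
 * component carries the residency code, so adjust_sparse_variable() retypes
 * the destination variable to a five-component vector in this example.
 */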

static unsigned
get_nir_how_declared(unsigned how_declared)
{
   if (how_declared == ir_var_hidden)
      return nir_var_hidden;

   return nir_var_declared_normally;
}

void
nir_visitor::visit(ir_variable *ir)
{
   /* FINISHME: inout parameters */
   assert(ir->data.mode != ir_var_function_inout);

   if (ir->data.mode == ir_var_function_out)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.assigned = ir->data.assigned;
   var->data.always_active_io = ir->data.always_active_io;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
   var->data.invariant = ir->data.invariant;
   var->data.location = ir->data.location;
   var->data.must_be_shader_input = ir->data.must_be_shader_input;
   var->data.stream = ir->data.stream;
   if (ir->data.stream & (1u << 31))
      var->data.stream |= NIR_STREAM_PACKED;

   var->data.precision = ir->data.precision;
   var->data.explicit_location = ir->data.explicit_location;
   var->data.matrix_layout = ir->data.matrix_layout;
   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
   var->data.compact = false;

   switch (ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_shader_temp;
      else
         var->data.mode = nir_var_function_temp;
      break;

   case ir_var_function_in:
   case ir_var_const_in:
      var->data.mode = nir_var_function_temp;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;

         if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
             (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
              ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }

         if (shader->info.stage > MESA_SHADER_VERTEX &&
             ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
             ir->data.location <= VARYING_SLOT_CULL_DIST1) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
           ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }

      if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
          ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
          ir->data.location <= VARYING_SLOT_CULL_DIST1) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }
      break;

   case ir_var_uniform:
      if (ir->get_interface_type())
         var->data.mode = nir_var_mem_ubo;
      else if (ir->type->contains_image() && !ir->data.bindless)
         var->data.mode = nir_var_image;
      else
         var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_mem_ssbo;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   case ir_var_shader_shared:
      var->data.mode = nir_var_mem_shared;
      break;

   default:
      unreachable("not reached");
   }

   unsigned mem_access = 0;
   if (ir->data.memory_read_only)
      mem_access |= ACCESS_NON_WRITEABLE;
   if (ir->data.memory_write_only)
      mem_access |= ACCESS_NON_READABLE;
   if (ir->data.memory_coherent)
      mem_access |= ACCESS_COHERENT;
   if (ir->data.memory_volatile)
      mem_access |= ACCESS_VOLATILE;
   if (ir->data.memory_restrict)
      mem_access |= ACCESS_RESTRICT;

   var->interface_type = ir->get_interface_type();

   /* For UBO and SSBO variables, we need explicit types */
   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
      const glsl_type *explicit_ifc_type =
         ir->get_interface_type()->get_explicit_interface_type(supports_std430);

      var->interface_type = explicit_ifc_type;

      if (ir->type->without_array()->is_interface()) {
         /* If the type contains the interface, wrap the explicit type in the
          * right number of arrays.
          */
         var->type = glsl_type_wrap_in_arrays(explicit_ifc_type, ir->type);
      } else {
         /* Otherwise, this variable is one entry in the interface */
         UNUSED bool found = false;
         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
            const glsl_struct_field *field =
               &explicit_ifc_type->fields.structure[i];
            if (strcmp(ir->name, field->name) != 0)
               continue;

            var->type = field->type;
            if (field->memory_read_only)
               mem_access |= ACCESS_NON_WRITEABLE;
            if (field->memory_write_only)
               mem_access |= ACCESS_NON_READABLE;
            if (field->memory_coherent)
               mem_access |= ACCESS_COHERENT;
            if (field->memory_volatile)
               mem_access |= ACCESS_VOLATILE;
            if (field->memory_restrict)
               mem_access |= ACCESS_RESTRICT;

            found = true;
            break;
         }
         assert(found);
      }
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.explicit_binding = ir->data.explicit_binding;
   var->data.explicit_offset = ir->data.explicit_xfb_offset;
   var->data.bindless = ir->data.bindless;
   var->data.offset = ir->data.offset;
   var->data.access = (gl_access_qualifier)mem_access;

   if (var->type->without_array()->is_image()) {
      var->data.image.format = ir->data.image_format;
   } else if (var->data.mode == nir_var_shader_out) {
      var->data.xfb.buffer = ir->data.xfb_buffer;
      var->data.xfb.stride = ir->data.xfb_stride;
   }

   var->data.fb_fetch_output = ir->data.fb_fetch_output;
   var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
   var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;

   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = rzalloc_array(var, nir_state_slot,
                                       var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 4; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
         var->state_slots[i].swizzle = state_slots[i].swizzle;
      }
   } else {
      var->state_slots = NULL;
   }

   var->constant_initializer = constant_copy(ir->constant_initializer, var);

   if (var->data.mode == nir_var_function_temp)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
}
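
/* Editor's note: the data.compact handling above is what lets e.g. a
 * tessellation shader's
 *
 *    patch out float gl_TessLevelOuter[4];
 *
 * become a compact NIR array variable, so later passes can pack it into a
 * single vec4 slot rather than four separate locations.
 */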

ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());
   if (strcmp(ir->function_name(), "main") == 0)
      func->is_entrypoint = true;

   func->num_params = ir->parameters.length() +
                      (ir->return_type != glsl_type::void_type);
   func->params = ralloc_array(shader, nir_parameter, func->num_params);

   unsigned np = 0;

   if (ir->return_type != glsl_type::void_type) {
      /* The return value is a variable deref (basically an out parameter) */
      func->params[np].num_components = 1;
      func->params[np].bit_size = 32;
      np++;
   }

   foreach_in_list(ir_variable, param, &ir->parameters) {
      /* FINISHME: pass arrays, structs, etc by reference? */
      assert(param->type->is_vector() || param->type->is_scalar());

      if (param->data.mode == ir_var_function_in) {
         func->params[np].num_components = param->type->vector_elements;
         func->params[np].bit_size = glsl_get_bit_size(param->type);
      } else {
         func->params[np].num_components = 1;
         func->params[np].bit_size = 32;
      }
      np++;
   }
   assert(np == func->num_params);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}
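
/* Editor's illustration of the parameter layout built above: given
 *
 *    float f(int x, out vec2 y);
 *
 * func->num_params == 3: params[0] is the return "out parameter" (a 32-bit
 * deref), params[1] is x passed by value, and params[2] is the deref for y.
 */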

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   this->sig = ir;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      this->is_global = false;

      nir_builder_init(&b, impl);
      b.cursor = nir_after_cf_list(&impl->body);

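      /* Parameter 0 carries the return deref when the function returns a
       * value (see create_function()), so named parameters start at index 1
       * in that case.
       */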
      unsigned i = (ir->return_type != glsl_type::void_type) ? 1 : 0;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         nir_variable *var =
            nir_local_variable_create(impl, param->type, param->name);

         if (param->data.mode == ir_var_function_in) {
            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
         }

         _mesa_hash_table_insert(var_table, param, var);
         i++;
      }

      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   if (ir->condition)
      nir_discard_if(&b, evaluate_rvalue(ir->condition));
   else
      nir_discard(&b);
}

void
nir_visitor::visit(ir_demote *ir)
{
   nir_demote(&b);
}

void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_emit_vertex(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_end_primitive(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_deref_instr *ret_deref =
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

      nir_ssa_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

static void
intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
{
   unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
}
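
/* Editor's example: for a vec3 of 32-bit floats this sets an alignment of
 * (32 / 8) * util_next_power_of_two(3) == 16 bytes, matching std430's rule
 * that a vec3 is aligned like a vec4.
 */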

/* Accumulate any qualifiers along the deref chain to get the actual
 * load/store qualifier.
 */

static enum gl_access_qualifier
deref_get_qualifier(nir_deref_instr *deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned qualifiers = path.path[0]->var->data.access;

   const glsl_type *parent_type = path.path[0]->type;
   for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) {
      nir_deref_instr *cur = *cur_ptr;

      if (parent_type->is_interface()) {
         const struct glsl_struct_field *field =
            &parent_type->fields.structure[cur->strct.index];
         if (field->memory_read_only)
            qualifiers |= ACCESS_NON_WRITEABLE;
         if (field->memory_write_only)
            qualifiers |= ACCESS_NON_READABLE;
         if (field->memory_coherent)
            qualifiers |= ACCESS_COHERENT;
         if (field->memory_volatile)
            qualifiers |= ACCESS_VOLATILE;
         if (field->memory_restrict)
            qualifiers |= ACCESS_RESTRICT;
      }

      parent_type = cur->type;
   }

   nir_deref_path_finish(&path);

   return (gl_access_qualifier) qualifiers;
}
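
/* Editor's example: for an access like buf.member[i], where the block is
 * declared coherent and the member writeonly, the walk above ORs the
 * variable's ACCESS_COHERENT with the field's ACCESS_NON_READABLE, so the
 * resulting load/store intrinsic carries both qualifiers.
 */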

void
nir_visitor::visit(ir_call *ir)
{
   if (ir->callee->is_intrinsic()) {
      nir_intrinsic_op op;

      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_generic_atomic_add:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd;
         break;
      case ir_intrinsic_generic_atomic_and:
         op = nir_intrinsic_deref_atomic_and;
         break;
      case ir_intrinsic_generic_atomic_or:
         op = nir_intrinsic_deref_atomic_or;
         break;
      case ir_intrinsic_generic_atomic_xor:
         op = nir_intrinsic_deref_atomic_xor;
         break;
      case ir_intrinsic_generic_atomic_min:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_deref_atomic_imin;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_deref_atomic_umin;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_deref_atomic_fmin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_max:
         assert(ir->return_deref);
         if (ir->return_deref->type == glsl_type::int_type ||
             ir->return_deref->type == glsl_type::int64_t_type)
            op = nir_intrinsic_deref_atomic_imax;
         else if (ir->return_deref->type == glsl_type::uint_type ||
                  ir->return_deref->type == glsl_type::uint64_t_type)
            op = nir_intrinsic_deref_atomic_umax;
         else if (ir->return_deref->type == glsl_type::float_type)
            op = nir_intrinsic_deref_atomic_fmax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_generic_atomic_exchange:
         op = nir_intrinsic_deref_atomic_exchange;
         break;
      case ir_intrinsic_generic_atomic_comp_swap:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_deref_atomic_comp_swap
            : nir_intrinsic_deref_atomic_fcomp_swap;
         break;
      case ir_intrinsic_atomic_counter_read:
         op = nir_intrinsic_atomic_counter_read_deref;
         break;
      case ir_intrinsic_atomic_counter_increment:
         op = nir_intrinsic_atomic_counter_inc_deref;
         break;
      case ir_intrinsic_atomic_counter_predecrement:
         op = nir_intrinsic_atomic_counter_pre_dec_deref;
         break;
      case ir_intrinsic_atomic_counter_add:
         op = nir_intrinsic_atomic_counter_add_deref;
         break;
      case ir_intrinsic_atomic_counter_and:
         op = nir_intrinsic_atomic_counter_and_deref;
         break;
      case ir_intrinsic_atomic_counter_or:
         op = nir_intrinsic_atomic_counter_or_deref;
         break;
      case ir_intrinsic_atomic_counter_xor:
         op = nir_intrinsic_atomic_counter_xor_deref;
         break;
      case ir_intrinsic_atomic_counter_min:
         op = nir_intrinsic_atomic_counter_min_deref;
         break;
      case ir_intrinsic_atomic_counter_max:
         op = nir_intrinsic_atomic_counter_max_deref;
         break;
      case ir_intrinsic_atomic_counter_exchange:
         op = nir_intrinsic_atomic_counter_exchange_deref;
         break;
      case ir_intrinsic_atomic_counter_comp_swap:
         op = nir_intrinsic_atomic_counter_comp_swap_deref;
         break;
      case ir_intrinsic_image_load:
         op = nir_intrinsic_image_deref_load;
         break;
      case ir_intrinsic_image_store:
         op = nir_intrinsic_image_deref_store;
         break;
      case ir_intrinsic_image_atomic_add:
         op = ir->return_deref->type->is_integer_32_64()
            ? nir_intrinsic_image_deref_atomic_add
            : nir_intrinsic_image_deref_atomic_fadd;
         break;
      case ir_intrinsic_image_atomic_min:
         if (ir->return_deref->type == glsl_type::int_type)
            op = nir_intrinsic_image_deref_atomic_imin;
         else if (ir->return_deref->type == glsl_type::uint_type)
            op = nir_intrinsic_image_deref_atomic_umin;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_max:
         if (ir->return_deref->type == glsl_type::int_type)
            op = nir_intrinsic_image_deref_atomic_imax;
         else if (ir->return_deref->type == glsl_type::uint_type)
            op = nir_intrinsic_image_deref_atomic_umax;
         else
            unreachable("Invalid type");
         break;
      case ir_intrinsic_image_atomic_and:
         op = nir_intrinsic_image_deref_atomic_and;
         break;
      case ir_intrinsic_image_atomic_or:
         op = nir_intrinsic_image_deref_atomic_or;
         break;
      case ir_intrinsic_image_atomic_xor:
         op = nir_intrinsic_image_deref_atomic_xor;
         break;
      case ir_intrinsic_image_atomic_exchange:
         op = nir_intrinsic_image_deref_atomic_exchange;
         break;
      case ir_intrinsic_image_atomic_comp_swap:
         op = nir_intrinsic_image_deref_atomic_comp_swap;
         break;
      case ir_intrinsic_image_atomic_inc_wrap:
         op = nir_intrinsic_image_deref_atomic_inc_wrap;
         break;
      case ir_intrinsic_image_atomic_dec_wrap:
         op = nir_intrinsic_image_deref_atomic_dec_wrap;
         break;
      case ir_intrinsic_memory_barrier:
         op = nir_intrinsic_memory_barrier;
         break;
      case ir_intrinsic_image_size:
         op = nir_intrinsic_image_deref_size;
         break;
      case ir_intrinsic_image_samples:
         op = nir_intrinsic_image_deref_samples;
         break;
      case ir_intrinsic_image_sparse_load:
         op = nir_intrinsic_image_deref_sparse_load;
         break;
      case ir_intrinsic_ssbo_store:
      case ir_intrinsic_ssbo_load:
      case ir_intrinsic_ssbo_atomic_add:
      case ir_intrinsic_ssbo_atomic_and:
      case ir_intrinsic_ssbo_atomic_or:
      case ir_intrinsic_ssbo_atomic_xor:
      case ir_intrinsic_ssbo_atomic_min:
      case ir_intrinsic_ssbo_atomic_max:
      case ir_intrinsic_ssbo_atomic_exchange:
      case ir_intrinsic_ssbo_atomic_comp_swap:
         /* SSBO stores/loads should only have been lowered in GLSL IR for
          * non-NIR drivers; NIR drivers make use of gl_nir_lower_buffers()
          * instead.
          */
1120          unreachable("Invalid operation nir doesn't want lowered ssbo "
1121                      "store/loads");
1122       case ir_intrinsic_shader_clock:
1123          op = nir_intrinsic_shader_clock;
1124          break;
1125       case ir_intrinsic_begin_invocation_interlock:
1126          op = nir_intrinsic_begin_invocation_interlock;
1127          break;
1128       case ir_intrinsic_end_invocation_interlock:
1129          op = nir_intrinsic_end_invocation_interlock;
1130          break;
1131       case ir_intrinsic_group_memory_barrier:
1132          op = nir_intrinsic_group_memory_barrier;
1133          break;
1134       case ir_intrinsic_memory_barrier_atomic_counter:
1135          op = nir_intrinsic_memory_barrier_atomic_counter;
1136          break;
1137       case ir_intrinsic_memory_barrier_buffer:
1138          op = nir_intrinsic_memory_barrier_buffer;
1139          break;
1140       case ir_intrinsic_memory_barrier_image:
1141          op = nir_intrinsic_memory_barrier_image;
1142          break;
1143       case ir_intrinsic_memory_barrier_shared:
1144          op = nir_intrinsic_memory_barrier_shared;
1145          break;
1146       case ir_intrinsic_shared_load:
1147          op = nir_intrinsic_load_shared;
1148          break;
1149       case ir_intrinsic_shared_store:
1150          op = nir_intrinsic_store_shared;
1151          break;
1152       case ir_intrinsic_shared_atomic_add:
1153          op = ir->return_deref->type->is_integer_32_64()
1154             ? nir_intrinsic_shared_atomic_add
1155             : nir_intrinsic_shared_atomic_fadd;
1156          break;
1157       case ir_intrinsic_shared_atomic_and:
1158          op = nir_intrinsic_shared_atomic_and;
1159          break;
1160       case ir_intrinsic_shared_atomic_or:
1161          op = nir_intrinsic_shared_atomic_or;
1162          break;
1163       case ir_intrinsic_shared_atomic_xor:
1164          op = nir_intrinsic_shared_atomic_xor;
1165          break;
1166       case ir_intrinsic_shared_atomic_min:
1167          assert(ir->return_deref);
1168          if (ir->return_deref->type == glsl_type::int_type ||
1169              ir->return_deref->type == glsl_type::int64_t_type)
1170             op = nir_intrinsic_shared_atomic_imin;
1171          else if (ir->return_deref->type == glsl_type::uint_type ||
1172                   ir->return_deref->type == glsl_type::uint64_t_type)
1173             op = nir_intrinsic_shared_atomic_umin;
1174          else if (ir->return_deref->type == glsl_type::float_type)
1175             op = nir_intrinsic_shared_atomic_fmin;
1176          else
1177             unreachable("Invalid type");
1178          break;
1179       case ir_intrinsic_shared_atomic_max:
1180          assert(ir->return_deref);
1181          if (ir->return_deref->type == glsl_type::int_type ||
1182              ir->return_deref->type == glsl_type::int64_t_type)
1183             op = nir_intrinsic_shared_atomic_imax;
1184          else if (ir->return_deref->type == glsl_type::uint_type ||
1185                   ir->return_deref->type == glsl_type::uint64_t_type)
1186             op = nir_intrinsic_shared_atomic_umax;
1187          else if (ir->return_deref->type == glsl_type::float_type)
1188             op = nir_intrinsic_shared_atomic_fmax;
1189          else
1190             unreachable("Invalid type");
1191          break;
1192       case ir_intrinsic_shared_atomic_exchange:
1193          op = nir_intrinsic_shared_atomic_exchange;
1194          break;
1195       case ir_intrinsic_shared_atomic_comp_swap:
1196          op = ir->return_deref->type->is_integer_32_64()
1197             ? nir_intrinsic_shared_atomic_comp_swap
1198             : nir_intrinsic_shared_atomic_fcomp_swap;
1199          break;
1200       case ir_intrinsic_vote_any:
1201          op = nir_intrinsic_vote_any;
1202          break;
1203       case ir_intrinsic_vote_all:
1204          op = nir_intrinsic_vote_all;
1205          break;
1206       case ir_intrinsic_vote_eq:
1207          op = nir_intrinsic_vote_ieq;
1208          break;
1209       case ir_intrinsic_ballot:
1210          op = nir_intrinsic_ballot;
1211          break;
1212       case ir_intrinsic_read_invocation:
1213          op = nir_intrinsic_read_invocation;
1214          break;
1215       case ir_intrinsic_read_first_invocation:
1216          op = nir_intrinsic_read_first_invocation;
1217          break;
1218       case ir_intrinsic_helper_invocation:
1219          op = nir_intrinsic_is_helper_invocation;
1220          break;
1221       case ir_intrinsic_is_sparse_texels_resident:
1222          op = nir_intrinsic_is_sparse_texels_resident;
1223          break;
1224       default:
1225          unreachable("not reached");
1226       }
1227 
1228       nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
1229       nir_ssa_def *ret = &instr->dest.ssa;
1230 
1231       switch (op) {
1232       case nir_intrinsic_deref_atomic_add:
1233       case nir_intrinsic_deref_atomic_imin:
1234       case nir_intrinsic_deref_atomic_umin:
1235       case nir_intrinsic_deref_atomic_imax:
1236       case nir_intrinsic_deref_atomic_umax:
1237       case nir_intrinsic_deref_atomic_and:
1238       case nir_intrinsic_deref_atomic_or:
1239       case nir_intrinsic_deref_atomic_xor:
1240       case nir_intrinsic_deref_atomic_exchange:
1241       case nir_intrinsic_deref_atomic_comp_swap:
1242       case nir_intrinsic_deref_atomic_fadd:
1243       case nir_intrinsic_deref_atomic_fmin:
1244       case nir_intrinsic_deref_atomic_fmax:
1245       case nir_intrinsic_deref_atomic_fcomp_swap: {
1246          int param_count = ir->actual_parameters.length();
1247          assert(param_count == 2 || param_count == 3);
1248 
1249          /* Deref */
1250          exec_node *param = ir->actual_parameters.get_head();
1251          ir_rvalue *rvalue = (ir_rvalue *) param;
1252          ir_dereference *deref = rvalue->as_dereference();
1253          ir_swizzle *swizzle = NULL;
1254          if (!deref) {
1255             /* We may have a swizzle to pick off a single vec4 component */
1256             swizzle = rvalue->as_swizzle();
1257             assert(swizzle && swizzle->type->vector_elements == 1);
1258             deref = swizzle->val->as_dereference();
1259             assert(deref);
1260          }
1261          nir_deref_instr *nir_deref = evaluate_deref(deref);
1262          if (swizzle) {
1263             nir_deref = nir_build_deref_array_imm(&b, nir_deref,
1264                                                   swizzle->mask.x);
1265          }
1266          instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa);
1267 
1268          nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref));
1269 
1270          /* data1 parameter (this is always present) */
1271          param = param->get_next();
1272          ir_instruction *inst = (ir_instruction *) param;
1273          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
1274 
1275          /* data2 parameter (only with atomic_comp_swap) */
1276          if (param_count == 3) {
1277             assert(op == nir_intrinsic_deref_atomic_comp_swap ||
1278                    op == nir_intrinsic_deref_atomic_fcomp_swap);
1279             param = param->get_next();
1280             inst = (ir_instruction *) param;
1281             instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
1282          }
1283 
1284          /* Atomic result */
1285          assert(ir->return_deref);
1286          if (ir->return_deref->type->is_integer_64()) {
1287             nir_ssa_dest_init(&instr->instr, &instr->dest,
1288                               ir->return_deref->type->vector_elements, 64, NULL);
1289          } else {
1290             nir_ssa_dest_init(&instr->instr, &instr->dest,
1291                               ir->return_deref->type->vector_elements, 32, NULL);
1292          }
1293          nir_builder_instr_insert(&b, &instr->instr);
1294          break;
1295       }
1296       case nir_intrinsic_atomic_counter_read_deref:
1297       case nir_intrinsic_atomic_counter_inc_deref:
1298       case nir_intrinsic_atomic_counter_pre_dec_deref:
1299       case nir_intrinsic_atomic_counter_add_deref:
1300       case nir_intrinsic_atomic_counter_min_deref:
1301       case nir_intrinsic_atomic_counter_max_deref:
1302       case nir_intrinsic_atomic_counter_and_deref:
1303       case nir_intrinsic_atomic_counter_or_deref:
1304       case nir_intrinsic_atomic_counter_xor_deref:
1305       case nir_intrinsic_atomic_counter_exchange_deref:
1306       case nir_intrinsic_atomic_counter_comp_swap_deref: {
1307          /* Set the counter variable dereference. */
1308          exec_node *param = ir->actual_parameters.get_head();
1309          ir_dereference *counter = (ir_dereference *)param;
1310 
1311          instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->dest.ssa);
1312          param = param->get_next();
1313 
1314          /* Set the intrinsic destination. */
1315          if (ir->return_deref) {
1316             nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
1317          }
1318 
1319          /* Set the intrinsic parameters. */
1320          if (!param->is_tail_sentinel()) {
1321             instr->src[1] =
1322                nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
1323             param = param->get_next();
1324          }
1325 
1326          if (!param->is_tail_sentinel()) {
1327             instr->src[2] =
1328                nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
1329             param = param->get_next();
1330          }
1331 
1332          nir_builder_instr_insert(&b, &instr->instr);
1333          break;
1334       }
1335       case nir_intrinsic_image_deref_load:
1336       case nir_intrinsic_image_deref_store:
1337       case nir_intrinsic_image_deref_atomic_add:
1338       case nir_intrinsic_image_deref_atomic_imin:
1339       case nir_intrinsic_image_deref_atomic_umin:
1340       case nir_intrinsic_image_deref_atomic_imax:
1341       case nir_intrinsic_image_deref_atomic_umax:
1342       case nir_intrinsic_image_deref_atomic_and:
1343       case nir_intrinsic_image_deref_atomic_or:
1344       case nir_intrinsic_image_deref_atomic_xor:
1345       case nir_intrinsic_image_deref_atomic_exchange:
1346       case nir_intrinsic_image_deref_atomic_comp_swap:
1347       case nir_intrinsic_image_deref_atomic_fadd:
1348       case nir_intrinsic_image_deref_samples:
1349       case nir_intrinsic_image_deref_size:
1350       case nir_intrinsic_image_deref_atomic_inc_wrap:
1351       case nir_intrinsic_image_deref_atomic_dec_wrap:
1352       case nir_intrinsic_image_deref_sparse_load: {
1353          /* Set the image variable dereference. */
1354          exec_node *param = ir->actual_parameters.get_head();
1355          ir_dereference *image = (ir_dereference *)param;
1356          nir_deref_instr *deref = evaluate_deref(image);
1357          const glsl_type *type = deref->type;
1358 
1359          nir_intrinsic_set_access(instr, deref_get_qualifier(deref));
1360 
1361          instr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
1362          param = param->get_next();
1363          nir_intrinsic_set_image_dim(instr,
1364             (glsl_sampler_dim)type->sampler_dimensionality);
1365          nir_intrinsic_set_image_array(instr, type->sampler_array);
1366 
1367          /* Set the intrinsic destination. */
1368          if (ir->return_deref) {
1369             unsigned num_components;
1370             if (op == nir_intrinsic_image_deref_sparse_load) {
1371                const glsl_type *dest_type =
1372                   ir->return_deref->type->field_type("texel");
1373                /* One extra component to hold residency code. */
1374                num_components = dest_type->vector_elements + 1;
1375             } else
1376                num_components = ir->return_deref->type->vector_elements;
1377 
1378             nir_ssa_dest_init(&instr->instr, &instr->dest,
1379                               num_components, 32, NULL);
1380          }
1381 
1382          if (op == nir_intrinsic_image_deref_size) {
1383             instr->num_components = instr->dest.ssa.num_components;
1384          } else if (op == nir_intrinsic_image_deref_load ||
1385                     op == nir_intrinsic_image_deref_sparse_load) {
1386             instr->num_components = instr->dest.ssa.num_components;
1387             nir_intrinsic_set_dest_type(instr,
1388                nir_get_nir_type_for_glsl_base_type(type->sampled_type));
1389          } else if (op == nir_intrinsic_image_deref_store) {
1390             instr->num_components = 4;
1391             nir_intrinsic_set_src_type(instr,
1392                nir_get_nir_type_for_glsl_base_type(type->sampled_type));
1393          }
1394 
1395          if (op == nir_intrinsic_image_deref_size ||
1396              op == nir_intrinsic_image_deref_samples) {
1397             /* image_deref_size takes an LOD parameter which is always 0
1398              * coming from GLSL.
1399              */
1400             if (op == nir_intrinsic_image_deref_size)
1401                instr->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));
1402             nir_builder_instr_insert(&b, &instr->instr);
1403             break;
1404          }
1405 
1406          /* Set the address argument, extending the coordinate vector to four
1407           * components.
1408           */
1409          nir_ssa_def *src_addr =
1410             evaluate_rvalue((ir_dereference *)param);
1411          nir_ssa_def *srcs[4];
1412 
1413          for (int i = 0; i < 4; i++) {
1414             if (i < type->coordinate_components())
1415                srcs[i] = nir_channel(&b, src_addr, i);
1416             else
1417                srcs[i] = nir_ssa_undef(&b, 1, 32);
1418          }
1419 
         instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
         param = param->get_next();

         /* Set the sample argument, which is undefined for single-sample
          * images.
          */
         if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else {
            instr->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
         }

         /* Set the intrinsic parameters. */
         if (!param->is_tail_sentinel()) {
            instr->src[3] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_load ||
                    op == nir_intrinsic_image_deref_sparse_load) {
            instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         if (!param->is_tail_sentinel()) {
            instr->src[4] =
               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
            param = param->get_next();
         } else if (op == nir_intrinsic_image_deref_store) {
            instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
         }

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_memory_barrier:
      case nir_intrinsic_group_memory_barrier:
      case nir_intrinsic_memory_barrier_atomic_counter:
      case nir_intrinsic_memory_barrier_buffer:
      case nir_intrinsic_memory_barrier_image:
      case nir_intrinsic_memory_barrier_shared:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_shader_clock:
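         /* clock2x32ARB() returns a uvec2 holding the low and high 32-bit
          * halves of the 64-bit counter, hence the 2-component 32-bit
          * destination.
          */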
         nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL);
         nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP);
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_begin_invocation_interlock:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_end_invocation_interlock:
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      case nir_intrinsic_store_ssbo: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
         assert(write_mask);

         nir_ssa_def *nir_val = evaluate_rvalue(val);
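         /* Booleans are stored in an SSBO as 32-bit integers */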
         if (val->type->is_boolean())
            nir_val = nir_b2i32(&b, nir_val);

         instr->src[0] = nir_src_for_ssa(nir_val);
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
         instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
         intrinsic_set_std430_align(instr, val->type);
         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
         instr->num_components = val->type->vector_elements;

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_load_shared: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         nir_intrinsic_set_base(instr, 0);
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));

         const glsl_type *type = ir->return_deref->var->type;
         instr->num_components = type->vector_elements;
         intrinsic_set_std430_align(instr, type);

         /* Set up the destination register */
         unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
         nir_ssa_dest_init(&instr->instr, &instr->dest,
                           type->vector_elements, bit_size, NULL);

         nir_builder_instr_insert(&b, &instr->instr);

         /* The value in shared memory is a 32-bit value */
         if (type->is_boolean())
            ret = nir_b2b1(&b, &instr->dest.ssa);
         break;
      }
      case nir_intrinsic_store_shared: {
         exec_node *param = ir->actual_parameters.get_head();
         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();

         param = param->get_next();
         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
         assert(write_mask);

         nir_intrinsic_set_base(instr, 0);
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));

         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);

         nir_ssa_def *nir_val = evaluate_rvalue(val);
         /* The value in shared memory is a 32-bit value */
         if (val->type->is_boolean())
            nir_val = nir_b2b32(&b, nir_val);

         instr->src[0] = nir_src_for_ssa(nir_val);
         instr->num_components = val->type->vector_elements;
         intrinsic_set_std430_align(instr, val->type);

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_shared_atomic_add:
      case nir_intrinsic_shared_atomic_imin:
      case nir_intrinsic_shared_atomic_umin:
      case nir_intrinsic_shared_atomic_imax:
      case nir_intrinsic_shared_atomic_umax:
      case nir_intrinsic_shared_atomic_and:
      case nir_intrinsic_shared_atomic_or:
      case nir_intrinsic_shared_atomic_xor:
      case nir_intrinsic_shared_atomic_exchange:
      case nir_intrinsic_shared_atomic_comp_swap:
      case nir_intrinsic_shared_atomic_fadd:
      case nir_intrinsic_shared_atomic_fmin:
      case nir_intrinsic_shared_atomic_fmax:
      case nir_intrinsic_shared_atomic_fcomp_swap: {
         int param_count = ir->actual_parameters.length();
         assert(param_count == 2 || param_count == 3);

         /* Offset */
         exec_node *param = ir->actual_parameters.get_head();
         ir_instruction *inst = (ir_instruction *) param;
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));

         /* data1 parameter (this is always present) */
         param = param->get_next();
         inst = (ir_instruction *) param;
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));

         /* data2 parameter (only with atomic_comp_swap) */
         if (param_count == 3) {
            assert(op == nir_intrinsic_shared_atomic_comp_swap ||
                   op == nir_intrinsic_shared_atomic_fcomp_swap);
            param = param->get_next();
            inst = (ir_instruction *) param;
            instr->src[2] =
               nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
         }

         /* Atomic result */
         assert(ir->return_deref);
         unsigned bit_size = glsl_get_bit_size(ir->return_deref->type);
         nir_ssa_dest_init(&instr->instr, &instr->dest,
                           ir->return_deref->type->vector_elements,
                           bit_size, NULL);
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_vote_ieq:
         instr->num_components = 1;
         FALLTHROUGH;
      case nir_intrinsic_vote_any:
      case nir_intrinsic_vote_all: {
         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }

      case nir_intrinsic_ballot: {
         nir_ssa_dest_init(&instr->instr, &instr->dest,
                           ir->return_deref->type->vector_elements, 64, NULL);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_read_invocation: {
         nir_ssa_dest_init(&instr->instr, &instr->dest,
                           ir->return_deref->type->vector_elements, 32, NULL);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next;
         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_read_first_invocation: {
         nir_ssa_dest_init(&instr->instr, &instr->dest,
                           ir->return_deref->type->vector_elements, 32, NULL);
         instr->num_components = ir->return_deref->type->vector_elements;

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_is_helper_invocation: {
         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      case nir_intrinsic_is_sparse_texels_resident: {
         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);

         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

         nir_builder_instr_insert(&b, &instr->instr);
         break;
      }
      default:
         unreachable("not reached");
      }

      if (ir->return_deref) {
         nir_deref_instr *ret_deref = evaluate_deref(ir->return_deref);

         if (op == nir_intrinsic_image_deref_sparse_load)
            adjust_sparse_variable(ret_deref, ir->return_deref->type, ret);

         nir_store_deref(&b, ret_deref, ret, ~0);
      }

      return;
   }

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir->callee);
   assert(entry);
   nir_function *callee = (nir_function *) entry->data;

   nir_call_instr *call = nir_call_instr_create(this->shader, callee);

   unsigned i = 0;
   nir_deref_instr *ret_deref = NULL;
   if (ir->return_deref) {
      nir_variable *ret_tmp =
         nir_local_variable_create(this->impl, ir->return_deref->type,
                                   "return_tmp");
      ret_deref = nir_build_deref_var(&b, ret_tmp);
      call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;
      ir_variable *sig_param = (ir_variable *) formal_node;

      if (sig_param->data.mode == ir_var_function_out) {
         nir_deref_instr *out_deref = evaluate_deref(param_rvalue);
         call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);
      } else if (sig_param->data.mode == ir_var_function_in) {
         nir_ssa_def *val = evaluate_rvalue(param_rvalue);
         nir_src src = nir_src_for_ssa(val);

         nir_src_copy(&call->params[i], &src);
      } else if (sig_param->data.mode == ir_var_function_inout) {
         unreachable("unimplemented: inout parameters");
      }

      i++;
   }

   nir_builder_instr_insert(&b, &call->instr);

   if (ir->return_deref)
      nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0);
}

void
nir_visitor::visit(ir_assignment *ir)
{
   unsigned num_components = ir->lhs->type->vector_elements;
   unsigned write_mask = ir->write_mask;

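   /* Assignments to invariant or precise variables must not have their
    * arithmetic reassociated or contracted, so mark the builder exact for
    * everything emitted on behalf of this assignment.
    */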
   b.exact = ir->lhs->variable_referenced()->data.invariant ||
             ir->lhs->variable_referenced()->data.precise;

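   /* Fast path: assigning a whole dereference or constant can become a
    * single copy_deref.  (A write_mask of 0 appears to be how GLSL IR marks
    * whole-object assignments of non-vector types.)
    */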
   if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
       (write_mask == BITFIELD_MASK(num_components) || write_mask == 0)) {
      nir_deref_instr *lhs = evaluate_deref(ir->lhs);
      nir_deref_instr *rhs = evaluate_deref(ir->rhs);
      enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs);
      enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs);

      nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
                                 rhs_qualifiers);
      return;
   }

   ir_texture *tex = ir->rhs->as_texture();
   bool is_sparse = tex && tex->is_sparse;

   if (!is_sparse)
      assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());

   ir->lhs->accept(this);
   nir_deref_instr *lhs_deref = this->deref;
   nir_ssa_def *src = evaluate_rvalue(ir->rhs);

   if (is_sparse) {
      adjust_sparse_variable(lhs_deref, tex->type, src);

      /* The component count and write mask are 0 for a struct, so correct
       * them here from the vector the sparse load actually produced.
       */
      num_components = src->num_components;
      write_mask = BITFIELD_MASK(num_components);
   }

   if (write_mask != BITFIELD_MASK(num_components) && write_mask != 0) {
      /* GLSL IR will give us the input to the write-masked assignment in a
       * single packed vector.  So, for example, if the writemask is xzw, then
       * we have to swizzle x -> x, y -> z, and z -> w and get the y component
       * from the load.
       */
      unsigned swiz[4];
      unsigned component = 0;
      for (unsigned i = 0; i < 4; i++) {
         swiz[i] = write_mask & (1 << i) ? component++ : 0;
      }
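      /* Illustrative example: for write_mask = 0b1101 (xzw) the loop above
       * produces swiz = {0, 0, 1, 2}; the y entry is a don't-care because y
       * is not written.
       */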
      src = nir_swizzle(&b, src, swiz, num_components);
   }

   enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref);

   nir_store_deref_with_access(&b, lhs_deref, src, write_mask,
                               qualifiers);
}

/*
 * Given an instruction, returns a pointer to its destination or NULL if there
 * is no destination.
 *
 * Note that this only handles instructions we generate at this level.
 */
static nir_dest *
get_instr_dest(nir_instr *instr)
{
   nir_alu_instr *alu_instr;
   nir_intrinsic_instr *intrinsic_instr;
   nir_tex_instr *tex_instr;

   switch (instr->type) {
      case nir_instr_type_alu:
         alu_instr = nir_instr_as_alu(instr);
         return &alu_instr->dest.dest;

      case nir_instr_type_intrinsic:
         intrinsic_instr = nir_instr_as_intrinsic(instr);
         if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
            return &intrinsic_instr->dest;
         else
            return NULL;

      case nir_instr_type_tex:
         tex_instr = nir_instr_as_tex(instr);
         return &tex_instr->dest;

      default:
         unreachable("not reached");
   }

   return NULL;
}

void
nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
                       unsigned bit_size)
{
   nir_dest *dest = get_instr_dest(instr);

   if (dest)
      nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);

   nir_builder_instr_insert(&b, instr);

   if (dest) {
      assert(dest->is_ssa);
      this->result = &dest->ssa;
   }
}

nir_ssa_def *
nir_visitor::evaluate_rvalue(ir_rvalue *ir)
{
   ir->accept(this);
   if (ir->as_dereference() || ir->as_constant()) {
      /*
       * A dereference is being used on the right hand side, which means we
       * must emit a variable load.
       */

      enum gl_access_qualifier access = deref_get_qualifier(this->deref);
      this->result = nir_load_deref_with_access(&b, this->deref, access);

      if (nir_deref_mode_is(this->deref, nir_var_shader_out))
         this->has_output_rvalue = true;
   }

   return this->result;
}

static bool
type_is_float(glsl_base_type type)
{
   return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE ||
      type == GLSL_TYPE_FLOAT16;
}

static bool
type_is_signed(glsl_base_type type)
{
   return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 ||
      type == GLSL_TYPE_INT16;
}

void
nir_visitor::visit(ir_expression *ir)
{
   /* Some special cases */
   switch (ir->operation) {
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample: {
      ir_dereference *deref = ir->operands[0]->as_dereference();
      ir_swizzle *swizzle = NULL;
      if (!deref) {
         /* The API does not allow a swizzle here, but the varying packing
          * code may have pushed one in.
          */
         swizzle = ir->operands[0]->as_swizzle();
         assert(swizzle);
         deref = swizzle->val->as_dereference();
         assert(deref);
      }

      deref->accept(this);

      nir_intrinsic_op op;
      if (nir_deref_mode_is(this->deref, nir_var_shader_in)) {
         switch (ir->operation) {
         case ir_unop_interpolate_at_centroid:
            op = nir_intrinsic_interp_deref_at_centroid;
            break;
         case ir_binop_interpolate_at_offset:
            op = nir_intrinsic_interp_deref_at_offset;
            break;
         case ir_binop_interpolate_at_sample:
            op = nir_intrinsic_interp_deref_at_sample;
            break;
         default:
            unreachable("Invalid interpolation intrinsic");
         }
      } else {
         /* This case can happen if the vertex shader does not write the
          * given varying.  In this case, the linker will lower it to a
          * global variable.  Since interpolating a variable makes no
          * sense, we'll just turn it into a load which will probably
          * eventually end up as an SSA definition.
          */
         assert(nir_deref_mode_is(this->deref, nir_var_shader_temp));
         op = nir_intrinsic_load_deref;
      }

      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
      intrin->num_components = deref->type->vector_elements;
      intrin->src[0] = nir_src_for_ssa(&this->deref->dest.ssa);

      if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
          intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
         intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));

      unsigned bit_size = glsl_get_bit_size(deref->type);
      add_instr(&intrin->instr, deref->type->vector_elements, bit_size);

      if (swizzle) {
         unsigned swiz[4] = {
            swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
         };

         result = nir_swizzle(&b, result, swiz,
                              swizzle->type->vector_elements);
      }

      return;
   }

   case ir_unop_ssbo_unsized_array_length: {
      nir_intrinsic_instr *intrin =
         nir_intrinsic_instr_create(b.shader,
                                    nir_intrinsic_deref_buffer_array_length);

      ir_dereference *deref = ir->operands[0]->as_dereference();
      intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa);

      add_instr(&intrin->instr, 1, 32);
      return;
   }

   case ir_binop_ubo_load:
      /* UBO loads should only have been lowered in GLSL IR for non-NIR
       * drivers; NIR drivers use gl_nir_lower_buffers() instead.
       */
      unreachable("Invalid operation: NIR doesn't want lowered UBO loads");
   default:
      break;
   }

   nir_ssa_def *srcs[4];
   for (unsigned i = 0; i < ir->num_operands; i++)
      srcs[i] = evaluate_rvalue(ir->operands[i]);

   glsl_base_type types[4];
   for (unsigned i = 0; i < ir->num_operands; i++)
      types[i] = ir->operands[i]->type->base_type;

   glsl_base_type out_type = ir->type->base_type;

   switch (ir->operation) {
   case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
   case ir_unop_logic_not:
      result = nir_inot(&b, srcs[0]);
      break;
   case ir_unop_neg:
      result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
                                       : nir_ineg(&b, srcs[0]);
      break;
   case ir_unop_abs:
      result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
                                       : nir_iabs(&b, srcs[0]);
      break;
   case ir_unop_clz:
      result = nir_uclz(&b, srcs[0]);
      break;
   case ir_unop_saturate:
      assert(type_is_float(types[0]));
      result = nir_fsat(&b, srcs[0]);
      break;
   case ir_unop_sign:
      result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
                                       : nir_isign(&b, srcs[0]);
      break;
   case ir_unop_rcp:  result = nir_frcp(&b, srcs[0]);  break;
   case ir_unop_rsq:  result = nir_frsq(&b, srcs[0]);  break;
   case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
   case ir_unop_exp:  result = nir_fexp2(&b, nir_fmul_imm(&b, srcs[0], M_LOG2E)); break;
   case ir_unop_log:  result = nir_fmul_imm(&b, nir_flog2(&b, srcs[0]), 1.0 / M_LOG2E); break;
   case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
   case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_f2b:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_b2i64:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_f162f:
   case ir_unop_f2f16:
   case ir_unop_f162b:
   case ir_unop_b2f16:
   case ir_unop_i2i:
   case ir_unop_u2u:
   case ir_unop_d2i:
   case ir_unop_d2u:
   case ir_unop_d2b:
   case ir_unop_i2d:
   case ir_unop_u2d:
   case ir_unop_i642i:
   case ir_unop_i642u:
   case ir_unop_i642f:
   case ir_unop_i642b:
   case ir_unop_i642d:
   case ir_unop_u642i:
   case ir_unop_u642u:
   case ir_unop_u642f:
   case ir_unop_u642d:
   case ir_unop_i2i64:
   case ir_unop_u2i64:
   case ir_unop_f2i64:
   case ir_unop_d2i64:
   case ir_unop_i2u64:
   case ir_unop_u2u64:
   case ir_unop_f2u64:
   case ir_unop_d2u64:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_i642u64:
   case ir_unop_u642i64: {
      nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]);
      nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
      result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type,
                                 nir_rounding_mode_undef),
                                 srcs[0], NULL, NULL, NULL);
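      /* For instance, ir_unop_f2i64 (float32 -> int64) resolves to
       * nir_op_f2i64 here.
       */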
      /* b2i and b2f don't have fixed bit-size versions so the builder will
       * just assume 32 and we have to fix it up here.
       */
      result->bit_size = nir_alu_type_get_type_size(dst_type);
      break;
   }

   case ir_unop_f2fmp: {
      result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_i2imp: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_u2ump: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_u2f:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i642d:
   case ir_unop_bitcast_d2i64:
   case ir_unop_bitcast_u642d:
   case ir_unop_bitcast_d2u64:
   case ir_unop_subroutine_to_int:
      /* no-op */
      result = nir_mov(&b, srcs[0]);
      break;
   case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
   case ir_unop_ceil:  result = nir_fceil(&b, srcs[0]); break;
   case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
   case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
   case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break;
   case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break;
   case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
   case ir_unop_sin:   result = nir_fsin(&b, srcs[0]); break;
   case ir_unop_cos:   result = nir_fcos(&b, srcs[0]); break;
   case ir_unop_dFdx:        result = nir_fddx(&b, srcs[0]); break;
   case ir_unop_dFdy:        result = nir_fddy(&b, srcs[0]); break;
   case ir_unop_dFdx_fine:   result = nir_fddx_fine(&b, srcs[0]); break;
   case ir_unop_dFdy_fine:   result = nir_fddy_fine(&b, srcs[0]); break;
   case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
   case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
   case ir_unop_pack_snorm_2x16:
      result = nir_pack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_snorm_4x8:
      result = nir_pack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_2x16:
      result = nir_pack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_4x8:
      result = nir_pack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_half_2x16:
      result = nir_pack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_2x16:
      result = nir_unpack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_4x8:
      result = nir_unpack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_2x16:
      result = nir_unpack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_4x8:
      result = nir_unpack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_half_2x16:
      result = nir_unpack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_sampler_2x32:
   case ir_unop_pack_image_2x32:
   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      result = nir_pack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_unpack_sampler_2x32:
   case ir_unop_unpack_image_2x32:
   case ir_unop_unpack_double_2x32:
   case ir_unop_unpack_int_2x32:
   case ir_unop_unpack_uint_2x32:
      result = nir_unpack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_bitfield_reverse:
      result = nir_bitfield_reverse(&b, srcs[0]);
      break;
   case ir_unop_bit_count:
      result = nir_bit_count(&b, srcs[0]);
      break;
   case ir_unop_find_msb:
      switch (types[0]) {
      case GLSL_TYPE_UINT:
         result = nir_ufind_msb(&b, srcs[0]);
         break;
      case GLSL_TYPE_INT:
         result = nir_ifind_msb(&b, srcs[0]);
         break;
      default:
         unreachable("Invalid type for findMSB()");
      }
      break;
   case ir_unop_find_lsb:
      result = nir_find_lsb(&b, srcs[0]);
      break;

   case ir_unop_get_buffer_size: {
      nir_intrinsic_instr *load = nir_intrinsic_instr_create(
         this->shader,
         nir_intrinsic_get_ssbo_size);
      load->num_components = ir->type->vector_elements;
      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
      unsigned bit_size = glsl_get_bit_size(ir->type);
      add_instr(&load->instr, ir->type->vector_elements, bit_size);
      return;
   }

   case ir_unop_atan:
      result = nir_atan(&b, srcs[0]);
      break;

   case ir_binop_add:
      result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
                                       : nir_iadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_add_sat:
      result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1])
                                        : nir_uadd_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub:
      result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
                                       : nir_isub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub_sat:
      result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1])
                                        : nir_usub_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_abs_sub:
      /* out_type is always unsigned for ir_binop_abs_sub, so we have to key
       * on the type of the sources.
       */
      result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1])
                                        : nir_uabs_usub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg:
      result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1])
                                        : nir_uhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg_round:
      result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1])
                                        : nir_urhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mul_32x16:
      result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1])
                                        : nir_umul_32x16(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mul:
      if (type_is_float(out_type))
         result = nir_fmul(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_INT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_INT))
         result = nir_imul_2x32_64(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_UINT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_UINT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
         result = nir_umul_2x32_64(&b, srcs[0], srcs[1]);
      else
         result = nir_imul(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_div:
      if (type_is_float(out_type))
         result = nir_fdiv(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_idiv(&b, srcs[0], srcs[1]);
      else
         result = nir_udiv(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mod:
      result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
                                       : nir_umod(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_min:
      if (type_is_float(out_type))
         result = nir_fmin(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imin(&b, srcs[0], srcs[1]);
      else
         result = nir_umin(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_max:
      if (type_is_float(out_type))
         result = nir_fmax(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imax(&b, srcs[0], srcs[1]);
      else
         result = nir_umax(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
   case ir_binop_logic_and:
      result = nir_iand(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_or:
      result = nir_ior(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_xor:
      result = nir_ixor(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_lshift: result = nir_ishl(&b, srcs[0], nir_u2u32(&b, srcs[1])); break;
   case ir_binop_rshift:
      result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], nir_u2u32(&b, srcs[1]))
                                          : nir_ushr(&b, srcs[0], nir_u2u32(&b, srcs[1]));
      break;
   case ir_binop_imul_high:
      result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
                                           : nir_umul_high(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_carry:  result = nir_uadd_carry(&b, srcs[0], srcs[1]);  break;
   case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
   case ir_binop_less:
      if (type_is_float(types[0]))
         result = nir_flt(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ilt(&b, srcs[0], srcs[1]);
      else
         result = nir_ult(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_gequal:
      if (type_is_float(types[0]))
         result = nir_fge(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ige(&b, srcs[0], srcs[1]);
      else
         result = nir_uge(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_equal:
      if (type_is_float(types[0]))
         result = nir_feq(&b, srcs[0], srcs[1]);
      else
         result = nir_ieq(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_nequal:
      if (type_is_float(types[0]))
         result = nir_fneu(&b, srcs[0], srcs[1]);
      else
         result = nir_ine(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_all_equal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      }
      break;
   case ir_binop_any_nequal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      }
      break;
   case ir_binop_dot:
      result = nir_fdot(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_vector_extract:
      result = nir_vector_extract(&b, srcs[0], srcs[1]);
      break;

   case ir_binop_atan2:
      result = nir_atan2(&b, srcs[0], srcs[1]);
      break;

   case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
   case ir_triop_fma:
      result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_lrp:
      result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_csel:
      result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_bitfield_extract:
      result = ir->type->is_int_16_32() ?
         nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) :
         nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2]));
      break;
   case ir_quadop_bitfield_insert:
      result = nir_bitfield_insert(&b,
                                   nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]),
                                   nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3]));
      break;
   case ir_quadop_vector:
      result = nir_vec(&b, srcs, ir->type->vector_elements);
      break;

   default:
      unreachable("not reached");
   }
}

void
nir_visitor::visit(ir_swizzle *ir)
{
   unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
   result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
                        ir->type->vector_elements);
}

void
nir_visitor::visit(ir_texture *ir)
{
   unsigned num_srcs;
   nir_texop op;
   switch (ir->op) {
   case ir_tex:
      op = nir_texop_tex;
      num_srcs = 1; /* coordinate */
      break;

   case ir_txb:
   case ir_txl:
      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
      num_srcs = 2; /* coordinate, bias/lod */
      break;

   case ir_txd:
      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
      num_srcs = 3;
      break;

   case ir_txf:
      op = nir_texop_txf;
      if (ir->lod_info.lod != NULL)
         num_srcs = 2; /* coordinate, lod */
      else
         num_srcs = 1; /* coordinate */
      break;

   case ir_txf_ms:
      op = nir_texop_txf_ms;
      num_srcs = 2; /* coordinate, sample_index */
      break;

   case ir_txs:
      op = nir_texop_txs;
      if (ir->lod_info.lod != NULL)
         num_srcs = 1; /* lod */
      else
         num_srcs = 0;
      break;

   case ir_lod:
      op = nir_texop_lod;
      num_srcs = 1; /* coordinate */
      break;

   case ir_tg4:
      op = nir_texop_tg4;
      num_srcs = 1; /* coordinate */
      break;

   case ir_query_levels:
      op = nir_texop_query_levels;
      num_srcs = 0;
      break;

   case ir_texture_samples:
      op = nir_texop_texture_samples;
      num_srcs = 0;
      break;

   case ir_samples_identical:
      op = nir_texop_samples_identical;
      num_srcs = 1; /* coordinate */
      break;

   default:
      unreachable("not reached");
   }

   if (ir->projector != NULL)
      num_srcs++;
   if (ir->shadow_comparator != NULL)
      num_srcs++;
   /* offsets are constants we store inside nir_tex_instr's tg4_offsets */
   if (ir->offset != NULL && !ir->offset->type->is_array())
      num_srcs++;
   if (ir->clamp != NULL)
      num_srcs++;

   /* Add two for the texture and sampler derefs */
   num_srcs += 2;
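   /* Illustrative tally: a plain ir_tex on a shadow sampler with a constant
    * (non-gather) offset has coordinate + comparator + offset plus the two
    * derefs above, i.e. num_srcs = 5.
    */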
   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);

   instr->op = op;
   instr->sampler_dim =
      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
   instr->is_array = ir->sampler->type->sampler_array;
   instr->is_shadow = ir->sampler->type->sampler_shadow;

   const glsl_type *dest_type
      = ir->is_sparse ? ir->type->field_type("texel") : ir->type;
   assert(dest_type != glsl_type::error_type);
   if (instr->is_shadow)
      instr->is_new_style_shadow = (dest_type->vector_elements == 1);
   instr->dest_type = nir_get_nir_type_for_glsl_type(dest_type);
   instr->is_sparse = ir->is_sparse;

   nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler);

   /* check for bindless handles */
   if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
       nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
      nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
      instr->src[0].src = nir_src_for_ssa(load);
      instr->src[0].src_type = nir_tex_src_texture_handle;
      instr->src[1].src = nir_src_for_ssa(load);
      instr->src[1].src_type = nir_tex_src_sampler_handle;
   } else {
      instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      instr->src[0].src_type = nir_tex_src_texture_deref;
      instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      instr->src[1].src_type = nir_tex_src_sampler_deref;
   }

   unsigned src_number = 2;

   if (ir->coordinate != NULL) {
      instr->coord_components = ir->coordinate->type->vector_elements;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
      instr->src[src_number].src_type = nir_tex_src_coord;
      src_number++;
   }

   if (ir->projector != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->projector));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (ir->shadow_comparator != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator));
      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   if (ir->offset != NULL) {
      if (ir->offset->type->is_array()) {
         for (int i = 0; i < ir->offset->type->array_size(); i++) {
            const ir_constant *c =
               ir->offset->as_constant()->get_array_element(i);

            for (unsigned j = 0; j < 2; ++j) {
               int val = c->get_int_component(j);
               instr->tg4_offsets[i][j] = val;
            }
         }
      } else {
         assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());

         instr->src[src_number].src =
            nir_src_for_ssa(evaluate_rvalue(ir->offset));
         instr->src[src_number].src_type = nir_tex_src_offset;
         src_number++;
      }
   }

   if (ir->clamp) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->clamp));
      instr->src[src_number].src_type = nir_tex_src_min_lod;
      src_number++;
   }

   switch (ir->op) {
   case ir_txb:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
      break;

   case ir_txl:
   case ir_txf:
   case ir_txs:
      if (ir->lod_info.lod != NULL) {
         instr->src[src_number].src =
            nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
         instr->src[src_number].src_type = nir_tex_src_lod;
         src_number++;
      }
      break;

   case ir_txd:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
      instr->src[src_number].src_type = nir_tex_src_ddx;
      src_number++;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
      instr->src[src_number].src_type = nir_tex_src_ddy;
      src_number++;
      break;

   case ir_txf_ms:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
      instr->src[src_number].src_type = nir_tex_src_ms_index;
      src_number++;
      break;

   case ir_tg4:
      instr->component = ir->lod_info.component->as_constant()->value.u[0];
      break;

   default:
      break;
   }

   assert(src_number == num_srcs);

   unsigned bit_size = glsl_get_bit_size(dest_type);
   add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}

void
nir_visitor::visit(ir_constant *ir)
{
   /*
    * We don't know if this variable is an array or struct that gets
    * dereferenced, so do the safe thing and make it a variable with a
    * constant initializer and return a dereference.
    */

   nir_variable *var =
      nir_local_variable_create(this->impl, ir->type, "const_temp");
   var->data.read_only = true;
   var->constant_initializer = constant_copy(ir, var);

   this->deref = nir_build_deref_var(&b, var);
}

void
nir_visitor::visit(ir_dereference_variable *ir)
{
   if (ir->variable_referenced()->data.mode == ir_var_function_out) {
      unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0;

      foreach_in_list(ir_variable, param, &sig->parameters) {
         if (param == ir->variable_referenced()) {
            break;
         }
         i++;
      }

      this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
                                         nir_var_function_temp, ir->type, 0);
      return;
   }

   assert(ir->variable_referenced()->data.mode != ir_var_function_inout);

   struct hash_entry *entry =
      _mesa_hash_table_search(this->var_table, ir->var);
   assert(entry);
   nir_variable *var = (nir_variable *) entry->data;

   this->deref = nir_build_deref_var(&b, var);
}

void
nir_visitor::visit(ir_dereference_record *ir)
{
   ir->record->accept(this);

   int field_index = ir->field_idx;
   assert(field_index >= 0);

   /* A sparse texture variable is a struct at the ir_variable level, but it
    * has been converted to a vector for the nir_variable.
    */
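   /* For example (illustrative): a sparse fetch returning
    * { vec4 texel; int code; } is held in a single 5-component NIR vector
    * whose last channel is the residency code.
    */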
   if (this->deref->deref_type == nir_deref_type_var &&
       _mesa_set_search(this->sparse_variable_set, this->deref->var)) {
      nir_ssa_def *load = nir_load_deref(&b, this->deref);
      assert(load->num_components >= 2);

      nir_ssa_def *ssa;
      const glsl_type *type = ir->record->type;
      if (field_index == type->field_index("code")) {
         /* last channel holds residency code */
         ssa = nir_channel(&b, load, load->num_components - 1);
      } else {
         assert(field_index == type->field_index("texel"));

         unsigned mask = BITFIELD_MASK(load->num_components - 1);
         ssa = nir_channels(&b, load, mask);
      }

      /* We still need to hand back a deref, so store the value in a
       * temporary and return a deref of that.
       */
      nir_variable *tmp =
         nir_local_variable_create(this->impl, ir->type, "deref_tmp");
      this->deref = nir_build_deref_var(&b, tmp);
      nir_store_deref(&b, this->deref, ssa, ~0);
   } else
      this->deref = nir_build_deref_struct(&b, this->deref, field_index);
}

void
nir_visitor::visit(ir_dereference_array *ir)
{
   nir_ssa_def *index = evaluate_rvalue(ir->array_index);

   ir->array->accept(this);

   this->deref = nir_build_deref_array(&b, this->deref, index);
}

void
nir_visitor::visit(ir_barrier *)
{
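   /* GLSL's barrier() also orders shared-memory accesses in compute shaders
    * and output-variable accesses in tessellation control shaders, so emit
    * the matching memory barrier ahead of the control barrier.
    */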
   if (shader->info.stage == MESA_SHADER_COMPUTE)
      nir_memory_barrier_shared(&b);
   else if (shader->info.stage == MESA_SHADER_TESS_CTRL)
      nir_memory_barrier_tcs_patch(&b);

   nir_control_barrier(&b);
}

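/* Builds a NIR shader containing Mesa's software fp64 support routines.
 * (Usage note, not from the original source: callers typically keep this
 * shader around and hand it to the fp64 lowering pass, e.g. as the softfp64
 * library argument of nir_lower_doubles(), so its functions can be inlined
 * wherever double operations must be emulated.)
 */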
nir_shader *
glsl_float64_funcs_to_nir(struct gl_context *ctx,
                          const nir_shader_compiler_options *options)
{
   /* It's not possible to use float64 on GLSL ES, so don't bother trying to
    * build the support code.  The support code depends on higher versions of
    * desktop GLSL, so it will fail to compile (below) anyway.
    */
   if (!_mesa_is_desktop_gl(ctx) || ctx->Const.GLSLVersion < 400)
      return NULL;

   /* We pretend it's a vertex shader.  Ultimately, the stage shouldn't
    * matter because we're not optimizing anything here.
    */
   struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX);
   sh->Source = float64_source;
   sh->CompileStatus = COMPILE_FAILURE;
   _mesa_glsl_compile_shader(ctx, sh, false, false, true);

   if (!sh->CompileStatus) {
      if (sh->InfoLog) {
         _mesa_problem(ctx,
                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
                       sh->InfoLog, float64_source);
      }
      return NULL;
   }

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL);

   nir_visitor v1(&ctx->Const, nir);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* _mesa_delete_shader will try to free sh->Source but it's static const */
   sh->Source = NULL;
   _mesa_delete_shader(ctx, sh);

   nir_validate_shader(nir, "float64_funcs_to_nir");

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Do some optimizations to clean up the shader now.  By optimizing the
    * functions in the library, we avoid having to re-do that work every
    * time we inline a copy of a function.  Reducing basic blocks also helps
    * with compile times.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   return nir;
}