/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "brw_nir.h"
25 #include "brw_shader.h"
26 #include "compiler/glsl_types.h"
27 #include "compiler/nir/nir_builder.h"
28 
29 static bool
is_input(nir_intrinsic_instr * intrin)30 is_input(nir_intrinsic_instr *intrin)
31 {
32    return intrin->intrinsic == nir_intrinsic_load_input ||
33           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
34           intrin->intrinsic == nir_intrinsic_load_interpolated_input;
35 }
36 
37 static bool
is_output(nir_intrinsic_instr * intrin)38 is_output(nir_intrinsic_instr *intrin)
39 {
40    return intrin->intrinsic == nir_intrinsic_load_output ||
41           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
42           intrin->intrinsic == nir_intrinsic_store_output ||
43           intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
44 }
45 
46 /**
47  * In many cases, we just add the base and offset together, so there's no
48  * reason to keep them separate.  Sometimes, combining them is essential:
49  * if a shader only accesses part of a compound variable (such as a matrix
50  * or array), the variable's base may not actually exist in the VUE map.
51  *
52  * This pass adds constant offsets to instr->const_index[0], and resets
53  * the offset source to 0.  Non-constant offsets remain unchanged - since
54  * we don't know what part of a compound variable is accessed, we allocate
55  * storage for the entire thing.
56  */
57 
58 static bool
add_const_offset_to_base_block(nir_block * block,nir_builder * b,nir_variable_mode mode)59 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
60                                nir_variable_mode mode)
61 {
62    nir_foreach_instr_safe(instr, block) {
63       if (instr->type != nir_instr_type_intrinsic)
64          continue;
65 
66       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
67 
68       if ((mode == nir_var_shader_in && is_input(intrin)) ||
69           (mode == nir_var_shader_out && is_output(intrin))) {
70          nir_src *offset = nir_get_io_offset_src(intrin);
71          nir_const_value *const_offset = nir_src_as_const_value(*offset);
72 
73          if (const_offset) {
74             intrin->const_index[0] += const_offset->u32[0];
75             b->cursor = nir_before_instr(&intrin->instr);
76             nir_instr_rewrite_src(&intrin->instr, offset,
77                                   nir_src_for_ssa(nir_imm_int(b, 0)));
78          }
79       }
80    }
81    return true;
82 }
83 
84 static void
add_const_offset_to_base(nir_shader * nir,nir_variable_mode mode)85 add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
86 {
87    nir_foreach_function(f, nir) {
88       if (f->impl) {
89          nir_builder b;
90          nir_builder_init(&b, f->impl);
91          nir_foreach_block(block, f->impl) {
92             add_const_offset_to_base_block(block, &b, mode);
93          }
94       }
95    }
96 }
97 
98 static bool
remap_vs_attrs(nir_block * block,shader_info * nir_info)99 remap_vs_attrs(nir_block *block, shader_info *nir_info)
100 {
101    nir_foreach_instr(instr, block) {
102       if (instr->type != nir_instr_type_intrinsic)
103          continue;
104 
105       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
106 
107       if (intrin->intrinsic == nir_intrinsic_load_input) {
108          /* Attributes come in a contiguous block, ordered by their
109           * gl_vert_attrib value.  That means we can compute the slot
110           * number for an attribute by masking out the enabled attributes
111           * before it and counting the bits.
112           */
113          int attr = intrin->const_index[0];
114          int slot = _mesa_bitcount_64(nir_info->inputs_read &
115                                       BITFIELD64_MASK(attr));
116          intrin->const_index[0] = 4 * slot;
117       }
118    }
119    return true;
120 }
121 
122 static bool
remap_inputs_with_vue_map(nir_block * block,const struct brw_vue_map * vue_map)123 remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map)
124 {
125    nir_foreach_instr(instr, block) {
126       if (instr->type != nir_instr_type_intrinsic)
127          continue;
128 
129       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
130 
131       if (intrin->intrinsic == nir_intrinsic_load_input ||
132           intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
133          int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
134          assert(vue_slot != -1);
135          intrin->const_index[0] = vue_slot;
136       }
137    }
138    return true;
139 }
140 
141 static bool
remap_tess_levels(nir_builder * b,nir_intrinsic_instr * intr,GLenum primitive_mode)142 remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
143                   GLenum primitive_mode)
144 {
145    const int location = nir_intrinsic_base(intr);
146    const unsigned component = nir_intrinsic_component(intr);
147    bool out_of_bounds;
148 
149    if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
150       switch (primitive_mode) {
151       case GL_QUADS:
152          /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
153          nir_intrinsic_set_base(intr, 0);
154          nir_intrinsic_set_component(intr, 3 - component);
155          out_of_bounds = false;
156          break;
157       case GL_TRIANGLES:
158          /* gl_TessLevelInner[0] lives at DWord 4. */
159          nir_intrinsic_set_base(intr, 1);
160          out_of_bounds = component > 0;
161          break;
162       case GL_ISOLINES:
163          out_of_bounds = true;
164          break;
165       default:
166          unreachable("Bogus tessellation domain");
167       }
168    } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
169       if (primitive_mode == GL_ISOLINES) {
170          /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
171          nir_intrinsic_set_base(intr, 1);
172          nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
173          out_of_bounds = component > 1;
174       } else {
175          /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
176          nir_intrinsic_set_base(intr, 1);
177          nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
178          out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
179       }
180    } else {
181       return false;
182    }
183 
184    if (out_of_bounds) {
185       if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
186          b->cursor = nir_before_instr(&intr->instr);
187          nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
188          nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
189       }
190       nir_instr_remove(&intr->instr);
191    }
192 
193    return true;
194 }
195 
196 static bool
remap_patch_urb_offsets(nir_block * block,nir_builder * b,const struct brw_vue_map * vue_map,GLenum tes_primitive_mode)197 remap_patch_urb_offsets(nir_block *block, nir_builder *b,
198                         const struct brw_vue_map *vue_map,
199                         GLenum tes_primitive_mode)
200 {
201    const bool is_passthrough_tcs = b->shader->info->name &&
202       strcmp(b->shader->info->name, "passthrough") == 0;
203 
204    nir_foreach_instr_safe(instr, block) {
205       if (instr->type != nir_instr_type_intrinsic)
206          continue;
207 
208       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
209 
210       gl_shader_stage stage = b->shader->stage;
211 
212       if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
213           (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
214 
215          if (!is_passthrough_tcs &&
216              remap_tess_levels(b, intrin, tes_primitive_mode))
217             continue;
218 
219          int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
220          assert(vue_slot != -1);
221          intrin->const_index[0] = vue_slot;
222 
223          nir_src *vertex = nir_get_io_vertex_index_src(intrin);
224          if (vertex) {
225             nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
226             if (const_vertex) {
227                intrin->const_index[0] += const_vertex->u32[0] *
228                                          vue_map->num_per_vertex_slots;
229             } else {
230                b->cursor = nir_before_instr(&intrin->instr);
231 
232                /* Multiply by the number of per-vertex slots. */
233                nir_ssa_def *vertex_offset =
234                   nir_imul(b,
235                            nir_ssa_for_src(b, *vertex, 1),
236                            nir_imm_int(b,
237                                        vue_map->num_per_vertex_slots));
238 
239                /* Add it to the existing offset */
240                nir_src *offset = nir_get_io_offset_src(intrin);
241                nir_ssa_def *total_offset =
242                   nir_iadd(b, vertex_offset,
243                            nir_ssa_for_src(b, *offset, 1));
244 
245                nir_instr_rewrite_src(&intrin->instr, offset,
246                                      nir_src_for_ssa(total_offset));
247             }
248          }
249       }
250    }
251    return true;
252 }
253 
254 void
brw_nir_lower_vs_inputs(nir_shader * nir,bool is_scalar,bool use_legacy_snorm_formula,const uint8_t * vs_attrib_wa_flags)255 brw_nir_lower_vs_inputs(nir_shader *nir,
256                         bool is_scalar,
257                         bool use_legacy_snorm_formula,
258                         const uint8_t *vs_attrib_wa_flags)
259 {
260    /* Start with the location of the variable's base. */
261    foreach_list_typed(nir_variable, var, node, &nir->inputs) {
262       var->data.driver_location = var->data.location;
263    }
264 
265    /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
266     * loaded as one vec4 or dvec4 per element (or matrix column), depending on
267     * whether it is a double-precision type or not.
268     */
269    nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
270 
271    /* This pass needs actual constants */
272    nir_opt_constant_folding(nir);
273 
274    add_const_offset_to_base(nir, nir_var_shader_in);
275 
276    brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
277                                        vs_attrib_wa_flags);
278 
279    if (is_scalar) {
280       /* Finally, translate VERT_ATTRIB_* values into the actual registers. */
281 
282       nir_foreach_function(function, nir) {
283          if (function->impl) {
284             nir_foreach_block(block, function->impl) {
285                remap_vs_attrs(block, nir->info);
286             }
287          }
288       }
289    }
290 }
291 
292 void
brw_nir_lower_vue_inputs(nir_shader * nir,bool is_scalar,const struct brw_vue_map * vue_map)293 brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
294                          const struct brw_vue_map *vue_map)
295 {
296    foreach_list_typed(nir_variable, var, node, &nir->inputs) {
297       var->data.driver_location = var->data.location;
298    }
299 
300    /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
301    nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
302 
303    if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) {
304       /* This pass needs actual constants */
305       nir_opt_constant_folding(nir);
306 
307       add_const_offset_to_base(nir, nir_var_shader_in);
308 
309       nir_foreach_function(function, nir) {
310          if (function->impl) {
311             nir_foreach_block(block, function->impl) {
312                remap_inputs_with_vue_map(block, vue_map);
313             }
314          }
315       }
316    }
317 }
318 
319 void
brw_nir_lower_tes_inputs(nir_shader * nir,const struct brw_vue_map * vue_map)320 brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
321 {
322    foreach_list_typed(nir_variable, var, node, &nir->inputs) {
323       var->data.driver_location = var->data.location;
324    }
325 
326    nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
327 
328    /* This pass needs actual constants */
329    nir_opt_constant_folding(nir);
330 
331    add_const_offset_to_base(nir, nir_var_shader_in);
332 
333    nir_foreach_function(function, nir) {
334       if (function->impl) {
335          nir_builder b;
336          nir_builder_init(&b, function->impl);
337          nir_foreach_block(block, function->impl) {
338             remap_patch_urb_offsets(block, &b, vue_map,
339                                     nir->info->tess.primitive_mode);
340          }
341       }
342    }
343 }
344 
345 void
brw_nir_lower_fs_inputs(nir_shader * nir,const struct gen_device_info * devinfo,const struct brw_wm_prog_key * key)346 brw_nir_lower_fs_inputs(nir_shader *nir,
347                         const struct gen_device_info *devinfo,
348                         const struct brw_wm_prog_key *key)
349 {
350    foreach_list_typed(nir_variable, var, node, &nir->inputs) {
351       var->data.driver_location = var->data.location;
352 
353       /* Apply default interpolation mode.
354        *
355        * Everything defaults to smooth except for the legacy GL color
356        * built-in variables, which might be flat depending on API state.
357        */
358       if (var->data.interpolation == INTERP_MODE_NONE) {
359          const bool flat = key->flat_shade &&
360             (var->data.location == VARYING_SLOT_COL0 ||
361              var->data.location == VARYING_SLOT_COL1);
362 
363          var->data.interpolation = flat ? INTERP_MODE_FLAT
364                                         : INTERP_MODE_SMOOTH;
365       }
366 
367       /* On Ironlake and below, there is only one interpolation mode.
368        * Centroid interpolation doesn't mean anything on this hardware --
369        * there is no multisampling.
370        */
371       if (devinfo->gen < 6) {
372          var->data.centroid = false;
373          var->data.sample = false;
374       }
375    }
376 
377    nir_lower_io_options lower_io_options = 0;
378    if (key->persample_interp)
379       lower_io_options |= nir_lower_io_force_sample_interpolation;
380 
381    nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);
382 
383    /* This pass needs actual constants */
384    nir_opt_constant_folding(nir);
385 
386    add_const_offset_to_base(nir, nir_var_shader_in);
387 }
388 
389 void
brw_nir_lower_vue_outputs(nir_shader * nir,bool is_scalar)390 brw_nir_lower_vue_outputs(nir_shader *nir,
391                           bool is_scalar)
392 {
393    nir_foreach_variable(var, &nir->outputs) {
394       var->data.driver_location = var->data.location;
395    }
396 
397    nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
398 }
399 
400 void
brw_nir_lower_tcs_outputs(nir_shader * nir,const struct brw_vue_map * vue_map,GLenum tes_primitive_mode)401 brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map,
402                           GLenum tes_primitive_mode)
403 {
404    nir_foreach_variable(var, &nir->outputs) {
405       var->data.driver_location = var->data.location;
406    }
407 
408    nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
409 
410    /* This pass needs actual constants */
411    nir_opt_constant_folding(nir);
412 
413    add_const_offset_to_base(nir, nir_var_shader_out);
414 
415    nir_foreach_function(function, nir) {
416       if (function->impl) {
417          nir_builder b;
418          nir_builder_init(&b, function->impl);
419          nir_foreach_block(block, function->impl) {
420             remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
421          }
422       }
423    }
424 }
425 
426 void
brw_nir_lower_fs_outputs(nir_shader * nir)427 brw_nir_lower_fs_outputs(nir_shader *nir)
428 {
429    nir_foreach_variable(var, &nir->outputs) {
430       var->data.driver_location =
431          SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
432          SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
433    }
434 
435    nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
436 }
437 
438 void
brw_nir_lower_cs_shared(nir_shader * nir)439 brw_nir_lower_cs_shared(nir_shader *nir)
440 {
441    nir_assign_var_locations(&nir->shared, &nir->num_shared,
442                             type_size_scalar_bytes);
443    nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0);
444 }
445 
/* Run a NIR pass on the local `nir`, OR the result into the local
 * `progress` flag, and evaluate to whether this particular pass made
 * progress (GCC/Clang statement expression).
 */
#define OPT(pass, ...) ({                                  \
   bool did_progress = false;                              \
   NIR_PASS(did_progress, nir, pass, ##__VA_ARGS__);       \
   progress = progress || did_progress;                    \
   did_progress;                                           \
})

/* Run a pass whose progress is not tracked. */
#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
455 
456 static nir_shader *
nir_optimize(nir_shader * nir,const struct brw_compiler * compiler,bool is_scalar)457 nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
458              bool is_scalar)
459 {
460    nir_variable_mode indirect_mask = 0;
461    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
462       indirect_mask |= nir_var_shader_in;
463    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
464       indirect_mask |= nir_var_shader_out;
465    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
466       indirect_mask |= nir_var_local;
467 
468    bool progress;
469    do {
470       progress = false;
471       OPT_V(nir_lower_vars_to_ssa);
472       OPT(nir_opt_copy_prop_vars);
473 
474       if (is_scalar) {
475          OPT(nir_lower_alu_to_scalar);
476       }
477 
478       OPT(nir_copy_prop);
479 
480       if (is_scalar) {
481          OPT(nir_lower_phis_to_scalar);
482       }
483 
484       OPT(nir_copy_prop);
485       OPT(nir_opt_dce);
486       OPT(nir_opt_cse);
487       OPT(nir_opt_peephole_select, 0);
488       OPT(nir_opt_algebraic);
489       OPT(nir_opt_constant_folding);
490       OPT(nir_opt_dead_cf);
491       if (OPT(nir_opt_trivial_continues)) {
492          /* If nir_opt_trivial_continues makes progress, then we need to clean
493           * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
494           * to make progress.
495           */
496          OPT(nir_copy_prop);
497          OPT(nir_opt_dce);
498       }
499       OPT(nir_opt_if);
500       if (nir->options->max_unroll_iterations != 0) {
501          OPT(nir_opt_loop_unroll, indirect_mask);
502       }
503       OPT(nir_opt_remove_phis);
504       OPT(nir_opt_undef);
505       OPT_V(nir_lower_doubles, nir_lower_drcp |
506                                nir_lower_dsqrt |
507                                nir_lower_drsq |
508                                nir_lower_dtrunc |
509                                nir_lower_dfloor |
510                                nir_lower_dceil |
511                                nir_lower_dfract |
512                                nir_lower_dround_even |
513                                nir_lower_dmod);
514       OPT_V(nir_lower_double_pack);
515    } while (progress);
516 
517    return nir;
518 }
519 
520 /* Does some simple lowering and runs the standard suite of optimizations
521  *
522  * This is intended to be called more-or-less directly after you get the
523  * shader out of GLSL or some other source.  While it is geared towards i965,
524  * it is not at all generator-specific except for the is_scalar flag.  Even
525  * there, it is safe to call with is_scalar = false for a shader that is
526  * intended for the FS backend as long as nir_optimize is called again with
527  * is_scalar = true to scalarize everything prior to code gen.
528  */
529 nir_shader *
brw_preprocess_nir(const struct brw_compiler * compiler,nir_shader * nir)530 brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
531 {
532    const struct gen_device_info *devinfo = compiler->devinfo;
533    bool progress; /* Written by OPT and OPT_V */
534    (void)progress;
535 
536    const bool is_scalar = compiler->scalar_stage[nir->stage];
537 
538    if (nir->stage == MESA_SHADER_GEOMETRY)
539       OPT(nir_lower_gs_intrinsics);
540 
541    /* See also brw_nir_trig_workarounds.py */
542    if (compiler->precise_trig &&
543        !(devinfo->gen >= 10 || devinfo->is_kabylake))
544       OPT(brw_nir_apply_trig_workarounds);
545 
546    static const nir_lower_tex_options tex_options = {
547       .lower_txp = ~0,
548       .lower_txf_offset = true,
549       .lower_rect_offset = true,
550       .lower_txd_cube_map = true,
551    };
552 
553    OPT(nir_lower_tex, &tex_options);
554    OPT(nir_normalize_cubemap_coords);
555 
556    OPT(nir_lower_global_vars_to_local);
557 
558    OPT(nir_split_var_copies);
559 
560    nir = nir_optimize(nir, compiler, is_scalar);
561 
562    if (is_scalar) {
563       OPT_V(nir_lower_load_const_to_scalar);
564    }
565 
566    /* Lower a bunch of stuff */
567    OPT_V(nir_lower_var_copies);
568 
569    OPT_V(nir_lower_clip_cull_distance_arrays);
570 
571    nir_variable_mode indirect_mask = 0;
572    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
573       indirect_mask |= nir_var_shader_in;
574    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
575       indirect_mask |= nir_var_shader_out;
576    if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
577       indirect_mask |= nir_var_local;
578 
579    nir_lower_indirect_derefs(nir, indirect_mask);
580 
581    /* Get rid of split copies */
582    nir = nir_optimize(nir, compiler, is_scalar);
583 
584    OPT(nir_remove_dead_variables, nir_var_local);
585 
586    return nir;
587 }
588 
589 /* Prepare the given shader for codegen
590  *
591  * This function is intended to be called right before going into the actual
592  * backend and is highly backend-specific.  Also, once this function has been
593  * called on a shader, it will no longer be in SSA form so most optimizations
594  * will not work.
595  */
596 nir_shader *
brw_postprocess_nir(nir_shader * nir,const struct brw_compiler * compiler,bool is_scalar)597 brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
598                     bool is_scalar)
599 {
600    const struct gen_device_info *devinfo = compiler->devinfo;
601    bool debug_enabled =
602       (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
603 
604    bool progress; /* Written by OPT and OPT_V */
605    (void)progress;
606 
607    nir = nir_optimize(nir, compiler, is_scalar);
608 
609    if (devinfo->gen >= 6) {
610       /* Try and fuse multiply-adds */
611       OPT(brw_nir_opt_peephole_ffma);
612    }
613 
614    OPT(nir_opt_algebraic_late);
615 
616    OPT_V(nir_lower_to_source_mods);
617    OPT(nir_copy_prop);
618    OPT(nir_opt_dce);
619    OPT(nir_opt_move_comparisons);
620 
621    OPT(nir_lower_locals_to_regs);
622 
623    if (unlikely(debug_enabled)) {
624       /* Re-index SSA defs so we print more sensible numbers. */
625       nir_foreach_function(function, nir) {
626          if (function->impl)
627             nir_index_ssa_defs(function->impl);
628       }
629 
630       fprintf(stderr, "NIR (SSA form) for %s shader:\n",
631               _mesa_shader_stage_to_string(nir->stage));
632       nir_print_shader(nir, stderr);
633    }
634 
635    OPT_V(nir_convert_from_ssa, true);
636 
637    if (!is_scalar) {
638       OPT_V(nir_move_vec_src_uses_to_dest);
639       OPT(nir_lower_vec_to_movs);
640    }
641 
642    /* This is the last pass we run before we start emitting stuff.  It
643     * determines when we need to insert boolean resolves on Gen <= 5.  We
644     * run it last because it stashes data in instr->pass_flags and we don't
645     * want that to be squashed by other NIR passes.
646     */
647    if (devinfo->gen <= 5)
648       brw_nir_analyze_boolean_resolves(nir);
649 
650    nir_sweep(nir);
651 
652    if (unlikely(debug_enabled)) {
653       fprintf(stderr, "NIR (final form) for %s shader:\n",
654               _mesa_shader_stage_to_string(nir->stage));
655       nir_print_shader(nir, stderr);
656    }
657 
658    return nir;
659 }
660 
661 nir_shader *
brw_nir_apply_sampler_key(nir_shader * nir,const struct brw_compiler * compiler,const struct brw_sampler_prog_key_data * key_tex,bool is_scalar)662 brw_nir_apply_sampler_key(nir_shader *nir,
663                           const struct brw_compiler *compiler,
664                           const struct brw_sampler_prog_key_data *key_tex,
665                           bool is_scalar)
666 {
667    const struct gen_device_info *devinfo = compiler->devinfo;
668    nir_lower_tex_options tex_options = { 0 };
669 
670    /* Iron Lake and prior require lowering of all rectangle textures */
671    if (devinfo->gen < 6)
672       tex_options.lower_rect = true;
673 
674    /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
675    if (devinfo->gen < 8) {
676       tex_options.saturate_s = key_tex->gl_clamp_mask[0];
677       tex_options.saturate_t = key_tex->gl_clamp_mask[1];
678       tex_options.saturate_r = key_tex->gl_clamp_mask[2];
679    }
680 
681    /* Prior to Haswell, we have to fake texture swizzle */
682    for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
683       if (key_tex->swizzles[s] == SWIZZLE_NOOP)
684          continue;
685 
686       tex_options.swizzle_result |= (1 << s);
687       for (unsigned c = 0; c < 4; c++)
688          tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
689    }
690 
691    /* Prior to Haswell, we have to lower gradients on shadow samplers */
692    tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell;
693 
694    tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
695    tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
696    tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
697 
698    if (nir_lower_tex(nir, &tex_options)) {
699       nir_validate_shader(nir);
700       nir = nir_optimize(nir, compiler, is_scalar);
701    }
702 
703    return nir;
704 }
705 
706 enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)707 brw_type_for_nir_type(nir_alu_type type)
708 {
709    switch (type) {
710    case nir_type_uint:
711    case nir_type_uint32:
712       return BRW_REGISTER_TYPE_UD;
713    case nir_type_bool:
714    case nir_type_int:
715    case nir_type_bool32:
716    case nir_type_int32:
717       return BRW_REGISTER_TYPE_D;
718    case nir_type_float:
719    case nir_type_float32:
720       return BRW_REGISTER_TYPE_F;
721    case nir_type_float64:
722       return BRW_REGISTER_TYPE_DF;
723    case nir_type_int64:
724    case nir_type_uint64:
725       /* TODO we should only see these in moves, so for now it's ok, but when
726        * we add actual 64-bit integer support we should fix this.
727        */
728       return BRW_REGISTER_TYPE_DF;
729    default:
730       unreachable("unknown type");
731    }
732 
733    return BRW_REGISTER_TYPE_F;
734 }
735 
736 /* Returns the glsl_base_type corresponding to a nir_alu_type.
737  * This is used by both brw_vec4_nir and brw_fs_nir.
738  */
739 enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)740 brw_glsl_base_type_for_nir_type(nir_alu_type type)
741 {
742    switch (type) {
743    case nir_type_float:
744    case nir_type_float32:
745       return GLSL_TYPE_FLOAT;
746 
747    case nir_type_float64:
748       return GLSL_TYPE_DOUBLE;
749 
750    case nir_type_int:
751    case nir_type_int32:
752       return GLSL_TYPE_INT;
753 
754    case nir_type_uint:
755    case nir_type_uint32:
756       return GLSL_TYPE_UINT;
757 
758    default:
759       unreachable("bad type");
760    }
761 }
762