• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_private.h"
7 #include "nir_builder.h"
8 
9 /* Some a6xx variants cannot support a non-contiguous multiview mask. Instead,
10  * inside the shader something like this needs to be inserted:
11  *
12  * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.);
13  *
14  * Scan backwards until we find the gl_Position write (there should only be
15  * one).
16  */
17 static bool
lower_multiview_mask(nir_shader * nir,uint32_t * mask)18 lower_multiview_mask(nir_shader *nir, uint32_t *mask)
19 {
20    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
21 
22    if (util_is_power_of_two_or_zero(*mask + 1)) {
23       nir_metadata_preserve(impl, nir_metadata_all);
24       return false;
25    }
26 
27    nir_builder b;
28    nir_builder_init(&b, impl);
29 
30    uint32_t old_mask = *mask;
31    *mask = BIT(util_logbase2(old_mask) + 1) - 1;
32 
33    nir_foreach_block_reverse(block, impl) {
34       nir_foreach_instr_reverse(instr, block) {
35          if (instr->type != nir_instr_type_intrinsic)
36             continue;
37 
38          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
39          if (intrin->intrinsic != nir_intrinsic_store_deref)
40             continue;
41 
42          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
43          if (!nir_deref_mode_is(deref, nir_var_shader_out))
44             continue;
45 
46          nir_variable *var = nir_deref_instr_get_variable(deref);
47          if (var->data.location != VARYING_SLOT_POS)
48             continue;
49 
50          assert(intrin->src[1].is_ssa);
51          nir_ssa_def *orig_src = intrin->src[1].ssa;
52          b.cursor = nir_before_instr(instr);
53 
54          /* ((1ull << gl_ViewIndex) & mask) != 0 */
55          nir_ssa_def *cmp =
56             nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask),
57                                   nir_ishl(&b, nir_imm_int(&b, 1),
58                                            nir_load_view_index(&b))));
59 
60          nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.));
61          nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src));
62 
63          nir_metadata_preserve(impl, nir_metadata_block_index |
64                                      nir_metadata_dominance);
65          return true;
66       }
67    }
68 
69    nir_metadata_preserve(impl, nir_metadata_all);
70    return false;
71 }
72 
73 bool
tu_nir_lower_multiview(nir_shader * nir,uint32_t mask,bool * multi_pos_output,struct tu_device * dev)74 tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
75                        struct tu_device *dev)
76 {
77    *multi_pos_output = false;
78 
79    bool progress = false;
80 
81    if (!dev->physical_device->info.a6xx.supports_multiview_mask)
82       NIR_PASS(progress, nir, lower_multiview_mask, &mask);
83 
84    unsigned num_views = util_logbase2(mask) + 1;
85 
86    /* Speculatively assign output locations so that we know num_outputs. We
87     * will assign output locations for real after this pass.
88     */
89    unsigned num_outputs;
90    nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX);
91 
92    /* In addition to the generic checks done by NIR, check that we don't
93     * overflow VPC with the extra copies of gl_Position.
94     */
95    if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) &&
96        num_outputs + (num_views - 1) <= 32 && nir_can_lower_multiview(nir)) {
97       *multi_pos_output = true;
98 
99       /* It appears that the multiview mask is ignored when multi-position
100        * output is enabled, so we have to write 0 to inactive views ourselves.
101        */
102       NIR_PASS(progress, nir, lower_multiview_mask, &mask);
103 
104       NIR_PASS_V(nir, nir_lower_multiview, mask);
105       progress = true;
106    }
107 
108    return progress;
109 }
110 
111