• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_shader.h"
7 
8 #include "nir_builder.h"
9 
10 #include "tu_device.h"
11 
12 /* Some a6xx variants cannot support a non-contiguous multiview mask. Instead,
13  * inside the shader something like this needs to be inserted:
14  *
15  * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.);
16  *
17  * Scan backwards until we find the gl_Position write (there should only be
18  * one).
19  */
20 static bool
lower_multiview_mask(nir_shader * nir,uint32_t * mask)21 lower_multiview_mask(nir_shader *nir, uint32_t *mask)
22 {
23    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
24 
25    if (util_is_power_of_two_or_zero(*mask + 1)) {
26       nir_metadata_preserve(impl, nir_metadata_all);
27       return false;
28    }
29 
30    nir_builder b = nir_builder_create(impl);
31 
32    uint32_t old_mask = *mask;
33    *mask = BIT(util_logbase2(old_mask) + 1) - 1;
34 
35    nir_foreach_block_reverse(block, impl) {
36       nir_foreach_instr_reverse(instr, block) {
37          if (instr->type != nir_instr_type_intrinsic)
38             continue;
39 
40          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
41          if (intrin->intrinsic != nir_intrinsic_store_deref)
42             continue;
43 
44          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
45          if (!nir_deref_mode_is(deref, nir_var_shader_out))
46             continue;
47 
48          nir_variable *var = nir_deref_instr_get_variable(deref);
49          if (var->data.location != VARYING_SLOT_POS)
50             continue;
51 
52          nir_def *orig_src = intrin->src[1].ssa;
53          b.cursor = nir_before_instr(instr);
54 
55          /* ((1ull << gl_ViewIndex) & mask) != 0 */
56          nir_def *cmp =
57             nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask),
58                                   nir_ishl(&b, nir_imm_int(&b, 1),
59                                            nir_load_view_index(&b))));
60 
61          nir_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.));
62          nir_src_rewrite(&intrin->src[1], src);
63 
64          nir_metadata_preserve(impl, nir_metadata_control_flow);
65          return true;
66       }
67    }
68 
69    nir_metadata_preserve(impl, nir_metadata_all);
70    return false;
71 }
72 
73 bool
tu_nir_lower_multiview(nir_shader * nir,uint32_t mask,struct tu_device * dev)74 tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev)
75 {
76    bool progress = false;
77    nir_lower_multiview_options options = {
78       .view_mask = mask,
79       .allowed_per_view_outputs = VARYING_BIT_POS
80    };
81 
82    if (!dev->physical_device->info->a6xx.supports_multiview_mask)
83       NIR_PASS(progress, nir, lower_multiview_mask, &options.view_mask);
84 
85    unsigned num_views = util_logbase2(mask) + 1;
86 
87    /* Blob doesn't apply multipos optimization starting from 11 views
88     * even on a650, however in practice, with the limit of 16 views,
89     * tests pass on a640/a650 and fail on a630.
90     */
91    unsigned max_views_for_multipos =
92       dev->physical_device->info->a6xx.supports_multiview_mask ? 16 : 10;
93 
94    /* Speculatively assign output locations so that we know num_outputs. We
95     * will assign output locations for real after this pass.
96     */
97    unsigned num_outputs;
98    nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX);
99 
100    /* In addition to the generic checks done by NIR, check that we don't
101     * overflow VPC with the extra copies of gl_Position.
102     */
103    if (!TU_DEBUG(NOMULTIPOS) &&
104        num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 &&
105        nir_can_lower_multiview(nir, options)) {
106       /* It appears that the multiview mask is ignored when multi-position
107        * output is enabled, so we have to write 0 to inactive views ourselves.
108        */
109       NIR_PASS(progress, nir, lower_multiview_mask, &options.view_mask);
110 
111       NIR_PASS_V(nir, nir_lower_multiview, options);
112       progress = true;
113    }
114 
115    return progress;
116 }
117 
118