1 /*
2 * Copyright © 2020 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_shader.h"
7
8 #include "nir_builder.h"
9
10 #include "tu_device.h"
11
12 /* Some a6xx variants cannot support a non-contiguous multiview mask. Instead,
13 * inside the shader something like this needs to be inserted:
14 *
15 * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.);
16 *
17 * Scan backwards until we find the gl_Position write (there should only be
18 * one).
19 */
20 static bool
lower_multiview_mask(nir_shader * nir,uint32_t * mask)21 lower_multiview_mask(nir_shader *nir, uint32_t *mask)
22 {
23 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
24
25 if (util_is_power_of_two_or_zero(*mask + 1)) {
26 nir_metadata_preserve(impl, nir_metadata_all);
27 return false;
28 }
29
30 nir_builder b;
31 nir_builder_init(&b, impl);
32
33 uint32_t old_mask = *mask;
34 *mask = BIT(util_logbase2(old_mask) + 1) - 1;
35
36 nir_foreach_block_reverse(block, impl) {
37 nir_foreach_instr_reverse(instr, block) {
38 if (instr->type != nir_instr_type_intrinsic)
39 continue;
40
41 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
42 if (intrin->intrinsic != nir_intrinsic_store_deref)
43 continue;
44
45 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
46 if (!nir_deref_mode_is(deref, nir_var_shader_out))
47 continue;
48
49 nir_variable *var = nir_deref_instr_get_variable(deref);
50 if (var->data.location != VARYING_SLOT_POS)
51 continue;
52
53 assert(intrin->src[1].is_ssa);
54 nir_ssa_def *orig_src = intrin->src[1].ssa;
55 b.cursor = nir_before_instr(instr);
56
57 /* ((1ull << gl_ViewIndex) & mask) != 0 */
58 nir_ssa_def *cmp =
59 nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask),
60 nir_ishl(&b, nir_imm_int(&b, 1),
61 nir_load_view_index(&b))));
62
63 nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.));
64 nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src));
65
66 nir_metadata_preserve(impl, nir_metadata_block_index |
67 nir_metadata_dominance);
68 return true;
69 }
70 }
71
72 nir_metadata_preserve(impl, nir_metadata_all);
73 return false;
74 }
75
76 bool
tu_nir_lower_multiview(nir_shader * nir,uint32_t mask,bool * multi_pos_output,struct tu_device * dev)77 tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
78 struct tu_device *dev)
79 {
80 *multi_pos_output = false;
81
82 bool progress = false;
83
84 if (!dev->physical_device->info->a6xx.supports_multiview_mask)
85 NIR_PASS(progress, nir, lower_multiview_mask, &mask);
86
87 unsigned num_views = util_logbase2(mask) + 1;
88
89 /* Blob doesn't apply multipos optimization starting from 11 views
90 * even on a650, however in practice, with the limit of 16 views,
91 * tests pass on a640/a650 and fail on a630.
92 */
93 unsigned max_views_for_multipos =
94 dev->physical_device->info->a6xx.supports_multiview_mask ? 16 : 10;
95
96 /* Speculatively assign output locations so that we know num_outputs. We
97 * will assign output locations for real after this pass.
98 */
99 unsigned num_outputs;
100 nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX);
101
102 /* In addition to the generic checks done by NIR, check that we don't
103 * overflow VPC with the extra copies of gl_Position.
104 */
105 if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) &&
106 num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 &&
107 nir_can_lower_multiview(nir)) {
108 *multi_pos_output = true;
109
110 /* It appears that the multiview mask is ignored when multi-position
111 * output is enabled, so we have to write 0 to inactive views ourselves.
112 */
113 NIR_PASS(progress, nir, lower_multiview_mask, &mask);
114
115 NIR_PASS_V(nir, nir_lower_multiview, mask);
116 progress = true;
117 }
118
119 return progress;
120 }
121
122