/*
 * Copyright (C) 2020-2022 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler/nir/nir_builder.h"
#include "pan_ir.h"

/*
 * If the shader packs multiple varyings into the same location with different
 * location_frac, we need to lower them to a single varying store that collects
 * all of the channels together. This is because the varying instruction on
 * Midgard and Bifrost is slot-based, writing out an entire vec4 slot at a time.
 *
 * NOTE: this expects all stores to be outside of control flow, and with
 * constant offsets. It should be run after nir_lower_io_to_temporaries.
 */
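/*
 * A rough sketch of the transformation (illustrative pseudo-NIR; the SSA
 * names and syntax are made up, not exact nir_print output). Two partial
 * stores that land in the same vec4 slot, such as
 *
 *    store_output(ssa_1, 0), base=0, component=0, wrmask=x
 *    store_output(ssa_2, 0), base=0, component=2, wrmask=xy
 *
 * are merged into a single slot-wide store:
 *
 *    ssa_3 = vec4(ssa_1.x, undef, ssa_2.x, ssa_2.y)
 *    store_output(ssa_3, 0), base=0, component=0, wrmask=xzw
 *
 * The write mask covers only the channels actually written (x, z and w
 * here); the untouched y channel stays undefined.
 */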
static bool
lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_output &&
       intr->intrinsic != nir_intrinsic_store_per_view_output)
      return false;

   struct hash_table_u64 *slots = data;
   unsigned component = nir_intrinsic_component(intr);
   nir_src *slot_src = nir_get_io_offset_src(intr);
   uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);

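   /* Per-view outputs fold the view index into the hash key, so stores to
    * the same location from different views are not merged together.
    */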
   if (intr->intrinsic == nir_intrinsic_store_per_view_output) {
      uint64_t view_index = nir_src_as_uint(intr->src[1]);
      slot |= view_index << 32;
   }

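   /* Look up the store we previously emitted for this slot (if any); its
    * write mask tells us which channels were already written.
    */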
   nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
   unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);

   nir_def *value = intr->src[0].ssa;
   b->cursor = nir_before_instr(&intr->instr);

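   /* Accumulate up to a vec4 of channels; anything never written stays undef */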
   nir_def *undef = nir_undef(b, 1, value->bit_size);
   nir_def *channels[4] = {undef, undef, undef, undef};

   /* Copy old */
   u_foreach_bit(i, mask) {
      assert(prev != NULL);
      nir_def *prev_ssa = prev->src[0].ssa;
      channels[i] = nir_channel(b, prev_ssa, i);
   }

   /* Copy new */
   unsigned new_mask = nir_intrinsic_write_mask(intr);
   mask |= (new_mask << component);

   u_foreach_bit(i, new_mask) {
      assert(component + i < 4);
      channels[component + i] = nir_channel(b, value, i);
   }

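   /* Rewrite this store so it starts at component 0 and covers every channel
    * written to the slot so far.
    */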
   intr->num_components = util_last_bit(mask);
   nir_src_rewrite(&intr->src[0], nir_vec(b, channels, intr->num_components));

   nir_intrinsic_set_component(intr, 0);
   nir_intrinsic_set_write_mask(intr, mask);

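   /* The merged store supersedes any earlier partial store to this slot */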
   if (prev) {
      _mesa_hash_table_u64_remove(slots, slot);
      nir_instr_remove(&prev->instr);
   }

   _mesa_hash_table_u64_insert(slots, slot, intr);

   /* We rewrote this store in place (and possibly removed an earlier partial
    * store), so report progress.
    */
   return true;
}

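/*
 * Pass entry point: walk the shader's output stores once, merging partial
 * writes per output slot. The hash table is keyed by slot (plus view index
 * for per-view outputs) and only lives for the duration of the pass.
 */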
bool
pan_nir_lower_store_component(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_VERTEX);

   struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL);
   bool progress = nir_shader_intrinsics_pass(
      s, lower_store_component, nir_metadata_control_flow, stores);
   _mesa_hash_table_u64_destroy(stores);
   return progress;
}