/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"

/*
 * Lowering for wide (larger than vec4) load/store:  loads/stores with
 * more than four components are split into a sequence of vec4 (or
 * smaller) loads/stores, with the address advanced by the byte size of
 * each chunk.
 */

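/*
 * For illustration, a rough sketch (not exact NIR syntax): a 32-bit vec8
 * store such as
 *
 *    store_global(value, addr)              ; value has 8 components
 *
 * is replaced by two vec4 stores
 *
 *    store_global(components 0..3 of value, addr)
 *    store_global(components 4..7 of value, addr + 16)
 *
 * with write-mask and alignment carried over from the original.  Wide
 * loads are split the same way, and the pieces are recombined into a
 * single wide vector that replaces the original load.
 */
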
static bool
lower_wide_load_store_filter(const nir_instr *instr, const void *unused)
{
   (void)unused;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   /* Only load/store intrinsics wider than a vec4 need lowering: */
   if (is_intrinsic_store(intr->intrinsic))
      return nir_intrinsic_src_components(intr, 0) > 4;

   if (is_intrinsic_load(intr->intrinsic))
      return nir_intrinsic_dest_components(intr) > 4;

   return false;
}

static nir_ssa_def *
lower_wide_load_store(nir_builder *b, nir_instr *instr, void *unused)
{
   (void)unused;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (is_intrinsic_store(intr->intrinsic)) {
      unsigned num_comp = nir_intrinsic_src_components(intr, 0);
      unsigned wrmask = nir_intrinsic_write_mask(intr);
      nir_ssa_def *val = nir_ssa_for_src(b, intr->src[0], num_comp);
      nir_ssa_def *addr = nir_ssa_for_src(b, intr->src[1], 1);

      /* Split the wide store into vec4 (or smaller) stores, advancing
       * the address by the byte size of each chunk:
       */
      for (unsigned off = 0; off < num_comp; off += 4) {
         unsigned c = MIN2(num_comp - off, 4);
         nir_ssa_def *v = nir_channels(b, val, BITFIELD_MASK(c) << off);

         nir_intrinsic_instr *store =
            nir_intrinsic_instr_create(b->shader, intr->intrinsic);
         store->num_components = c;
         store->src[0] = nir_src_for_ssa(v);
         store->src[1] = nir_src_for_ssa(addr);
         nir_intrinsic_set_align(store, nir_intrinsic_align(intr), 0);
         nir_intrinsic_set_write_mask(store, (wrmask >> off) & 0xf);
         nir_builder_instr_insert(b, &store->instr);

         addr = nir_iadd(b,
               nir_imm_intN_t(b, (c * val->bit_size) / 8, addr->bit_size),
               addr);
      }

      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   } else {
      unsigned num_comp = nir_intrinsic_dest_components(intr);
      unsigned bit_size = nir_dest_bit_size(intr->dest);
      nir_ssa_def *addr = nir_ssa_for_src(b, intr->src[0], 1);
      nir_ssa_def *components[num_comp];

      /* Split the wide load into vec4 (or smaller) loads, collecting the
       * loaded components so they can be recombined into a single wide
       * vector that replaces the original load:
       */
      for (unsigned off = 0; off < num_comp;) {
         unsigned c = MIN2(num_comp - off, 4);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, intr->intrinsic);
         load->num_components = c;
         load->src[0] = nir_src_for_ssa(addr);
         nir_intrinsic_set_align(load, nir_intrinsic_align(intr), 0);
         nir_ssa_dest_init(&load->instr, &load->dest, c, bit_size, NULL);
         nir_builder_instr_insert(b, &load->instr);

         addr = nir_iadd(b,
               nir_imm_intN_t(b, (c * bit_size) / 8, addr->bit_size),
               addr);

         for (unsigned i = 0; i < c; i++) {
            components[off++] = nir_channel(b, &load->dest.ssa, i);
         }
      }

      return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components);
   }
}

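/*
 * Pass entry point.  A minimal usage sketch (the real call site lives in
 * the ir3 NIR pipeline; shown here only for illustration):
 *
 *    NIR_PASS_V(shader, ir3_nir_lower_wide_load_store);
 */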
bool
ir3_nir_lower_wide_load_store(nir_shader *shader)
{
   return nir_shader_lower_instructions(
         shader, lower_wide_load_store_filter,
         lower_wide_load_store, NULL);
}