/*
 * Copyright © 2018 Intel Corporation
 * Copyright © 2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

/** @file v3d_nir_lower_image_load_store.c
 *
 * Performs any necessary lowering of GL_ARB_shader_image_load_store
 * operations.
 *
 * On V3D 4.x, we just need to do format conversion for stores such that the
 * GPU can effectively memcpy the arguments (in increments of 32-bit words)
 * into the texel.  Loads are the same as texturing, where we may need to
 * unpack from 16-bit ints or floats.
 *
 * On V3D 3.x, to implement image load/store we would need to do manual
 * tiling calculations and load/store using the TMU general memory access
 * path.
 */
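
/* As a concrete illustration of the 4.x store path: storing
 * vec4(1.0, 0.0, 0.0, 1.0) to a PIPE_FORMAT_R8G8B8A8_UNORM image is
 * lowered to a float-to-unorm conversion of each channel, giving
 * (255, 0, 0, 255), followed by packing those four 8-bit values into the
 * single 32-bit word 0xff0000ff that the TMU can then write out directly.
 */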

bool
v3d_gl_format_is_return_32(enum pipe_format format)
{
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *chan = &desc->channel[0];

        return chan->size > 16 || (chan->size == 16 && chan->normalized);
}
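
/* For example, per the predicate above: R32F, R32UI and R16_UNORM report
 * 32-bit returns, while R16F, R16UI and R8 take the packed 16-bit return
 * path lowered in v3d_nir_lower_image_load() below.
 */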

/* Packs a vector of 32-bit channel values, each in the range
 * [0, (1 << bits[i]) - 1], into a vector of 32-bit words with as many
 * components as necessary to store all the bits.  If "mask" is set, each
 * channel is ANDed down to its bit width first, so that sign-extended
 * values can't corrupt neighbouring channels.
 */
static nir_ssa_def *
pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
          int num_components, bool mask)
{
        nir_ssa_def *results[4];
        int offset = 0;
        for (int i = 0; i < num_components; i++) {
                nir_ssa_def *chan = nir_channel(b, color, i);

                /* Channels being stored shouldn't cross a 32-bit boundary. */
                assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));

                if (mask) {
                        chan = nir_iand(b, chan,
                                        nir_imm_int(b, (1 << bits[i]) - 1));
                }

                if (offset % 32 == 0) {
                        results[offset / 32] = chan;
                } else {
                        results[offset / 32] =
                                nir_ior(b, results[offset / 32],
                                        nir_ishl(b, chan,
                                                 nir_imm_int(b, offset % 32)));
                }
                offset += bits[i];
        }

        return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
}
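
/* Worked example of the packing loop above: for bits = {10, 10, 10, 2}
 * (RGB10_A2) the channel offsets are 0, 10, 20 and 30, so all four channels
 * are ORed into results[0] and a single 32-bit word is returned.  For
 * bits = {16, 16, 16, 16} the offsets are 0, 16, 32 and 48, yielding a
 * two-component (64-bit) result.
 */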

static bool
v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
{
        enum pipe_format format = nir_intrinsic_format(instr);
        assert(format != PIPE_FORMAT_NONE);
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *r_chan = &desc->channel[0];
        unsigned num_components = util_format_get_nr_components(format);

        b->cursor = nir_before_instr(&instr->instr);

        /* Trim the store value (src[3]) down to the number of channels the
         * format actually has.
         */
        nir_ssa_def *color = nir_channels(b,
                                          nir_ssa_for_src(b, instr->src[3], 4),
                                          (1 << num_components) - 1);
        nir_ssa_def *formatted = NULL;

        if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
                formatted = nir_format_pack_11f11f10f(b, color);
        } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
                formatted = nir_format_pack_r9g9b9e5(b, color);
        } else if (r_chan->size == 32) {
                /* For 32-bit formats, we just have to move the vector
                 * across (possibly reducing the number of channels).
                 */
                formatted = color;
        } else {
                static const unsigned bits_8[4] = {8, 8, 8, 8};
                static const unsigned bits_16[4] = {16, 16, 16, 16};
                static const unsigned bits_1010102[4] = {10, 10, 10, 2};
                const unsigned *bits;

                switch (r_chan->size) {
                case 8:
                        bits = bits_8;
                        break;
                case 10:
                        bits = bits_1010102;
                        break;
                case 16:
                        bits = bits_16;
                        break;
                default:
                        unreachable("unrecognized bits");
                }

                bool pack_mask = false;
                if (r_chan->pure_integer &&
                    r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        /* No conversion needed, but negative values are
                         * sign-extended across the full 32 bits, so mask
                         * each channel down to its size during packing to
                         * keep the high bits from corrupting the channels
                         * packed above it.
                         */
                        formatted = color;
                        pack_mask = true;
                } else if (r_chan->pure_integer &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        /* We don't need to do any conversion or clamping
                         * in this case.
                         */
                        formatted = color;
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        /* float_to_snorm also produces sign-extended
                         * results, so mask during packing as in the
                         * pure-integer case.
                         */
                        formatted = nir_format_float_to_snorm(b, color, bits);
                        pack_mask = true;
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        formatted = nir_format_float_to_unorm(b, color, bits);
                } else {
                        assert(r_chan->size == 16);
                        assert(r_chan->type == UTIL_FORMAT_TYPE_FLOAT);
                        formatted = nir_format_float_to_half(b, color);
                }

                formatted = pack_bits(b, formatted, bits, num_components,
                                      pack_mask);
        }

        nir_instr_rewrite_src(&instr->instr, &instr->src[3],
                              nir_src_for_ssa(formatted));
        instr->num_components = formatted->num_components;

        return true;
}
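
/* Hypothetical end-to-end example of the store lowering above: an
 * imageStore() of vec4(1.0, 0.0, 0.0, 1.0) to an rgba8 (unorm) image has
 * its value source replaced by float_to_unorm giving (255, 0, 0, 255),
 * packed by pack_bits() into the single word 0xff0000ff, with
 * num_components reduced to 1, so the TMU write becomes a plain 32-bit
 * store.
 */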

static bool
v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
{
        static const unsigned bits16[] = {16, 16, 16, 16};
        enum pipe_format format = nir_intrinsic_format(instr);

        if (v3d_gl_format_is_return_32(format))
                return false;

        b->cursor = nir_after_instr(&instr->instr);

        assert(instr->dest.is_ssa);
        nir_ssa_def *result = &instr->dest.ssa;
        if (util_format_is_pure_uint(format)) {
                result = nir_format_unpack_uint(b, result, bits16, 4);
        } else if (util_format_is_pure_sint(format)) {
                result = nir_format_unpack_sint(b, result, bits16, 4);
        } else {
                /* Half-float return: each 32-bit word of the result holds
                 * a pair of f16 values, so split them back out into a
                 * 32-bit float vec4.
                 */
                nir_ssa_def *rg = nir_channel(b, result, 0);
                nir_ssa_def *ba = nir_channel(b, result, 1);
                result = nir_vec4(b,
                                  nir_unpack_half_2x16_split_x(b, rg),
                                  nir_unpack_half_2x16_split_y(b, rg),
                                  nir_unpack_half_2x16_split_x(b, ba),
                                  nir_unpack_half_2x16_split_y(b, ba));
        }

        nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, result,
                                       result->parent_instr);

        return true;
}
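
/* Worked example, assuming an rg16ui image: the TMU returns the two
 * 16-bit channels packed into one 32-bit word, and the unpack_uint()
 * path above shifts and masks them back out into 32-bit uint channels
 * for the shader.
 */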

static bool
v3d_nir_lower_image_load_store_cb(nir_builder *b,
                                  nir_instr *instr,
                                  void *_state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return false;

        nir_intrinsic_instr *intr =
                nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_image_load:
                return v3d_nir_lower_image_load(b, intr);
        case nir_intrinsic_image_store:
                return v3d_nir_lower_image_store(b, intr);
        default:
                return false;
        }

        return false;
}

bool
v3d_nir_lower_image_load_store(nir_shader *s)
{
        return nir_shader_instructions_pass(s, v3d_nir_lower_image_load_store_cb,
                                            nir_metadata_block_index |
                                            nir_metadata_dominance, NULL);
}
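
/* This is a shader-level NIR pass, so a caller would typically run it via
 * the usual pass macro, e.g. (illustrative call site, not from this file):
 *
 *     NIR_PASS_V(s, v3d_nir_lower_image_load_store);
 */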