/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * This lowering pass supports (as configured via nir_lower_image_options)
 * image related conversions:
 * + cube array size lowering. The size operation is converted from cube
 *   size to a 2d-array with the z component divided by 6.
 */

#include "nir.h"
#include "nir_builder.h"

34 static void
lower_cube_size(nir_builder * b,nir_intrinsic_instr * intrin)35 lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
36 {
37 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);
38
39 b->cursor = nir_before_instr(&intrin->instr);
40
41 nir_intrinsic_instr *_2darray_size =
42 nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
43 nir_intrinsic_set_image_dim(_2darray_size, GLSL_SAMPLER_DIM_2D);
44 nir_intrinsic_set_image_array(_2darray_size, true);
45 nir_builder_instr_insert(b, &_2darray_size->instr);
46
47 nir_def *size = nir_instr_def(&_2darray_size->instr);
48 nir_scalar comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
49 unsigned coord_comps = intrin->def.num_components;
50 for (unsigned c = 0; c < coord_comps; c++) {
51 if (c == 2) {
52 comps[2] = nir_get_scalar(nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6)), 0);
53 } else {
54 comps[c] = nir_get_scalar(size, c);
55 }
56 }
57
58 nir_def *vec = nir_vec_scalars(b, comps, intrin->def.num_components);
59 nir_def_rewrite_uses(&intrin->def, vec);
60 nir_instr_remove(&intrin->instr);
61 nir_instr_free(&intrin->instr);
62 }
63
/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate number 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
83 static void
lower_image_to_fragment_mask_load(nir_builder * b,nir_intrinsic_instr * intrin)84 lower_image_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
85 {
86 b->cursor = nir_before_instr(&intrin->instr);
87
88 nir_intrinsic_op fmask_op;
89 switch (intrin->intrinsic) {
90 case nir_intrinsic_image_load:
91 fmask_op = nir_intrinsic_image_fragment_mask_load_amd;
92 break;
93 case nir_intrinsic_image_deref_load:
94 fmask_op = nir_intrinsic_image_deref_fragment_mask_load_amd;
95 break;
96 case nir_intrinsic_bindless_image_load:
97 fmask_op = nir_intrinsic_bindless_image_fragment_mask_load_amd;
98 break;
99 default:
100 unreachable("bad intrinsic");
101 break;
102 }
103
104 nir_def *fmask =
105 nir_image_fragment_mask_load_amd(b, intrin->src[0].ssa, intrin->src[1].ssa,
106 .image_dim = nir_intrinsic_image_dim(intrin),
107 .image_array = nir_intrinsic_image_array(intrin),
108 .format = nir_intrinsic_format(intrin),
109 .access = nir_intrinsic_access(intrin));
110
111 /* fix intrinsic op */
112 nir_intrinsic_instr *fmask_load = nir_instr_as_intrinsic(fmask->parent_instr);
113 fmask_load->intrinsic = fmask_op;
114
115 /* extract real color buffer index from fmask buffer */
116 nir_def *sample_index_old = intrin->src[2].ssa;
117 nir_def *fmask_offset = nir_ishl_imm(b, sample_index_old, 2);
118 nir_def *fmask_width = nir_imm_int(b, 3);
119 nir_def *sample_index_new = nir_ubfe(b, fmask, fmask_offset, fmask_width);
120
121 /* fix color buffer load */
122 nir_src_rewrite(&intrin->src[2], sample_index_new);
123
124 /* Mark uses fmask to prevent lower this intrinsic again. */
125 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
126 nir_intrinsic_set_access(intrin, access | ACCESS_FMASK_LOWERED_AMD);
127 }
128
129 static void
lower_image_samples_identical_to_fragment_mask_load(nir_builder * b,nir_intrinsic_instr * intrin)130 lower_image_samples_identical_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
131 {
132 b->cursor = nir_before_instr(&intrin->instr);
133
134 nir_intrinsic_instr *fmask_load =
135 nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
136
137 switch (intrin->intrinsic) {
138 case nir_intrinsic_image_samples_identical:
139 fmask_load->intrinsic = nir_intrinsic_image_fragment_mask_load_amd;
140 break;
141 case nir_intrinsic_image_deref_samples_identical:
142 fmask_load->intrinsic = nir_intrinsic_image_deref_fragment_mask_load_amd;
143 break;
144 case nir_intrinsic_bindless_image_samples_identical:
145 fmask_load->intrinsic = nir_intrinsic_bindless_image_fragment_mask_load_amd;
146 break;
147 default:
148 unreachable("bad intrinsic");
149 break;
150 }
151
152 nir_def_init(&fmask_load->instr, &fmask_load->def, 1, 32);
153 nir_builder_instr_insert(b, &fmask_load->instr);
154
155 nir_def *samples_identical = nir_ieq_imm(b, &fmask_load->def, 0);
156 nir_def_rewrite_uses(&intrin->def, samples_identical);
157
158 nir_instr_remove(&intrin->instr);
159 nir_instr_free(&intrin->instr);
160 }
161
162 static bool
lower_image_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * state)163 lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
164 {
165 const nir_lower_image_options *options = state;
166
167 switch (intrin->intrinsic) {
168 case nir_intrinsic_image_size:
169 case nir_intrinsic_image_deref_size:
170 case nir_intrinsic_bindless_image_size:
171 if (options->lower_cube_size &&
172 nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE) {
173 lower_cube_size(b, intrin);
174 return true;
175 }
176 return false;
177
178 case nir_intrinsic_image_load:
179 case nir_intrinsic_image_deref_load:
180 case nir_intrinsic_bindless_image_load:
181 if (options->lower_to_fragment_mask_load_amd &&
182 nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS &&
183 /* Don't lower again. */
184 !(nir_intrinsic_access(intrin) & ACCESS_FMASK_LOWERED_AMD)) {
185 lower_image_to_fragment_mask_load(b, intrin);
186 return true;
187 }
188 return false;
189
190 case nir_intrinsic_image_samples_identical:
191 case nir_intrinsic_image_deref_samples_identical:
192 case nir_intrinsic_bindless_image_samples_identical:
193 if (options->lower_to_fragment_mask_load_amd &&
194 nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS) {
195 lower_image_samples_identical_to_fragment_mask_load(b, intrin);
196 return true;
197 }
198 return false;
199
200 case nir_intrinsic_image_samples:
201 case nir_intrinsic_image_deref_samples:
202 case nir_intrinsic_bindless_image_samples: {
203 if (options->lower_image_samples_to_one) {
204 b->cursor = nir_after_instr(&intrin->instr);
205 nir_def *samples = nir_imm_intN_t(b, 1, intrin->def.bit_size);
206 nir_def_rewrite_uses(&intrin->def, samples);
207 return true;
208 }
209 return false;
210 }
211 default:
212 return false;
213 }
214 }
215
216 bool
nir_lower_image(nir_shader * nir,const nir_lower_image_options * options)217 nir_lower_image(nir_shader *nir, const nir_lower_image_options *options)
218 {
219 return nir_shader_intrinsics_pass(nir, lower_image_intrin,
220 nir_metadata_block_index |
221 nir_metadata_dominance,
222 (void *)options);
223 }
224