/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * This lowering pass performs image-related conversions, as configured via
 * nir_lower_image_options:
 *   + cube array size lowering: the size operation is converted from a cube
 *     size to a 2D-array size with the z component divided by 6.
 */
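/* Illustrative example (for exposition only, not code from this pass): a
 * cube array image backed by 12 layers reports a 2D-array size of
 * (w, h, 12); dividing the third component by 6 yields the 2 cubes that a
 * cube-array size query is expected to return:
 *
 *    cube_array_size = (w, h, layers / 6)
 */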

#include "nir.h"
#include "nir_builder.h"

static void
lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
{
   assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);

   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_instr *_2darray_size =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
   nir_intrinsic_set_image_dim(_2darray_size, GLSL_SAMPLER_DIM_2D);
   nir_intrinsic_set_image_array(_2darray_size, true);
   nir_builder_instr_insert(b, &_2darray_size->instr);

   nir_def *size = nir_instr_def(&_2darray_size->instr);
   nir_scalar comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned coord_comps = intrin->def.num_components;
   for (unsigned c = 0; c < coord_comps; c++) {
      if (c == 2) {
         comps[2] = nir_get_scalar(nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6)), 0);
      } else {
         comps[c] = nir_get_scalar(size, c);
      }
   }

   nir_def *vec = nir_vec_scalars(b, comps, intrin->def.num_components);
   nir_def_rewrite_uses(&intrin->def, vec);
   nir_instr_remove(&intrin->instr);
   nir_instr_free(&intrin->instr);
}

/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210, which is
 * the identity mapping. Each nibble says which physical sample should be
 * fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and the
 * second sample covers 3/4 of the pixel. When reading samples 0 and 1,
 * return physical sample 0 (determined by the first two 0s in FMASK);
 * otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate the value 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits maps it to 0, which works with
 * all MSAA modes.
 */
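/* Worked example of the formula above, using the FMASK value from the
 * comment: with fmask = 0x11111100 and sample_index = 1,
 *
 *    ubfe(0x11111100, 1 * 4, 3) = (0x11111100 >> 4) & 0x7 = 0,
 *
 * so the load is redirected to physical sample 0, matching the mapping
 * described above.
 */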
static void
lower_image_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_op fmask_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_load:
      fmask_op = nir_intrinsic_image_fragment_mask_load_amd;
      break;
   case nir_intrinsic_image_deref_load:
      fmask_op = nir_intrinsic_image_deref_fragment_mask_load_amd;
      break;
   case nir_intrinsic_bindless_image_load:
      fmask_op = nir_intrinsic_bindless_image_fragment_mask_load_amd;
      break;
   default:
      unreachable("bad intrinsic");
      break;
   }

   nir_def *fmask =
      nir_image_fragment_mask_load_amd(b, intrin->src[0].ssa, intrin->src[1].ssa,
                                       .image_dim = nir_intrinsic_image_dim(intrin),
                                       .image_array = nir_intrinsic_image_array(intrin),
                                       .format = nir_intrinsic_format(intrin),
                                       .access = nir_intrinsic_access(intrin));

   /* Fix up the intrinsic op: the builder emits the plain image variant, so
    * switch to the deref/bindless op matching the original load.
    */
   nir_intrinsic_instr *fmask_load = nir_instr_as_intrinsic(fmask->parent_instr);
   fmask_load->intrinsic = fmask_op;

   /* Extract the physical sample index from the FMASK value: the bit offset
    * is sample_index * 4 and we take 3 bits, per the formula above.
    */
   nir_def *sample_index_old = intrin->src[2].ssa;
   nir_def *fmask_offset = nir_ishl_imm(b, sample_index_old, 2);
   nir_def *fmask_width = nir_imm_int(b, 3);
   nir_def *sample_index_new = nir_ubfe(b, fmask, fmask_offset, fmask_width);

   /* Rewrite the color buffer load to use the remapped sample index. */
   nir_src_rewrite(&intrin->src[2], sample_index_new);

   /* Mark the load as FMASK-lowered to prevent lowering it again. */
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_intrinsic_set_access(intrin, access | ACCESS_FMASK_LOWERED_AMD);
}

static void
lower_image_samples_identical_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_instr *fmask_load =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));

   switch (intrin->intrinsic) {
   case nir_intrinsic_image_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_image_fragment_mask_load_amd;
      break;
   case nir_intrinsic_image_deref_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_image_deref_fragment_mask_load_amd;
      break;
   case nir_intrinsic_bindless_image_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_bindless_image_fragment_mask_load_amd;
      break;
   default:
      unreachable("bad intrinsic");
      break;
   }

   nir_def_init(&fmask_load->instr, &fmask_load->def, 1, 32);
   nir_builder_instr_insert(b, &fmask_load->instr);

   /* An FMASK of 0 means every sample maps to physical sample 0, so all
    * samples of the pixel contain the same value.
    */
   nir_def *samples_identical = nir_ieq_imm(b, &fmask_load->def, 0);
   nir_def_rewrite_uses(&intrin->def, samples_identical);

   nir_instr_remove(&intrin->instr);
   nir_instr_free(&intrin->instr);
}

static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   const nir_lower_image_options *options = state;

   switch (intrin->intrinsic) {
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      if (options->lower_cube_size &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE) {
         lower_cube_size(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      if (options->lower_to_fragment_mask_load_amd &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS &&
          /* Don't lower again. */
          !(nir_intrinsic_access(intrin) & ACCESS_FMASK_LOWERED_AMD)) {
         lower_image_to_fragment_mask_load(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
      if (options->lower_to_fragment_mask_load_amd &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS) {
         lower_image_samples_identical_to_fragment_mask_load(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples: {
      if (options->lower_image_samples_to_one) {
         b->cursor = nir_after_instr(&intrin->instr);
         nir_def *samples = nir_imm_intN_t(b, 1, intrin->def.bit_size);
         nir_def_rewrite_uses(&intrin->def, samples);
         return true;
      }
      return false;
   }
   default:
      return false;
   }
}
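
/* Usage sketch (illustrative, not part of this file): a driver would
 * typically run the pass via NIR_PASS with only the options it needs, e.g.:
 *
 *    bool progress = false;
 *    nir_lower_image_options opts = {
 *       .lower_cube_size = true,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_image, &opts);
 */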

bool
nir_lower_image(nir_shader *nir, const nir_lower_image_options *options)
{
   return nir_shader_intrinsics_pass(nir, lower_image_intrin,
                                     nir_metadata_block_index |
                                        nir_metadata_dominance,
                                     (void *)options);
}
224