1 /*
2 * Copyright © 2023 Collabora, Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "nak_private.h"
7 #include "nir_builder.h"
8
9 /** Load a flat FS input */
10 static nir_def *
load_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,UNUSED const struct nak_compiler * nak)11 load_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
12 UNUSED const struct nak_compiler *nak)
13 {
14 const struct nak_nir_ipa_flags flags = {
15 .interp_mode = NAK_INTERP_MODE_CONSTANT,
16 .interp_freq = NAK_INTERP_FREQ_CONSTANT,
17 .interp_loc = NAK_INTERP_LOC_DEFAULT,
18 };
19 uint32_t flags_u32;
20 memcpy(&flags_u32, &flags, sizeof(flags_u32));
21
22 nir_def *comps[NIR_MAX_VEC_COMPONENTS];
23 for (unsigned c = 0; c < num_components; c++) {
24 comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), nir_imm_int(b, 0),
25 .base = addr + c * 4, .flags = flags_u32);
26 }
27 return nir_vec(b, comps, num_components);
28 }
29
30 static nir_def *
load_frag_w(nir_builder * b,enum nak_interp_loc interp_loc,nir_def * offset,const struct nak_compiler * nak)31 load_frag_w(nir_builder *b, enum nak_interp_loc interp_loc, nir_def *offset,
32 const struct nak_compiler *nak)
33 {
34 if (offset == NULL)
35 offset = nir_imm_int(b, 0);
36
37 const uint16_t w_addr =
38 nak_sysval_attr_addr(nak, SYSTEM_VALUE_FRAG_COORD) + 12;
39
40 const struct nak_nir_ipa_flags flags = {
41 .interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR,
42 .interp_freq = NAK_INTERP_FREQ_PASS,
43 .interp_loc = interp_loc,
44 };
45 uint32_t flags_u32;
46 memcpy(&flags_u32, &flags, sizeof(flags_u32));
47
48 return nir_ipa_nv(b, nir_imm_float(b, 0), offset,
49 .base = w_addr, .flags = flags_u32);
50 }
51
52 static nir_def *
interp_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,enum nak_interp_mode interp_mode,enum nak_interp_loc interp_loc,nir_def * inv_w,nir_def * offset,const struct nak_compiler * nak)53 interp_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
54 enum nak_interp_mode interp_mode,
55 enum nak_interp_loc interp_loc,
56 nir_def *inv_w, nir_def *offset,
57 const struct nak_compiler *nak)
58 {
59 if (offset == NULL)
60 offset = nir_imm_int(b, 0);
61
62 if (nak->sm >= 70) {
63 const struct nak_nir_ipa_flags flags = {
64 .interp_mode = interp_mode,
65 .interp_freq = NAK_INTERP_FREQ_PASS,
66 .interp_loc = interp_loc,
67 };
68 uint32_t flags_u32;
69 memcpy(&flags_u32, &flags, sizeof(flags_u32));
70
71 nir_def *comps[NIR_MAX_VEC_COMPONENTS];
72 for (unsigned c = 0; c < num_components; c++) {
73 comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), offset,
74 .base = addr + c * 4,
75 .flags = flags_u32);
76 if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
77 comps[c] = nir_fmul(b, comps[c], inv_w);
78 }
79 return nir_vec(b, comps, num_components);
80 } else if (nak->sm >= 50) {
81 struct nak_nir_ipa_flags flags = {
82 .interp_mode = interp_mode,
83 .interp_freq = NAK_INTERP_FREQ_PASS,
84 .interp_loc = interp_loc,
85 };
86
87 if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
88 flags.interp_freq = NAK_INTERP_FREQ_PASS_MUL_W;
89 else
90 inv_w = nir_imm_float(b, 0);
91
92 uint32_t flags_u32;
93 memcpy(&flags_u32, &flags, sizeof(flags_u32));
94
95 nir_def *comps[NIR_MAX_VEC_COMPONENTS];
96 for (unsigned c = 0; c < num_components; c++) {
97 comps[c] = nir_ipa_nv(b, inv_w, offset,
98 .base = addr + c * 4,
99 .flags = flags_u32);
100 }
101 return nir_vec(b, comps, num_components);
102 } else {
103 unreachable("Figure out input interpolation on Kepler");
104 }
105 }
106
107 static nir_def *
load_sample_pos_u4_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)108 load_sample_pos_u4_at(nir_builder *b, nir_def *sample_id,
109 const struct nak_fs_key *fs_key)
110 {
111 nir_def *loc = nir_ldc_nv(b, 1, 8,
112 nir_imm_int(b, fs_key->sample_info_cb),
113 nir_iadd_imm(b, sample_id,
114 fs_key->sample_locations_offset),
115 .align_mul = 1, .align_offset = 0);
116
117 /* The rest of these calculations are in 32-bit */
118 loc = nir_u2u32(b, loc);
119 nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
120 nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
121 return nir_vec2(b, loc_x_u4, loc_y_u4);
122 }
123
124 static nir_def *
load_pass_sample_mask_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)125 load_pass_sample_mask_at(nir_builder *b, nir_def *sample_id,
126 const struct nak_fs_key *fs_key)
127 {
128 nir_def *offset =
129 nir_imul_imm(b, sample_id, sizeof(struct nak_sample_mask));
130 offset = nir_iadd_imm(b, offset, fs_key->sample_masks_offset);
131
132 return nir_ldc_nv(b, 1, 8 * sizeof(struct nak_sample_mask),
133 nir_imm_int(b, fs_key->sample_info_cb), offset,
134 .align_mul = sizeof(struct nak_sample_mask),
135 .align_offset = 0);
136 }
137
138 static nir_def *
load_sample_pos_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)139 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
140 const struct nak_fs_key *fs_key)
141 {
142 nir_def *loc_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
143 nir_def *result = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
144
145 return result;
146 }
147
148 static nir_def *
load_barycentric_offset(nir_builder * b,nir_intrinsic_instr * bary,const struct nak_fs_key * fs_key)149 load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary,
150 const struct nak_fs_key *fs_key)
151 {
152 nir_def *offset_s12;
153
154 if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample ||
155 bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
156 nir_def *sample_id = bary->src[0].ssa;
157 nir_def *offset_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
158 /* The sample position we loaded is a u4 from the upper-left and the
159 * sample position wanted by ipa.offset is s12
160 */
161 offset_s12 = nir_iadd_imm(b, nir_ishl_imm(b, offset_u4, 8), -2048);
162 } else {
163 nir_def *offset_f = bary->src[0].ssa;
164
165 offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
166 nir_imm_float(b, 0.437500));
167 offset_s12 = nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
168 }
169
170 return nir_prmt_nv(b, nir_imm_int(b, 0x5410),
171 nir_channel(b, offset_s12, 0),
172 nir_channel(b, offset_s12, 1));
173 }
174
/* Per-pass context handed to lower_fs_input_intrin() via the data pointer
 * of nir_shader_intrinsics_pass().
 */
struct lower_fs_input_ctx {
   const struct nak_compiler *nak;
   const struct nak_fs_key *fs_key; /* May be NULL (callers NULL-check it) */
};
179
180 static uint16_t
fs_input_intrin_addr(nir_intrinsic_instr * intrin,const struct nak_compiler * nak)181 fs_input_intrin_addr(nir_intrinsic_instr *intrin,
182 const struct nak_compiler *nak)
183 {
184 const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
185 return nak_varying_attr_addr(nak, sem.location) +
186 nir_src_as_uint(*nir_get_io_offset_src(intrin)) * 16 +
187 nir_intrinsic_component(intrin) * 4;
188 }
189
190 static bool
lower_fs_input_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * data)191 lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
192 {
193 const struct lower_fs_input_ctx *ctx = data;
194 const struct nak_compiler *nak = ctx->nak;
195
196 b->cursor = nir_before_instr(&intrin->instr);
197
198 nir_def *res;
199 switch (intrin->intrinsic) {
200 case nir_intrinsic_load_barycentric_pixel: {
201 if (!(ctx->fs_key && ctx->fs_key->force_sample_shading))
202 return false;
203
204 intrin->intrinsic = nir_intrinsic_load_barycentric_sample;
205 return true;
206 }
207
208 case nir_intrinsic_load_frag_coord:
209 case nir_intrinsic_load_point_coord: {
210 const enum nak_interp_loc interp_loc =
211 b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID
212 : NAK_INTERP_LOC_DEFAULT;
213 const uint32_t addr =
214 intrin->intrinsic == nir_intrinsic_load_point_coord ?
215 nak_sysval_attr_addr(nak, SYSTEM_VALUE_POINT_COORD) :
216 nak_sysval_attr_addr(nak, SYSTEM_VALUE_FRAG_COORD);
217
218 res = interp_fs_input(b, intrin->def.num_components, addr,
219 NAK_INTERP_MODE_SCREEN_LINEAR,
220 interp_loc, NULL, NULL,
221 ctx->nak);
222 break;
223 }
224
225 case nir_intrinsic_load_front_face:
226 case nir_intrinsic_load_layer_id: {
227 assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
228 const gl_system_value sysval =
229 nir_system_value_from_intrinsic(intrin->intrinsic);
230 const uint32_t addr = nak_sysval_attr_addr(nak, sysval);
231
232 res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
233 if (intrin->def.bit_size == 1)
234 res = nir_i2b(b, res);
235 break;
236 }
237
238 case nir_intrinsic_load_input: {
239 const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
240 res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
241 break;
242 }
243
244 case nir_intrinsic_load_barycentric_coord_pixel:
245 case nir_intrinsic_load_barycentric_coord_centroid:
246 case nir_intrinsic_load_barycentric_coord_sample:
247 case nir_intrinsic_load_barycentric_coord_at_sample:
248 case nir_intrinsic_load_barycentric_coord_at_offset: {
249 uint32_t addr;
250 enum nak_interp_mode interp_mode;
251 if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) {
252 addr = NAK_ATTR_BARY_COORD_NO_PERSP;
253 interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
254 } else {
255 addr = NAK_ATTR_BARY_COORD;
256 interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
257 }
258
259 nir_def *offset = NULL;
260 enum nak_interp_loc interp_loc;
261 switch (intrin->intrinsic) {
262 case nir_intrinsic_load_barycentric_coord_at_sample:
263 case nir_intrinsic_load_barycentric_coord_at_offset:
264 interp_loc = NAK_INTERP_LOC_OFFSET;
265 offset = load_barycentric_offset(b, intrin, ctx->fs_key);
266 break;
267 case nir_intrinsic_load_barycentric_coord_centroid:
268 case nir_intrinsic_load_barycentric_coord_sample:
269 interp_loc = NAK_INTERP_LOC_CENTROID;
270 break;
271 case nir_intrinsic_load_barycentric_coord_pixel:
272 interp_loc = NAK_INTERP_LOC_DEFAULT;
273 break;
274 default:
275 unreachable("Unknown intrinsic");
276 }
277
278 nir_def *inv_w = NULL;
279 if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
280 inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset, nak));
281
282 res = interp_fs_input(b, intrin->def.num_components,
283 addr, interp_mode, interp_loc,
284 inv_w, offset, ctx->nak);
285 break;
286 }
287
288 case nir_intrinsic_load_interpolated_input: {
289 const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
290 nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]);
291
292 enum nak_interp_mode interp_mode;
293 if (nir_intrinsic_interp_mode(bary) == INTERP_MODE_SMOOTH ||
294 nir_intrinsic_interp_mode(bary) == INTERP_MODE_NONE)
295 interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
296 else
297 interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
298
299 nir_def *offset = NULL;
300 enum nak_interp_loc interp_loc;
301 switch (bary->intrinsic) {
302 case nir_intrinsic_load_barycentric_at_offset:
303 case nir_intrinsic_load_barycentric_at_sample: {
304 interp_loc = NAK_INTERP_LOC_OFFSET;
305 offset = load_barycentric_offset(b, bary, ctx->fs_key);
306 break;
307 }
308
309 case nir_intrinsic_load_barycentric_centroid:
310 case nir_intrinsic_load_barycentric_sample:
311 interp_loc = NAK_INTERP_LOC_CENTROID;
312 break;
313
314 case nir_intrinsic_load_barycentric_pixel:
315 interp_loc = NAK_INTERP_LOC_DEFAULT;
316 break;
317
318 default:
319 unreachable("Unsupported barycentric");
320 }
321
322 nir_def *inv_w = NULL;
323 if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
324 inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset, nak));
325
326 res = interp_fs_input(b, intrin->def.num_components,
327 addr, interp_mode, interp_loc,
328 inv_w, offset, ctx->nak);
329 break;
330 }
331
332 case nir_intrinsic_load_sample_mask_in:
333 b->cursor = nir_after_instr(&intrin->instr);
334
335 /* pixld.covmask returns the coverage mask for the entire pixel being
336 * shaded, not the set of samples covered by the current FS invocation.
337 * We need to mask off excess samples in order to get the GL/Vulkan
338 * behavior.
339 */
340 if (b->shader->info.fs.uses_sample_shading) {
341 /* Mask off just the current sample */
342 nir_def *sample = nir_load_sample_id(b);
343 nir_def *mask = nir_ishl(b, nir_imm_int(b, 1), sample);
344 mask = nir_iand(b, &intrin->def, mask);
345 nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
346
347 return true;
348 } else if (ctx->fs_key && ctx->fs_key->force_sample_shading) {
349 /* In this case we don't know up-front how many passes will be run so
350 * we need to take the per-pass sample mask from the driver and AND
351 * that with the coverage mask.
352 */
353 nir_def *sample = nir_load_sample_id(b);
354 nir_def *mask = load_pass_sample_mask_at(b, sample, ctx->fs_key);
355 mask = nir_iand(b, &intrin->def, nir_u2u32(b, mask));
356 nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
357
358 return true;
359 } else {
360 /* We're always executing single-pass so just use the sample mask as
361 * given by the hardware.
362 */
363 return false;
364 }
365 break;
366
367 case nir_intrinsic_load_sample_pos:
368 res = load_sample_pos_at(b, nir_load_sample_id(b), ctx->fs_key);
369 break;
370
371 case nir_intrinsic_load_input_vertex: {
372 const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
373 unsigned vertex_id = nir_src_as_uint(intrin->src[0]);
374 assert(vertex_id < 3);
375
376 nir_def *comps[NIR_MAX_VEC_COMPONENTS];
377 for (unsigned c = 0; c < intrin->def.num_components; c++) {
378 nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4,
379 .flags = vertex_id == 2);
380 comps[c] = nir_channel(b, data, vertex_id & 1);
381 }
382 res = nir_vec(b, comps, intrin->num_components);
383 break;
384 }
385
386 default:
387 return false;
388 }
389
390 nir_def_replace(&intrin->def, res);
391
392 return true;
393 }
394
395 bool
nak_nir_lower_fs_inputs(nir_shader * nir,const struct nak_compiler * nak,const struct nak_fs_key * fs_key)396 nak_nir_lower_fs_inputs(nir_shader *nir,
397 const struct nak_compiler *nak,
398 const struct nak_fs_key *fs_key)
399 {
400 const struct lower_fs_input_ctx fs_in_ctx = {
401 .nak = nak,
402 .fs_key = fs_key,
403 };
404 NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_fs_input_intrin,
405 nir_metadata_control_flow,
406 (void *)&fs_in_ctx);
407
408 return true;
409 }
410