/*
 * Copyright © 2023 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "nak_private.h"
7 #include "nir_builder.h"
8 
9 /** Load a flat FS input */
10 static nir_def *
load_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,UNUSED const struct nak_compiler * nak)11 load_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
12               UNUSED const struct nak_compiler *nak)
13 {
14    const struct nak_nir_ipa_flags flags = {
15       .interp_mode = NAK_INTERP_MODE_CONSTANT,
16       .interp_freq = NAK_INTERP_FREQ_CONSTANT,
17       .interp_loc = NAK_INTERP_LOC_DEFAULT,
18    };
19    uint32_t flags_u32;
20    memcpy(&flags_u32, &flags, sizeof(flags_u32));
21 
22    nir_def *comps[NIR_MAX_VEC_COMPONENTS];
23    for (unsigned c = 0; c < num_components; c++) {
24       comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), nir_imm_int(b, 0),
25                             .base = addr + c * 4, .flags = flags_u32);
26    }
27    return nir_vec(b, comps, num_components);
28 }
29 
30 static nir_def *
load_frag_w(nir_builder * b,enum nak_interp_loc interp_loc,nir_def * offset,const struct nak_compiler * nak)31 load_frag_w(nir_builder *b, enum nak_interp_loc interp_loc, nir_def *offset,
32             const struct nak_compiler *nak)
33 {
34    if (offset == NULL)
35       offset = nir_imm_int(b, 0);
36 
37    const uint16_t w_addr =
38       nak_sysval_attr_addr(nak, SYSTEM_VALUE_FRAG_COORD) + 12;
39 
40    const struct nak_nir_ipa_flags flags = {
41       .interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR,
42       .interp_freq = NAK_INTERP_FREQ_PASS,
43       .interp_loc = interp_loc,
44    };
45    uint32_t flags_u32;
46    memcpy(&flags_u32, &flags, sizeof(flags_u32));
47 
48    return nir_ipa_nv(b, nir_imm_float(b, 0), offset,
49                      .base = w_addr, .flags = flags_u32);
50 }
51 
52 static nir_def *
interp_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,enum nak_interp_mode interp_mode,enum nak_interp_loc interp_loc,nir_def * inv_w,nir_def * offset,const struct nak_compiler * nak)53 interp_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
54                 enum nak_interp_mode interp_mode,
55                 enum nak_interp_loc interp_loc,
56                 nir_def *inv_w, nir_def *offset,
57                 const struct nak_compiler *nak)
58 {
59    if (offset == NULL)
60       offset = nir_imm_int(b, 0);
61 
62    if (nak->sm >= 70) {
63       const struct nak_nir_ipa_flags flags = {
64          .interp_mode = interp_mode,
65          .interp_freq = NAK_INTERP_FREQ_PASS,
66          .interp_loc = interp_loc,
67       };
68       uint32_t flags_u32;
69       memcpy(&flags_u32, &flags, sizeof(flags_u32));
70 
71       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
72       for (unsigned c = 0; c < num_components; c++) {
73          comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), offset,
74                                .base = addr + c * 4,
75                                .flags = flags_u32);
76          if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
77             comps[c] = nir_fmul(b, comps[c], inv_w);
78       }
79       return nir_vec(b, comps, num_components);
80    } else if (nak->sm >= 50) {
81       struct nak_nir_ipa_flags flags = {
82          .interp_mode = interp_mode,
83          .interp_freq = NAK_INTERP_FREQ_PASS,
84          .interp_loc = interp_loc,
85       };
86 
87       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
88          flags.interp_freq = NAK_INTERP_FREQ_PASS_MUL_W;
89       else
90          inv_w = nir_imm_float(b, 0);
91 
92       uint32_t flags_u32;
93       memcpy(&flags_u32, &flags, sizeof(flags_u32));
94 
95       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
96       for (unsigned c = 0; c < num_components; c++) {
97          comps[c] = nir_ipa_nv(b, inv_w, offset,
98                                .base = addr + c * 4,
99                                .flags = flags_u32);
100       }
101       return nir_vec(b, comps, num_components);
102    } else {
103       unreachable("Figure out input interpolation on Kepler");
104    }
105 }
106 
107 static nir_def *
load_sample_pos_u4_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)108 load_sample_pos_u4_at(nir_builder *b, nir_def *sample_id,
109                       const struct nak_fs_key *fs_key)
110 {
111    nir_def *loc = nir_ldc_nv(b, 1, 8,
112                              nir_imm_int(b, fs_key->sample_info_cb),
113                              nir_iadd_imm(b, sample_id,
114                                           fs_key->sample_locations_offset),
115                              .align_mul = 1, .align_offset = 0);
116 
117    /* The rest of these calculations are in 32-bit */
118    loc = nir_u2u32(b, loc);
119    nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
120    nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
121    return nir_vec2(b, loc_x_u4, loc_y_u4);
122 }
123 
124 static nir_def *
load_pass_sample_mask_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)125 load_pass_sample_mask_at(nir_builder *b, nir_def *sample_id,
126                          const struct nak_fs_key *fs_key)
127 {
128    nir_def *offset =
129       nir_imul_imm(b, sample_id, sizeof(struct nak_sample_mask));
130    offset = nir_iadd_imm(b, offset, fs_key->sample_masks_offset);
131 
132    return nir_ldc_nv(b, 1, 8 * sizeof(struct nak_sample_mask),
133                      nir_imm_int(b, fs_key->sample_info_cb), offset,
134                      .align_mul = sizeof(struct nak_sample_mask),
135                      .align_offset = 0);
136 }
137 
138 static nir_def *
load_sample_pos_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)139 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
140                    const struct nak_fs_key *fs_key)
141 {
142    nir_def *loc_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
143    nir_def *result = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
144 
145    return result;
146 }
147 
148 static nir_def *
load_barycentric_offset(nir_builder * b,nir_intrinsic_instr * bary,const struct nak_fs_key * fs_key)149 load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary,
150                         const struct nak_fs_key *fs_key)
151 {
152    nir_def *offset_s12;
153 
154    if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample ||
155        bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
156       nir_def *sample_id = bary->src[0].ssa;
157       nir_def *offset_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
158       /* The sample position we loaded is a u4 from the upper-left and the
159        * sample position wanted by ipa.offset is s12
160        */
161       offset_s12 = nir_iadd_imm(b, nir_ishl_imm(b, offset_u4, 8), -2048);
162    } else {
163       nir_def *offset_f = bary->src[0].ssa;
164 
165       offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
166                             nir_imm_float(b, 0.437500));
167       offset_s12 = nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
168    }
169 
170    return nir_prmt_nv(b, nir_imm_int(b, 0x5410),
171                          nir_channel(b, offset_s12, 0),
172                          nir_channel(b, offset_s12, 1));
173 }
174 
/** State handed to lower_fs_input_intrin() through its void *data argument */
struct lower_fs_input_ctx {
   /* Compiler description; used for the SM version and attribute addresses */
   const struct nak_compiler *nak;

   /* Fragment shader key; the lowering checks it for NULL before reading
    * force_sample_shading.
    */
   const struct nak_fs_key *fs_key;
};
179 
180 static uint16_t
fs_input_intrin_addr(nir_intrinsic_instr * intrin,const struct nak_compiler * nak)181 fs_input_intrin_addr(nir_intrinsic_instr *intrin,
182                      const struct nak_compiler *nak)
183 {
184    const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
185    return nak_varying_attr_addr(nak, sem.location) +
186           nir_src_as_uint(*nir_get_io_offset_src(intrin)) * 16 +
187           nir_intrinsic_component(intrin) * 4;
188 }
189 
190 static bool
lower_fs_input_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * data)191 lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
192 {
193    const struct lower_fs_input_ctx *ctx = data;
194    const struct nak_compiler *nak = ctx->nak;
195 
196    b->cursor = nir_before_instr(&intrin->instr);
197 
198    nir_def *res;
199    switch (intrin->intrinsic) {
200    case nir_intrinsic_load_barycentric_pixel: {
201       if (!(ctx->fs_key && ctx->fs_key->force_sample_shading))
202          return false;
203 
204       intrin->intrinsic = nir_intrinsic_load_barycentric_sample;
205       return true;
206    }
207 
208    case nir_intrinsic_load_frag_coord:
209    case nir_intrinsic_load_point_coord: {
210       const enum nak_interp_loc interp_loc =
211          b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID
212                                                 : NAK_INTERP_LOC_DEFAULT;
213       const uint32_t addr =
214          intrin->intrinsic == nir_intrinsic_load_point_coord ?
215          nak_sysval_attr_addr(nak, SYSTEM_VALUE_POINT_COORD) :
216          nak_sysval_attr_addr(nak, SYSTEM_VALUE_FRAG_COORD);
217 
218       res = interp_fs_input(b, intrin->def.num_components, addr,
219                             NAK_INTERP_MODE_SCREEN_LINEAR,
220                             interp_loc, NULL, NULL,
221                             ctx->nak);
222       break;
223    }
224 
225    case nir_intrinsic_load_front_face:
226    case nir_intrinsic_load_layer_id: {
227       assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
228       const gl_system_value sysval =
229          nir_system_value_from_intrinsic(intrin->intrinsic);
230       const uint32_t addr = nak_sysval_attr_addr(nak, sysval);
231 
232       res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
233       if (intrin->def.bit_size == 1)
234          res = nir_i2b(b, res);
235       break;
236    }
237 
238    case nir_intrinsic_load_input: {
239       const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
240       res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
241       break;
242    }
243 
244    case nir_intrinsic_load_barycentric_coord_pixel:
245    case nir_intrinsic_load_barycentric_coord_centroid:
246    case nir_intrinsic_load_barycentric_coord_sample:
247    case nir_intrinsic_load_barycentric_coord_at_sample:
248    case nir_intrinsic_load_barycentric_coord_at_offset: {
249       uint32_t addr;
250       enum nak_interp_mode interp_mode;
251       if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) {
252          addr = NAK_ATTR_BARY_COORD_NO_PERSP;
253          interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
254       } else {
255          addr = NAK_ATTR_BARY_COORD;
256          interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
257       }
258 
259       nir_def *offset = NULL;
260       enum nak_interp_loc interp_loc;
261       switch (intrin->intrinsic) {
262       case nir_intrinsic_load_barycentric_coord_at_sample:
263       case nir_intrinsic_load_barycentric_coord_at_offset:
264          interp_loc = NAK_INTERP_LOC_OFFSET;
265          offset = load_barycentric_offset(b, intrin, ctx->fs_key);
266          break;
267       case nir_intrinsic_load_barycentric_coord_centroid:
268       case nir_intrinsic_load_barycentric_coord_sample:
269          interp_loc = NAK_INTERP_LOC_CENTROID;
270          break;
271       case nir_intrinsic_load_barycentric_coord_pixel:
272          interp_loc = NAK_INTERP_LOC_DEFAULT;
273          break;
274       default:
275          unreachable("Unknown intrinsic");
276       }
277 
278       nir_def *inv_w = NULL;
279       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
280          inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset, nak));
281 
282       res = interp_fs_input(b, intrin->def.num_components,
283                             addr, interp_mode, interp_loc,
284                             inv_w, offset, ctx->nak);
285       break;
286    }
287 
288    case nir_intrinsic_load_interpolated_input: {
289       const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
290       nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]);
291 
292       enum nak_interp_mode interp_mode;
293       if (nir_intrinsic_interp_mode(bary) == INTERP_MODE_SMOOTH ||
294           nir_intrinsic_interp_mode(bary) == INTERP_MODE_NONE)
295          interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
296       else
297          interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
298 
299       nir_def *offset = NULL;
300       enum nak_interp_loc interp_loc;
301       switch (bary->intrinsic) {
302       case nir_intrinsic_load_barycentric_at_offset:
303       case nir_intrinsic_load_barycentric_at_sample: {
304          interp_loc = NAK_INTERP_LOC_OFFSET;
305          offset = load_barycentric_offset(b, bary, ctx->fs_key);
306          break;
307       }
308 
309       case nir_intrinsic_load_barycentric_centroid:
310       case nir_intrinsic_load_barycentric_sample:
311          interp_loc = NAK_INTERP_LOC_CENTROID;
312          break;
313 
314       case nir_intrinsic_load_barycentric_pixel:
315          interp_loc = NAK_INTERP_LOC_DEFAULT;
316          break;
317 
318       default:
319          unreachable("Unsupported barycentric");
320       }
321 
322       nir_def *inv_w = NULL;
323       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
324          inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset, nak));
325 
326       res = interp_fs_input(b, intrin->def.num_components,
327                             addr, interp_mode, interp_loc,
328                             inv_w, offset, ctx->nak);
329       break;
330    }
331 
332    case nir_intrinsic_load_sample_mask_in:
333       b->cursor = nir_after_instr(&intrin->instr);
334 
335       /* pixld.covmask returns the coverage mask for the entire pixel being
336        * shaded, not the set of samples covered by the current FS invocation.
337        * We need to mask off excess samples in order to get the GL/Vulkan
338        * behavior.
339        */
340       if (b->shader->info.fs.uses_sample_shading) {
341          /* Mask off just the current sample */
342          nir_def *sample = nir_load_sample_id(b);
343          nir_def *mask = nir_ishl(b, nir_imm_int(b, 1), sample);
344          mask = nir_iand(b, &intrin->def, mask);
345          nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
346 
347          return true;
348       } else if (ctx->fs_key && ctx->fs_key->force_sample_shading) {
349          /* In this case we don't know up-front how many passes will be run so
350           * we need to take the per-pass sample mask from the driver and AND
351           * that with the coverage mask.
352           */
353          nir_def *sample = nir_load_sample_id(b);
354          nir_def *mask = load_pass_sample_mask_at(b, sample, ctx->fs_key);
355          mask = nir_iand(b, &intrin->def, nir_u2u32(b, mask));
356          nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
357 
358          return true;
359       } else {
360          /* We're always executing single-pass so just use the sample mask as
361           * given by the hardware.
362           */
363          return false;
364       }
365       break;
366 
367    case nir_intrinsic_load_sample_pos:
368       res = load_sample_pos_at(b, nir_load_sample_id(b), ctx->fs_key);
369       break;
370 
371    case nir_intrinsic_load_input_vertex: {
372       const uint16_t addr = fs_input_intrin_addr(intrin, ctx->nak);
373       unsigned vertex_id = nir_src_as_uint(intrin->src[0]);
374       assert(vertex_id < 3);
375 
376       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
377       for (unsigned c = 0; c < intrin->def.num_components; c++) {
378          nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4,
379                                        .flags = vertex_id == 2);
380          comps[c] = nir_channel(b, data, vertex_id & 1);
381       }
382       res = nir_vec(b, comps, intrin->num_components);
383       break;
384    }
385 
386    default:
387       return false;
388    }
389 
390    nir_def_replace(&intrin->def, res);
391 
392    return true;
393 }
394 
395 bool
nak_nir_lower_fs_inputs(nir_shader * nir,const struct nak_compiler * nak,const struct nak_fs_key * fs_key)396 nak_nir_lower_fs_inputs(nir_shader *nir,
397                         const struct nak_compiler *nak,
398                         const struct nak_fs_key *fs_key)
399 {
400    const struct lower_fs_input_ctx fs_in_ctx = {
401       .nak = nak,
402       .fs_key = fs_key,
403    };
404    NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_fs_input_intrin,
405               nir_metadata_control_flow,
406               (void *)&fs_in_ctx);
407 
408    return true;
409 }
410