/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"

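/* Filter for the lowering pass: pick the tessellation I/O intrinsics that
 * have to be rewritten to explicit LDS accesses in the given shader stage.
 */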
bool
r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   switch (op->intrinsic) {
   case nir_intrinsic_load_input:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
      return true;
   case nir_intrinsic_store_output:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX;
   default:;
   }
   return false;
}

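/* Byte offset of a varying within a patch record in LDS. Each slot occupies
 * one vec4 (16 bytes): POS through CLIP_VERTEX use fixed locations, the
 * generic varyings VAR0..VAR31 follow at 0x90, and in the per-patch area the
 * tess levels come first, with PATCH0 and up following at 0x20.
 */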
static int
get_tcs_varying_offset(nir_intrinsic_instr *op)
{
   unsigned location = nir_intrinsic_io_semantics(op).location;

   switch (location) {
   case VARYING_SLOT_POS:
      return 0;
   case VARYING_SLOT_PSIZ:
      return 0x10;
   case VARYING_SLOT_CLIP_DIST0:
      return 0x20;
   case VARYING_SLOT_CLIP_DIST1:
      return 0x30;
   case VARYING_SLOT_COL0:
      return 0x40;
   case VARYING_SLOT_COL1:
      return 0x50;
   case VARYING_SLOT_BFC0:
      return 0x60;
   case VARYING_SLOT_BFC1:
      return 0x70;
   case VARYING_SLOT_CLIP_VERTEX:
      return 0x80;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      return 0;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      return 0x10;
   default:
      if (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31)
         return 0x10 * (location - VARYING_SLOT_VAR0) + 0x90;

      if (location >= VARYING_SLOT_PATCH0) {
         return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20;
      }
   }
   return 0;
}

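/* LDS base address of the data of one patch. Going by how the system value
 * is used here, param_base.x holds the per-patch stride and param_base.w the
 * base offset, so this computes rel_patch_id * stride + base.
 */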
static inline nir_def *
r600_tcs_base_address(nir_builder *b, nir_def *param_base, nir_def *rel_patch_id)
{
   return nir_umad24(b,
                     nir_channel(b, param_base, 0),
                     rel_patch_id,
                     nir_channel(b, param_base, 3));
}

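/* LDS address of a per-vertex TCS input: base.x * patch_id selects the patch
 * and base.y * vertex_index (src[0]) the vertex within it; on top of that
 * come the constant slot offset of the varying and the indirect element
 * offset src[1] << 4. Terms whose index is a constant zero are omitted.
 */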
static nir_def *
emil_lsd_in_addr(nir_builder *b,
                 nir_def *base,
                 nir_def *patch_id,
                 nir_intrinsic_instr *op)
{
   nir_def *addr =
      nir_build_alu(b, nir_op_umul24, nir_channel(b, base, 0), patch_id, NULL, NULL);

   auto idx1 = nir_src_as_const_value(op->src[0]);
   if (!idx1 || idx1->u32 != 0)
      addr = nir_umad24(b, nir_channel(b, base, 1), op->src[0].ssa, addr);

   auto offset = nir_imm_int(b, get_tcs_varying_offset(op));

   auto idx2 = nir_src_as_const_value(op->src[1]);
   if (!idx2 || idx2->u32 != 0)
      offset = nir_iadd(b, nir_ishl_imm(b, op->src[1].ssa, 4), offset);

   return nir_iadd(b, addr, offset);
}

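/* LDS address of a per-vertex TCS output: base.x * patch_id + base.z is the
 * output base of the patch and base.y the per-vertex stride. src_offset
 * selects the intrinsic source holding the vertex index: 0 for loads, 1 for
 * stores, where src[0] is the stored value.
 */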
static nir_def *
emil_lsd_out_addr(nir_builder *b,
                  nir_def *base,
                  nir_def *patch_id,
                  nir_intrinsic_instr *op,
                  UNUSED nir_variable_mode mode,
                  int src_offset)
{
   nir_def *addr1 =
      nir_umad24(b, nir_channel(b, base, 0), patch_id, nir_channel(b, base, 2));
   nir_def *addr2 =
      nir_umad24(b, nir_channel(b, base, 1), op->src[src_offset].ssa, addr1);
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr2,
                                nir_ishl_imm(b, op->src[src_offset + 1].ssa, 4)),
                       offset);
}

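/* Byte offsets of the tess factor components relative to the patch base:
 * component counts 1-4 address the outer factors at bytes 0..12, counts 5
 * and 6 (the inner factors, requested as 4 + ncomps) start at byte 16.
 */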
static nir_def *
load_offset_group(nir_builder *b, int ncomponents)
{
   switch (ncomponents) {
   /* tess outer offsets */
   case 1:
      return nir_imm_int(b, 0);
   case 2:
      return nir_imm_ivec2(b, 0, 4);
   case 3:
      return r600_imm_ivec3(b, 0, 4, 8);
   case 4:
      return nir_imm_ivec4(b, 0, 4, 8, 12);
   /* tess inner offsets */
   case 5:
      return nir_imm_int(b, 16);
   case 6:
      return nir_imm_ivec2(b, 16, 20);
   default:
      debug_printf("Got %d components\n", ncomponents);
      unreachable("Unsupported component count");
   }
}

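/* Like load_offset_group, but return only the byte offsets of the
 * components selected in the mask.
 */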
static nir_def *
load_offset_group_from_mask(nir_builder *b, uint32_t mask)
{
   auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12);
   return nir_channels(b, full_mask, mask);
}

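/* State for scanning the uses of an SSA def: accumulates in 'mask' which
 * components of the def with index 'ssa_index' are actually read.
 */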
struct MaskQuery {
   uint32_t mask;
   uint32_t ssa_index;
   nir_alu_instr *alu;
   int index;
   uint32_t full_mask;
};

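/* nir_foreach_src callback: record the components an ALU source reads from
 * the tracked def; abort the scan (return false) once all components are
 * known to be used.
 */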
static bool
update_alu_mask(nir_src *src, void *data)
{
   auto mq = reinterpret_cast<MaskQuery *>(data);

   if (mq->ssa_index == src->ssa->index) {
      mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
   }
   ++mq->index;

   return mq->mask != mq->full_mask;
}

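/* Conservatively compute which components of a load destination are
 * actually consumed; returns 0xf as soon as a user is found whose component
 * usage can't be tracked.
 */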
static uint32_t
get_dest_usee_mask(nir_intrinsic_instr *op)
{
   MaskQuery mq = {0};
   mq.full_mask = (1 << op->def.num_components) - 1;

   nir_foreach_use(use_src, &op->def)
   {
      auto use_instr = nir_src_parent_instr(use_src);
      mq.ssa_index = use_src->ssa->index;

      switch (use_instr->type) {
      case nir_instr_type_alu: {
         mq.alu = nir_instr_as_alu(use_instr);
         mq.index = 0;
         if (!nir_foreach_src(use_instr, update_alu_mask, &mq))
            return 0xf;
         break;
      }
      case nir_instr_type_intrinsic: {
         auto intr = nir_instr_as_intrinsic(use_instr);
         switch (intr->intrinsic) {
         case nir_intrinsic_store_output:
         case nir_intrinsic_store_per_vertex_output:
            mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
            break;
         case nir_intrinsic_store_scratch:
         case nir_intrinsic_store_local_shared_r600:
            mq.mask |= nir_intrinsic_write_mask(intr);
            break;
         default:
            return 0xf;
         }
         break;
      }
      default:
         return 0xf;
      }
   }
   return mq.mask;
}

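/* Replace a lowered load by an LDS read of only the components that are
 * actually used, then scatter the loaded channels back to their original
 * component positions, filling the unused slots with undefs.
 */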
static void
replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t mask = get_dest_usee_mask(op);
   if (mask) {
      nir_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask));
      if (nir_intrinsic_component(op))
         addr_outer =
            nir_iadd_imm(b, addr_outer, 4 * nir_intrinsic_component(op));

      auto new_load = nir_load_local_shared_r600(b, 32, addr_outer);

      auto undef = nir_undef(b, 1, 32);
      int comps = op->def.num_components;
      nir_def *remix[4] = {undef, undef, undef, undef};

      int chan = 0;
      for (int i = 0; i < comps; ++i) {
         if (mask & (1 << i)) {
            remix[i] = nir_channel(b, new_load, chan++);
         }
      }
      auto new_load_remixed = nir_vec(b, remix, comps);
      nir_def_rewrite_uses(&op->def, new_load_remixed);
   }
   nir_instr_remove(&op->instr);
}

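/* Lower a store intrinsic to LDS writes. The write mask is split into the
 * xy and zw component pairs, so each emitted store_local_shared_r600 writes
 * at most two consecutive dwords.
 */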
static void
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op);

   for (int i = 0; i < 2; ++i) {
      unsigned test_mask = (0x3 << 2 * i);
      unsigned wmask = orig_writemask & test_mask;
      if (!(wmask))
         continue;

      uint32_t writemask = wmask >> nir_intrinsic_component(op);

      bool start_even = (orig_writemask & (1u << (2 * i)));
      nir_def *addr2 = nir_iadd_imm(b, addr, 8 * i + (start_even ? 0 : 4));
      nir_store_local_shared_r600(b, op->src[0].ssa, addr2,
                                  .write_mask = writemask);
   }
}

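/* Add the constant slot offset of the varying and the indirect element
 * offset (src[src_offset] << 4) to the patch base address.
 */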
static nir_def *
emil_tcs_io_offset(nir_builder *b,
                   nir_def *addr,
                   nir_intrinsic_instr *op,
                   int src_offset)
{
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr,
                                nir_ishl_imm(b, op->src[src_offset].ssa, 4)),
                       offset);
}

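/* Number of outer tessellation factor components for the given primitive
 * type: two for isolines, three for triangles, four for quads.
 */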
inline unsigned
outer_tf_components(mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_LINES:
      return 2;
   case MESA_PRIM_TRIANGLES:
      return 3;
   case MESA_PRIM_QUADS:
      return 4;
   default:
      return 0;
   }
}

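/* Lower one tessellation I/O intrinsic to explicit LDS access. The
 * addresses are derived from the tcs_*_param_base_r600 system values that
 * describe the patch data layout for the current stage.
 */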
static bool
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum mesa_prim prim_type)
{
   static nir_def *load_in_param_base = nullptr;
   static nir_def *load_out_param_base = nullptr;

   b->cursor = nir_before_instr(instr);
   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      load_in_param_base = nir_load_tcs_in_param_base_r600(b);
      load_out_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      load_in_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      load_out_param_base = nir_load_tcs_in_param_base_r600(b);
   }

   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   unsigned tf_inner_address_offset = 0;
   unsigned ncomps_correct = 0;

   switch (op->intrinsic) {
   case nir_intrinsic_load_patch_vertices_in: {
      nir_def *vertices_in;
      if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
         vertices_in = nir_channel(b, load_in_param_base, 2);
      else {
         auto base = nir_load_tcs_in_param_base_r600(b);
         vertices_in = nir_channel(b, base, 2);
      }
      nir_def_rewrite_uses(&op->def, vertices_in);
      nir_instr_remove(&op->instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_input: {
      nir_def *addr =
         b->shader->info.stage == MESA_SHADER_TESS_CTRL
            ? emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op)
            : emil_lsd_out_addr(
                 b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_per_vertex_output: {
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_output: {
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_output: {
      nir_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
                         ? r600_tcs_base_address(b, load_out_param_base, rel_patch_id)
                         : nir_build_alu(b,
                                         nir_op_umul24,
                                         nir_channel(b, load_out_param_base, 1),
                                         rel_patch_id,
                                         NULL,
                                         NULL);
      addr = emil_tcs_io_offset(b, addr, op, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_output: {
      nir_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_input: {
      nir_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_tess_level_inner:
      tf_inner_address_offset = 4;
      ncomps_correct = 2;
      FALLTHROUGH;
   case nir_intrinsic_load_tess_level_outer: {
      auto ncomps = outer_tf_components(prim_type);
      if (!ncomps)
         return false;
      ncomps -= ncomps_correct;
      auto base = nir_load_tcs_out_param_base_r600(b);
      auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);
      nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
      nir_def *addr_outer =
         nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));

      nir_def *tf = nir_load_local_shared_r600(b, 32, addr_outer);
      if (ncomps < 4 && b->shader->info.stage != MESA_SHADER_TESS_EVAL) {
         auto undef = nir_undef(b, 1, 32);
         nir_def *srcs[4] = {undef, undef, undef, undef};
         for (unsigned i = 0; i < ncomps; ++i)
            srcs[i] = nir_channel(b, tf, i);
         auto help = nir_vec(b, srcs, 4);
         nir_def_rewrite_uses(&op->def, help);
      } else {
         nir_def_rewrite_uses(&op->def, tf);
      }
      nir_instr_remove(instr);
      return true;
   }
   default:;
   }

   return false;
}

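/* Pass entry point: lower all tessellation I/O intrinsics accepted by
 * r600_lower_tess_io_filter to LDS accesses.
 */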
bool
r600_lower_tess_io(nir_shader *shader, enum mesa_prim prim_type)
{
   bool progress = false;
   nir_foreach_function_impl(impl, shader)
   {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            if (r600_lower_tess_io_filter(instr, shader->info.stage))
               progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
         }
      }
   }
   return progress;
}

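/* Emit one store_tf_r600 intrinsic; val holds an (address, value) pair for
 * the tessellation factor buffer.
 */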
bool
r600_emit_tf(nir_builder *b, nir_def *val)
{
   nir_intrinsic_instr *store_tf =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
   store_tf->num_components = val->num_components;
   store_tf->src[0] = nir_src_for_ssa(val);
   nir_builder_instr_insert(b, &store_tf->instr);
   return true;
}

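/* Append code to the end of the TCS that reads the tessellation factors
 * back from LDS and stores them to the TF buffer. Only invocation 0 of a
 * patch performs the write, and for isolines the first two outer factors
 * are swapped. Does nothing if the shader already emits the factors.
 */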
bool
r600_append_tcs_TF_emission(nir_shader *shader, enum mesa_prim prim_type)
{
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   nir_foreach_function_impl(impl, shader)
   {
      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder builder = nir_builder_create(f->impl);
   nir_builder *b = &builder;

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   nir_def *invocation_id = nir_load_invocation_id(b);

   nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));
   auto base = nir_load_tcs_out_param_base_r600(b);
   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   nir_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   nir_def *tf_outer = nir_load_local_shared_r600(b, 32, addr_outer);

   std::vector<nir_def *> tf_out;

   nir_def *tf_out_base = nir_load_tcs_tess_factor_base_r600(b);
   nir_def *out_addr0 = nir_umad24(b,
                                   rel_patch_id,
                                   nir_imm_int(b, stride),
                                   tf_out_base);
   int chanx = 0;
   int chany = 1;

   if (prim_type == MESA_PRIM_LINES)
      std::swap(chanx, chany);

   int inner_base = 12;

   tf_out.push_back(nir_vec2(b,
                             out_addr0,
                             nir_channel(b, tf_outer, chanx)));

   tf_out.push_back(nir_vec2(b, nir_iadd_imm(b, out_addr0, 4),
                             nir_channel(b, tf_outer, chany)));

   if (outer_comps > 2) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 8),
                                nir_channel(b, tf_outer, 2)));
   }

   if (outer_comps > 3) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 12),
                                nir_channel(b, tf_outer, 3)));
      inner_base = 16;
   }

   if (inner_comps) {
      nir_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      nir_def *tf_inner = nir_load_local_shared_r600(b, 32, addr1);

      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, inner_base),
                                nir_channel(b, tf_inner, 0)));

      if (inner_comps > 1) {
         tf_out.push_back(nir_vec2(b,
                                   nir_iadd_imm(b, out_addr0, inner_base + 4),
                                   nir_channel(b, tf_inner, 1)));
      }
   }

   for (auto tf : tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}