/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"

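/* Filter the I/O intrinsics that have to be lowered to LDS access:
 * TCS/TES inputs, loads of (per-vertex) outputs and tess levels, and
 * stores of outputs in the VS and TCS stages. */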
bool
r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   switch (op->intrinsic) {
   case nir_intrinsic_load_input:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
      return true;
   case nir_intrinsic_store_output:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX;
   default:;
   }
   return false;
}

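/* Byte offset of a varying slot within a patch in LDS. Every slot
 * takes one vec4 (0x10 bytes); generic varyings are placed after the
 * fixed-function slots starting at 0x90, and per-patch slots follow
 * the tess levels starting at 0x20. */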
static int
get_tcs_varying_offset(nir_intrinsic_instr *op)
{
   unsigned location = nir_intrinsic_io_semantics(op).location;

   switch (location) {
   case VARYING_SLOT_POS:
      return 0;
   case VARYING_SLOT_PSIZ:
      return 0x10;
   case VARYING_SLOT_CLIP_DIST0:
      return 0x20;
   case VARYING_SLOT_CLIP_DIST1:
      return 0x30;
   case VARYING_SLOT_COL0:
      return 0x40;
   case VARYING_SLOT_COL1:
      return 0x50;
   case VARYING_SLOT_BFC0:
      return 0x60;
   case VARYING_SLOT_BFC1:
      return 0x70;
   case VARYING_SLOT_CLIP_VERTEX:
      return 0x80;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      return 0;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      return 0x10;
   default:
      if (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31)
         return 0x10 * (location - VARYING_SLOT_VAR0) + 0x90;

      if (location >= VARYING_SLOT_PATCH0) {
         return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20;
      }
   }
   return 0;
}

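/* LDS base address of the current patch, computed as
 * param_base.x * rel_patch_id + param_base.w; .x presumably holds the
 * per-patch stride and .w the base offset. */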
static inline nir_def *
r600_tcs_base_address(nir_builder *b, nir_def *param_base, nir_def *rel_patch_id)
{
   return nir_umad24(b,
                     nir_channel(b, param_base, 0),
                     rel_patch_id,
                     nir_channel(b, param_base, 3));
}

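/* LDS address of a per-vertex input as seen by the TCS:
 * base.x * patch_id selects the patch, base.y * vertex_index (src[0])
 * the vertex, and the constant varying offset plus an optional
 * indirect offset (src[1], scaled to vec4 slots) is added on top.
 * The multiply-adds are skipped when the index is a constant zero. */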
static nir_def *
emil_lsd_in_addr(nir_builder *b,
                 nir_def *base,
                 nir_def *patch_id,
                 nir_intrinsic_instr *op)
{
   nir_def *addr =
      nir_build_alu(b, nir_op_umul24, nir_channel(b, base, 0), patch_id, NULL, NULL);

   auto idx1 = nir_src_as_const_value(op->src[0]);
   if (!idx1 || idx1->u32 != 0)
      addr = nir_umad24(b, nir_channel(b, base, 1), op->src[0].ssa, addr);

   auto offset = nir_imm_int(b, get_tcs_varying_offset(op));

   auto idx2 = nir_src_as_const_value(op->src[1]);
   if (!idx2 || idx2->u32 != 0)
      offset = nir_iadd(b, nir_ishl_imm(b, op->src[1].ssa, 4), offset);

   return nir_iadd(b, addr, offset);
}

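/* LDS address of a per-vertex output: base.x * patch_id + base.z is
 * the patch start, base.y * vertex_index advances to the vertex, and
 * the constant varying offset plus the indirect slot offset is added
 * as an immediate. */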
static nir_def *
emil_lsd_out_addr(nir_builder *b,
                  nir_def *base,
                  nir_def *patch_id,
                  nir_intrinsic_instr *op,
                  UNUSED nir_variable_mode mode,
                  int src_offset)
{
   nir_def *addr1 =
      nir_umad24(b, nir_channel(b, base, 0), patch_id, nir_channel(b, base, 2));
   nir_def *addr2 =
      nir_umad24(b, nir_channel(b, base, 1), op->src[src_offset].ssa, addr1);
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr2,
                                nir_ishl_imm(b, op->src[src_offset + 1].ssa, 4)),
                       offset);
}

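/* Immediate vector of LDS byte offsets for reading tess factors:
 * component counts 1-4 address the outer factors at offset 0, counts
 * 5 and 6 (i.e. 4 + inner count) the inner factors at offset 16. */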
static nir_def *
load_offset_group(nir_builder *b, int ncomponents)
{
   switch (ncomponents) {
   /* tess outer offsets */
   case 1:
      return nir_imm_int(b, 0);
   case 2:
      return nir_imm_ivec2(b, 0, 4);
   case 3:
      return r600_imm_ivec3(b, 0, 4, 8);
   case 4:
      return nir_imm_ivec4(b, 0, 4, 8, 12);
   /* tess inner offsets */
   case 5:
      return nir_imm_int(b, 16);
   case 6:
      return nir_imm_ivec2(b, 16, 20);
   default:
      debug_printf("Got %d components\n", ncomponents);
      unreachable("Unsupported component count");
   }
}

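/* Like load_offset_group, but restricted to the offsets of the
 * components actually selected by the use mask. */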
static nir_def *
load_offset_group_from_mask(nir_builder *b, uint32_t mask)
{
   auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12);
   return nir_channels(b, full_mask, mask);
}

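/* State passed through nir_foreach_src while collecting the
 * components an ALU instruction reads from a given SSA def. */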
struct MaskQuery {
   uint32_t mask;
   uint32_t ssa_index;
   nir_alu_instr *alu;
   int index;
   uint32_t full_mask;
};

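/* Accumulate the read mask contributed by one ALU source; returning
 * false aborts the walk once all components are known to be read. */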
static bool
update_alu_mask(nir_src *src, void *data)
{
   auto mq = reinterpret_cast<MaskQuery *>(data);

   if (mq->ssa_index == src->ssa->index) {
      mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
   }
   ++mq->index;

   return mq->mask != mq->full_mask;
}

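/* Compute which components of the load destination are actually used.
 * Only ALU and store users can be analyzed precisely; any other use
 * conservatively returns the full mask 0xf. */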
static uint32_t
get_dest_usee_mask(nir_intrinsic_instr *op)
{
   MaskQuery mq = {0};
   mq.full_mask = (1 << op->def.num_components) - 1;

   nir_foreach_use(use_src, &op->def)
   {
      auto use_instr = nir_src_parent_instr(use_src);
      mq.ssa_index = use_src->ssa->index;

      switch (use_instr->type) {
      case nir_instr_type_alu: {
         mq.alu = nir_instr_as_alu(use_instr);
         mq.index = 0;
         if (!nir_foreach_src(use_instr, update_alu_mask, &mq))
            return 0xf;
         break;
      }
      case nir_instr_type_intrinsic: {
         auto intr = nir_instr_as_intrinsic(use_instr);
         switch (intr->intrinsic) {
         case nir_intrinsic_store_output:
         case nir_intrinsic_store_per_vertex_output:
            mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
            break;
         case nir_intrinsic_store_scratch:
         case nir_intrinsic_store_local_shared_r600:
            mq.mask |= nir_intrinsic_write_mask(intr);
            break;
         default:
            return 0xf;
         }
         break;
      }
      default:
         return 0xf;
      }
   }
   return mq.mask;
}

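/* Replace a lowered load by an LDS read that fetches only the used
 * components, then rebuild a full-width vector with undefs in the
 * unused slots so the original uses can be rewritten. */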
static void
replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t mask = get_dest_usee_mask(op);
   if (mask) {
      nir_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask));
      if (nir_intrinsic_component(op))
         addr_outer =
            nir_iadd_imm(b, addr_outer, 4 * nir_intrinsic_component(op));

      auto new_load = nir_load_local_shared_r600(b, 32, addr_outer);

      auto undef = nir_undef(b, 1, 32);
      int comps = op->def.num_components;
      nir_def *remix[4] = {undef, undef, undef, undef};

      int chan = 0;
      for (int i = 0; i < comps; ++i) {
         if (mask & (1 << i)) {
            remix[i] = nir_channel(b, new_load, chan++);
         }
      }
      auto new_load_remixed = nir_vec(b, remix, comps);
      nir_def_rewrite_uses(&op->def, new_load_remixed);
   }
   nir_instr_remove(&op->instr);
}

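/* Lower a store to LDS writes, splitting the write mask into its xy
 * and zw halves and emitting one store per half at the matching
 * 8-byte offset, presumably because the r600 LDS write ops handle at
 * most two channels at a time. */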
static void
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op);

   for (int i = 0; i < 2; ++i) {
      unsigned test_mask = (0x3 << 2 * i);
      unsigned wmask = orig_writemask & test_mask;
      if (!wmask)
         continue;

      uint32_t writemask = wmask >> nir_intrinsic_component(op);

      bool start_even = (orig_writemask & (1u << (2 * i)));
      nir_def *addr2 = nir_iadd_imm(b, addr, 8 * i + (start_even ? 0 : 4));
      nir_store_local_shared_r600(b, op->src[0].ssa, addr2,
                                  .write_mask = writemask);
   }
}

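/* Add the constant varying offset and the indirect slot offset
 * (src[src_offset], in vec4 units) to an already computed patch base
 * address. */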
static nir_def *
emil_tcs_io_offset(nir_builder *b,
                   nir_def *addr,
                   nir_intrinsic_instr *op,
                   int src_offset)
{
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr,
                                nir_ishl_imm(b, op->src[src_offset].ssa, 4)),
                       offset);
}

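/* Number of outer tess factor components for a primitive type; zero
 * signals a primitive that is not handled here. */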
inline unsigned
outer_tf_components(mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_LINES:
      return 2;
   case MESA_PRIM_TRIANGLES:
      return 3;
   case MESA_PRIM_QUADS:
      return 4;
   default:
      return 0;
   }
}

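/* Lower one tessellation I/O intrinsic to explicit LDS access. The
 * in/out parameter bases (cached in function-local statics) are
 * loaded per stage: the TES reads what the TCS wrote and the VS
 * writes what the TCS will read, so the bases are crossed over
 * accordingly. */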
static bool
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum mesa_prim prim_type)
{
   static nir_def *load_in_param_base = nullptr;
   static nir_def *load_out_param_base = nullptr;

   b->cursor = nir_before_instr(instr);
   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      load_in_param_base = nir_load_tcs_in_param_base_r600(b);
      load_out_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      load_in_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      load_out_param_base = nir_load_tcs_in_param_base_r600(b);
   }

   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   unsigned tf_inner_address_offset = 0;
   unsigned ncomps_correct = 0;

   switch (op->intrinsic) {
   case nir_intrinsic_load_patch_vertices_in: {
      nir_def *vertices_in;
      if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
         vertices_in = nir_channel(b, load_in_param_base, 2);
      else {
         auto base = nir_load_tcs_in_param_base_r600(b);
         vertices_in = nir_channel(b, base, 2);
      }
      nir_def_rewrite_uses(&op->def, vertices_in);
      nir_instr_remove(&op->instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_input: {
      nir_def *addr =
         b->shader->info.stage == MESA_SHADER_TESS_CTRL
            ? emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op)
            : emil_lsd_out_addr(
                 b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_per_vertex_output: {
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_output: {
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_output: {
      nir_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
                         ? r600_tcs_base_address(b, load_out_param_base, rel_patch_id)
                         : nir_build_alu(b,
                                         nir_op_umul24,
                                         nir_channel(b, load_out_param_base, 1),
                                         rel_patch_id,
                                         NULL,
                                         NULL);
      addr = emil_tcs_io_offset(b, addr, op, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_output: {
      nir_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_input: {
      nir_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_tess_level_inner:
      tf_inner_address_offset = 4;
      ncomps_correct = 2;
      FALLTHROUGH;
   case nir_intrinsic_load_tess_level_outer: {
      auto ncomps = outer_tf_components(prim_type);
      if (!ncomps)
         return false;
      ncomps -= ncomps_correct;
      auto base = nir_load_tcs_out_param_base_r600(b);
      auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);
      nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
      nir_def *addr_outer =
         nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));

      nir_def *tf = nir_load_local_shared_r600(b, 32, addr_outer);
      if (ncomps < 4 && b->shader->info.stage != MESA_SHADER_TESS_EVAL) {
         auto undef = nir_undef(b, 1, 32);
         nir_def *srcs[4] = {undef, undef, undef, undef};
         for (unsigned i = 0; i < ncomps; ++i)
            srcs[i] = nir_channel(b, tf, i);
         auto help = nir_vec(b, srcs, 4);
         nir_def_rewrite_uses(&op->def, help);
      } else {
         nir_def_rewrite_uses(&op->def, tf);
      }
      nir_instr_remove(instr);
      return true;
   }
   default:;
   }

   return false;
}

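/* Pass entry point: walk all instructions and lower the tessellation
 * I/O intrinsics selected by r600_lower_tess_io_filter. */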
bool
r600_lower_tess_io(nir_shader *shader, enum mesa_prim prim_type)
{
   bool progress = false;
   nir_foreach_function_impl(impl, shader)
   {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            if (r600_lower_tess_io_filter(instr, shader->info.stage))
               progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
         }
      }
   }
   return progress;
}

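/* Emit a store_tf_r600 intrinsic; val is expected to be an
 * (address, value) pair as built by r600_append_tcs_TF_emission. */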
bool
r600_emit_tf(nir_builder *b, nir_def *val)
{
   nir_intrinsic_instr *store_tf =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
   store_tf->num_components = val->num_components;
   store_tf->src[0] = nir_src_for_ssa(val);
   nir_builder_instr_insert(b, &store_tf->instr);
   return true;
}

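/* Append the tess factor write-out to the end of the TCS: within an
 * invocation-0 guard, read the outer (and inner, where present)
 * factors back from LDS and queue one (address, value) pair per
 * factor for the TF buffer. Returns false if the factors are already
 * emitted or the primitive type is not handled. */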
bool
r600_append_tcs_TF_emission(nir_shader *shader, enum mesa_prim prim_type)
{
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   nir_foreach_function_impl(impl, shader)
   {
      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder builder = nir_builder_create(f->impl);
   nir_builder *b = &builder;

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   nir_def *invocation_id = nir_load_invocation_id(b);

   nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));
   auto base = nir_load_tcs_out_param_base_r600(b);
   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   nir_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   nir_def *tf_outer = nir_load_local_shared_r600(b, 32, addr_outer);

   std::vector<nir_def *> tf_out;

   nir_def *tf_out_base = nir_load_tcs_tess_factor_base_r600(b);
   nir_def *out_addr0 = nir_umad24(b,
                                   rel_patch_id,
                                   nir_imm_int(b, stride),
                                   tf_out_base);
   int chanx = 0;
   int chany = 1;

   if (prim_type == MESA_PRIM_LINES)
      std::swap(chanx, chany);

   int inner_base = 12;

   tf_out.push_back(nir_vec2(b,
                             out_addr0,
                             nir_channel(b, tf_outer, chanx)));

   tf_out.push_back(nir_vec2(b,
                             nir_iadd_imm(b, out_addr0, 4),
                             nir_channel(b, tf_outer, chany)));

   if (outer_comps > 2) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 8),
                                nir_channel(b, tf_outer, 2)));
   }

   if (outer_comps > 3) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 12),
                                nir_channel(b, tf_outer, 3)));
      inner_base = 16;
   }

   if (inner_comps) {
      nir_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      nir_def *tf_inner = nir_load_local_shared_r600(b, 32, addr1);

      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, inner_base),
                                nir_channel(b, tf_inner, 0)));

      if (inner_comps > 1) {
         tf_out.push_back(nir_vec2(b,
                                   nir_iadd_imm(b, out_addr0, inner_base + 4),
                                   nir_channel(b, tf_inner, 1)));
      }
   }

   for (auto tf : tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}
579