• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "sfn_nir.h"
2 
r600_lower_tess_io_filter(const nir_instr * instr)3 bool r600_lower_tess_io_filter(const nir_instr *instr)
4 {
5    if (instr->type != nir_instr_type_intrinsic)
6       return false;
7 
8    nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
9    switch (op->intrinsic) {
10    case nir_intrinsic_load_input:
11    case nir_intrinsic_store_output:
12    case nir_intrinsic_load_output:
13    case nir_intrinsic_load_per_vertex_input:
14    case nir_intrinsic_load_per_vertex_output:
15    case nir_intrinsic_store_per_vertex_output:
16    case nir_intrinsic_load_patch_vertices_in:
17    case nir_intrinsic_load_tess_level_outer:
18    case nir_intrinsic_load_tess_level_inner:
19       return true;
20    default:
21       ;
22    }
23    return false;
24 }
25 
26 static nir_ssa_def *
emit_load_param_base(nir_builder * b,nir_intrinsic_op op)27 emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
28 {
29    nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op);
30 	nir_ssa_dest_init(&result->instr, &result->dest,
31                      4, 32, NULL);
32    nir_builder_instr_insert(b, &result->instr);
33    return &result->dest.ssa;
34 }
35 
get_tcs_varying_offset(nir_shader * nir,nir_variable_mode mode,unsigned index)36 static int get_tcs_varying_offset(nir_shader *nir, nir_variable_mode mode,
37                                   unsigned index)
38 {
39    nir_foreach_variable_with_modes(var, nir, mode) {
40       if (var->data.driver_location == index) {
41          switch (var->data.location) {
42          case VARYING_SLOT_POS:
43             return 0;
44          case VARYING_SLOT_PSIZ:
45             return 0x10;
46          case VARYING_SLOT_CLIP_DIST0:
47             return 0x20;
48          case VARYING_SLOT_CLIP_DIST1:
49             return 0x30;
50          case VARYING_SLOT_TESS_LEVEL_OUTER:
51             return 0;
52          case VARYING_SLOT_TESS_LEVEL_INNER:
53             return 0x10;
54          default:
55             if (var->data.location >= VARYING_SLOT_VAR0 &&
56                 var->data.location <= VARYING_SLOT_VAR31)
57                return 0x10 * (var->data.location - VARYING_SLOT_VAR0) + 0x40;
58 
59             if (var->data.location >=  VARYING_SLOT_PATCH0) {
60                return 0x10 * (var->data.location - VARYING_SLOT_PATCH0) + 0x20;
61             }
62          }
63          /* TODO: PATCH is missing */
64       }
65    }
66    return 0;
67 }
68 
69 static inline nir_ssa_def *
r600_umad_24(nir_builder * b,nir_ssa_def * op1,nir_ssa_def * op2,nir_ssa_def * op3)70 r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
71 {
72    return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
73 }
74 
75 static inline nir_ssa_def *
r600_tcs_base_address(nir_builder * b,nir_ssa_def * param_base,nir_ssa_def * rel_patch_id)76 r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
77 {
78    return r600_umad_24(b,  nir_channel(b, param_base, 0),
79                        rel_patch_id,
80                        nir_channel(b, param_base, 3));
81 }
82 
83 
84 static nir_ssa_def *
emil_lsd_in_addr(nir_builder * b,nir_ssa_def * base,nir_ssa_def * patch_id,nir_intrinsic_instr * op)85 emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
86 {
87    nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24,
88                                       nir_channel(b, base, 0),
89                                       patch_id, NULL, NULL);
90 
91    auto idx1 = nir_src_as_const_value(op->src[0]);
92    if (!idx1 || idx1->u32 != 0)
93       addr = r600_umad_24(b, nir_channel(b, base, 1),
94                           op->src[0].ssa, addr);
95 
96    auto offset = nir_imm_int(b, get_tcs_varying_offset(b->shader, nir_var_shader_in, nir_intrinsic_base(op)));
97 
98    auto idx2 = nir_src_as_const_value(op->src[1]);
99    if (!idx2 || idx2->u32 != 0)
100       offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
101 
102    return nir_iadd(b, addr, offset);
103 }
104 
105 static nir_ssa_def *
emil_lsd_out_addr(nir_builder * b,nir_ssa_def * base,nir_ssa_def * patch_id,nir_intrinsic_instr * op,nir_variable_mode mode,int src_offset)106 emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
107 {
108 
109    nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
110                                      patch_id,
111                                      nir_channel(b, base, 2));
112    nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1),
113                                      op->src[src_offset].ssa, addr1);
114 
115    int offset = get_tcs_varying_offset(b->shader, mode, nir_intrinsic_base(op));
116    return nir_iadd(b, nir_iadd(b, addr2,
117                                nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))),
118                                nir_imm_int(b, offset));
119 }
120 
load_offset_group(nir_builder * b,int ncomponents)121 static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
122 {
123    switch (ncomponents) {
124    /* tess outer offsets */
125    case 1: return nir_imm_int(b, 0);
126    case 2: return nir_imm_ivec2(b, 0, 4);
127    case 3: return r600_imm_ivec3(b, 0, 4, 8);
128    case 4: return nir_imm_ivec4(b, 0, 4, 8, 12);
129       /* tess inner offsets */
130    case 5: return nir_imm_int(b, 16);
131    case 6: return nir_imm_ivec2(b, 16, 20);
132    default:
133       debug_printf("Got %d components\n", ncomponents);
134       unreachable("Unsupported component count");
135    }
136 }
137 
replace_load_instr(nir_builder * b,nir_intrinsic_instr * op,nir_ssa_def * addr)138 static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
139 {
140    nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
141    load_tcs_in->num_components = op->num_components;
142    nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest,
143                      load_tcs_in->num_components, 32, NULL);
144 
145    nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components));
146    load_tcs_in->src[0] = nir_src_for_ssa(addr_outer);
147    nir_intrinsic_set_component(load_tcs_in, nir_intrinsic_component(op));
148    nir_builder_instr_insert(b, &load_tcs_in->instr);
149    nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa));
150    nir_instr_remove(&op->instr);
151 
152 }
153 
154 static nir_ssa_def *
r600_load_rel_patch_id(nir_builder * b)155 r600_load_rel_patch_id(nir_builder *b)
156 {
157    auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);
158    nir_ssa_dest_init(&patch_id->instr, &patch_id->dest,
159                      1, 32, NULL);
160    nir_builder_instr_insert(b, &patch_id->instr);
161    return &patch_id->dest.ssa;
162 }
163 
164 static void
emit_store_lds(nir_builder * b,nir_intrinsic_instr * op,nir_ssa_def * addr)165 emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
166 {
167    for (int i = 0; i < 2; ++i) {
168       unsigned test_mask = (0x3 << 2 * i);
169       if (!(nir_intrinsic_write_mask(op) & test_mask))
170          continue;
171 
172       auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
173       unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
174       nir_intrinsic_set_write_mask(store_tcs_out, writemask);
175       store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
176       store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
177       bool start_even = (writemask & (1u << (2 * i)));
178 
179       auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
180       store_tcs_out->src[1] = nir_src_for_ssa(addr2);
181 
182       nir_builder_instr_insert(b, &store_tcs_out->instr);
183    }
184 }
185 
186 static nir_ssa_def *
emil_tcs_io_offset(nir_builder * b,nir_ssa_def * addr,nir_intrinsic_instr * op,nir_variable_mode mode,int src_offset)187 emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
188 {
189 
190    int offset = get_tcs_varying_offset(b->shader, mode, nir_intrinsic_base(op));
191    return nir_iadd(b, nir_iadd(b, addr,
192                                nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))),
193                                nir_imm_int(b, offset));
194 }
195 
196 
197 inline unsigned
outer_tf_components(pipe_prim_type prim_type)198 outer_tf_components(pipe_prim_type prim_type)
199 {
200    switch (prim_type) {
201    case PIPE_PRIM_LINES: return 2;
202    case PIPE_PRIM_TRIANGLES: return 3;
203    case PIPE_PRIM_QUADS: return 4;
204    default:
205       return 0;
206    }
207 }
208 
209 
210 
211 static bool
r600_lower_tess_io_impl(nir_builder * b,nir_instr * instr,enum pipe_prim_type prim_type)212 r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
213 {
214    static nir_ssa_def *load_in_param_base = nullptr;
215    static nir_ssa_def *load_out_param_base = nullptr;
216 
217    b->cursor = nir_before_instr(instr);
218    nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
219 
220    if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
221       load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
222       load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
223    } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
224       load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
225    } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
226       load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
227    }
228 
229    auto rel_patch_id = r600_load_rel_patch_id(b);
230 
231    unsigned tf_inner_address_offset = 0;
232    unsigned ncomps_correct = 0;
233 
234    switch (op->intrinsic) {
235    case nir_intrinsic_load_patch_vertices_in: {
236       auto vertices_in = nir_channel(b, load_in_param_base, 2);
237       nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in));
238       nir_instr_remove(&op->instr);
239       return true;
240    }
241    case nir_intrinsic_load_per_vertex_input: {
242       nir_ssa_def *addr =
243             b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
244                emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
245                emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
246       replace_load_instr(b, op, addr);
247       return true;
248    }
249    case nir_intrinsic_store_per_vertex_output: {
250       nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
251       emit_store_lds(b, op, addr);
252       nir_instr_remove(instr);
253       return true;
254    }
255    case nir_intrinsic_load_per_vertex_output: {
256       nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
257       replace_load_instr(b, op, addr);
258       return true;
259    }
260    case nir_intrinsic_store_output: {
261       nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
262                              r600_tcs_base_address(b, load_out_param_base, rel_patch_id):
263                              nir_build_alu(b, nir_op_umul24,
264                                            nir_channel(b, load_out_param_base, 1),
265                                            rel_patch_id, NULL, NULL);
266       addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_out, 1);
267       emit_store_lds(b, op, addr);
268       nir_instr_remove(instr);
269       return true;
270    }
271    case nir_intrinsic_load_output: {
272       nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
273       addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_out, 0);
274       replace_load_instr(b, op, addr);
275       return true;
276    }
277    case nir_intrinsic_load_input: {
278       nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
279       addr = emil_tcs_io_offset(b, addr, op, nir_var_shader_in, 0);
280       replace_load_instr(b, op, addr);
281       return true;
282    }
283    case nir_intrinsic_load_tess_level_inner:
284       tf_inner_address_offset = 4;
285       ncomps_correct = 2;
286       /* fallthrough */
287    case nir_intrinsic_load_tess_level_outer: {
288       auto ncomps = outer_tf_components(prim_type);
289       if (!ncomps)
290          return false;
291       ncomps -= ncomps_correct;
292       auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
293       auto rel_patch_id = r600_load_rel_patch_id(b);
294       nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
295       nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));
296 
297       auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
298       tf->num_components = ncomps;
299       tf->src[0] = nir_src_for_ssa(addr_outer);
300       nir_ssa_dest_init(&tf->instr, &tf->dest,
301                         tf->num_components, 32, NULL);
302       nir_intrinsic_set_component(tf, 0);
303       nir_builder_instr_insert(b, &tf->instr);
304 
305       nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa));
306       nir_instr_remove(instr);
307       return true;
308    }
309    default:
310       ;
311    }
312 
313    return false;
314 }
315 
r600_lower_tess_io(nir_shader * shader,enum pipe_prim_type prim_type)316 bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
317 {
318    bool progress = false;
319    nir_foreach_function(function, shader) {
320       if (function->impl) {
321          nir_builder b;
322          nir_builder_init(&b, function->impl);
323 
324          nir_foreach_block(block, function->impl) {
325             nir_foreach_instr_safe(instr, block) {
326                if (instr->type != nir_instr_type_intrinsic)
327                   continue;
328 
329                if (r600_lower_tess_io_filter(instr))
330                   progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
331             }
332          }
333       }
334    }
335    return progress;
336 }
337 
r600_emit_tf(nir_builder * b,nir_ssa_def * val)338 bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
339 {
340    nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
341    store_tf->num_components = val->num_components;
342    store_tf->src[0] = nir_src_for_ssa(val);
343    nir_builder_instr_insert(b, &store_tf->instr);
344    return true;
345 }
346 
r600_append_tcs_TF_emission(nir_shader * shader,enum pipe_prim_type prim_type)347 bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) {
348    if (shader->info.stage != MESA_SHADER_TESS_CTRL)
349       return false;
350 
351    nir_foreach_function(function, shader) {
352       nir_foreach_block(block, function->impl) {
353          nir_foreach_instr_safe(instr, block) {
354             if (instr->type != nir_instr_type_intrinsic)
355                continue;
356             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
357             if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
358                return false;
359             }
360          }
361       }
362    }
363    nir_builder builder;
364    nir_builder *b = &builder;
365 
366    assert(exec_list_length(&shader->functions) == 1);
367    nir_function *f = (nir_function *)shader->functions.get_head();
368    nir_builder_init(b, f->impl);
369 
370    auto outer_comps = outer_tf_components(prim_type);
371    if (!outer_comps)
372       return false;
373 
374    unsigned inner_comps = outer_comps - 2;
375    unsigned stride = (inner_comps + outer_comps) * 4;
376 
377    b->cursor = nir_after_cf_list(&f->impl->body);
378 
379    auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
380 	nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
381                      1, 32, NULL);
382    nir_builder_instr_insert(b, &invocation_id->instr);
383 
384    nir_push_if(b, nir_ieq_imm(b, &invocation_id->dest.ssa, 0));
385    auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
386    auto rel_patch_id = r600_load_rel_patch_id(b);
387 
388    nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
389 
390    nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
391    auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
392    tf_outer->num_components = outer_comps;
393    tf_outer->src[0] = nir_src_for_ssa(addr_outer);
394    nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
395                      tf_outer->num_components, 32, NULL);
396    nir_intrinsic_set_component(tf_outer, 15);
397    nir_builder_instr_insert(b, &tf_outer->instr);
398 
399    std::vector<nir_ssa_def *> tf_out;
400 
401 
402    auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
403 	nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
404                      1, 32, NULL);
405    nir_builder_instr_insert(b, &tf_out_base->instr);
406 
407    auto out_addr0 = nir_build_alu(b, nir_op_umad24,
408                                   rel_patch_id,
409                                   nir_imm_int(b, stride),
410                                   &tf_out_base->dest.ssa,
411                                   NULL);
412    int chanx = 0;
413    int chany = 1;
414 
415    if (prim_type == PIPE_PRIM_LINES)
416       std::swap(chanx, chany);
417 
418 
419    auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
420                       nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
421                       nir_channel(b, &tf_outer->dest.ssa, chany));
422 
423    tf_out.push_back(v0);
424    if (outer_comps > 2) {
425       auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
426                                              nir_channel(b, &tf_outer->dest.ssa, 2),
427                                              nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
428                                              nir_channel(b, &tf_outer->dest.ssa, 3)) :
429                                     nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
430                                              nir_channel(b, &tf_outer->dest.ssa, 2));
431       tf_out.push_back(v1);
432    }
433 
434    if (inner_comps) {
435       nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
436       auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
437       tf_inner->num_components = inner_comps;
438       tf_inner->src[0] = nir_src_for_ssa(addr1);
439       nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
440                         tf_inner->num_components, 32, NULL);
441       nir_intrinsic_set_component(tf_inner, 3);
442       nir_builder_instr_insert(b, &tf_inner->instr);
443 
444       auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
445                                              nir_channel(b, &tf_inner->dest.ssa, 0),
446                                              nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
447                                              nir_channel(b, &tf_inner->dest.ssa, 1)):
448                                     nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
449                                              nir_channel(b, &tf_inner->dest.ssa, 0));
450       tf_out.push_back(v2);
451    }
452 
453    for (auto tf: tf_out)
454       r600_emit_tf(b, tf);
455 
456    nir_pop_if(b, nullptr);
457 
458    nir_metadata_preserve(f->impl, nir_metadata_none);
459 
460    return true;
461 }
462