1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "si_pipe.h"
8 #include "si_shader_internal.h"
9 #include "si_shader_llvm.h"
10 #include "sid.h"
11
si_get_rel_patch_id(struct si_shader_context * ctx)12 LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx)
13 {
14 switch (ctx->stage) {
15 case MESA_SHADER_TESS_CTRL:
16 return si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 0, 8);
17
18 case MESA_SHADER_TESS_EVAL:
19 return ctx->abi.tes_rel_patch_id_replaced ?
20 ctx->abi.tes_rel_patch_id_replaced :
21 ac_get_arg(&ctx->ac, ctx->args->ac.tes_rel_patch_id);
22
23 default:
24 assert(0);
25 return NULL;
26 }
27 }
28
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
49
get_tcs_out_patch0_patch_data_offset(struct si_shader_context * ctx)50 static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
51 {
52 return si_unpack_param(ctx, ctx->args->vs_state_bits, 10, 14);
53 }
54
get_tcs_out_current_patch_data_offset(struct si_shader_context * ctx)55 static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
56 {
57 LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
58 unsigned patch_dw_stride = si_get_tcs_out_patch_stride(&ctx->shader->selector->info);
59 LLVMValueRef patch_stride = LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
60 LLVMValueRef rel_patch_id = si_get_rel_patch_id(ctx);
61
62 return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
63 }
64
65 /* The offchip buffer layout for TCS->TES is
66 *
67 * - attribute 0 of patch 0 vertex 0
68 * - attribute 0 of patch 0 vertex 1
69 * - attribute 0 of patch 0 vertex 2
70 * ...
71 * - attribute 0 of patch 1 vertex 0
72 * - attribute 0 of patch 1 vertex 1
73 * ...
74 * - attribute 1 of patch 0 vertex 0
75 * - attribute 1 of patch 0 vertex 1
76 * ...
77 * - per patch attribute 0 of patch 0
78 * - per patch attribute 0 of patch 1
79 * ...
80 *
81 * Note that every attribute has 4 components.
82 */
get_tcs_tes_buffer_address(struct si_shader_context * ctx,LLVMValueRef rel_patch_id,LLVMValueRef param_index)83 static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
84 LLVMValueRef rel_patch_id,
85 LLVMValueRef param_index)
86 {
87 LLVMValueRef base_addr, num_patches;
88 LLVMValueRef param_stride, constant16;
89
90 num_patches = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 0, 6);
91 num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, "");
92
93 constant16 = LLVMConstInt(ctx->ac.i32, 16, 0);
94 base_addr = rel_patch_id;
95 param_stride = num_patches;
96
97 base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
98 base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
99
100 LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 16, 16);
101 return LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");
102 }
103
104 /**
105 * Load from LSHS LDS storage.
106 *
107 * \param type output value type
108 * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
109 * \param dw_addr address in dwords
110 */
lshs_lds_load(struct si_shader_context * ctx,LLVMTypeRef type,unsigned swizzle,LLVMValueRef dw_addr)111 static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
112 LLVMValueRef dw_addr)
113 {
114 LLVMValueRef value;
115
116 if (swizzle == ~0) {
117 LLVMValueRef values[4];
118
119 for (unsigned chan = 0; chan < 4; chan++)
120 values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
121
122 return ac_build_gather_values(&ctx->ac, values, 4);
123 }
124
125 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
126 value = ac_lds_load(&ctx->ac, dw_addr);
127 return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
128 }
129
/* Selects which tessellation ring get_tess_ring_descriptor() describes. */
enum si_tess_ring
{
   /* Ring receiving the tess factors; it starts tess_offchip_ring_size
    * bytes after the off-chip ring address. */
   TESS_FACTOR_RING = 0,
   /* Off-chip ring, starting at the tes_offchip_addr argument. */
   TESS_OFFCHIP_RING = 1,
};
135
get_tess_ring_descriptor(struct si_shader_context * ctx,enum si_tess_ring ring)136 static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
137 {
138 LLVMBuilderRef builder = ctx->ac.builder;
139 LLVMValueRef addr = ac_get_arg(&ctx->ac, ctx->args->tes_offchip_addr);
140
141 if (ring == TESS_FACTOR_RING) {
142 unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
143 addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
144 }
145
146 uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
147 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
148
149 if (ctx->screen->info.gfx_level >= GFX11)
150 rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
151 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
152 else if (ctx->screen->info.gfx_level >= GFX10)
153 rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
154 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
155 else
156 rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
157 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
158
159 LLVMValueRef desc[4];
160 desc[0] = addr;
161 desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
162 desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
163 desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
164
165 return ac_build_gather_values(&ctx->ac, desc, 4);
166 }
167
/**
 * ABI callback that loads TCS input components from the function's VGPR
 * parameters.
 *
 * This path is only valid when the shader key has same_patch_vertices set and
 * no indirect parameter index is used; both are asserted.
 *
 * \param abi              shader ABI embedded in the si_shader_context
 * \param type             type each loaded component is bitcast to
 * \param vertex_index     unused here
 * \param param_index      must be NULL
 * \param driver_location  index into info->input[] used to look up the semantic
 * \param component        first component to load
 * \param num_components   number of consecutive components to load;
 *                         component + num_components must not exceed 4
 * \param load_input       unused here
 * \return the components gathered by ac_build_varying_gather_values()
 */
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
                                             LLVMValueRef vertex_index, LLVMValueRef param_index,
                                             unsigned driver_location, unsigned component,
                                             unsigned num_components, bool load_input)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;

   assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
   /* value[] below holds exactly one vec4 and is indexed by absolute
    * component, so the requested range has to fit in 0..3.
    */
   assert(component <= 4 && num_components <= 4 - component);

   uint8_t semantic = info->input[driver_location].semantic;
   /* Load the TCS input from a VGPR: each input slot occupies 4 consecutive
    * function parameters starting right after tcs_rel_ids.
    */
   unsigned func_param = ctx->args->ac.tcs_rel_ids.arg_index + 1 +
                         si_shader_io_get_unique_index(semantic) * 4;

   LLVMValueRef value[4];
   for (unsigned i = component; i < component + num_components; i++) {
      value[i] = LLVMGetParam(ctx->main_fn.value, func_param + i);
      value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
   }

   return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
191
/**
 * Emit the code that writes one patch's tessellation factors to memory.
 *
 * All loads and stores are wrapped in an "invocation_id == 0" conditional
 * because the tess levels are per-patch, not per-vertex. The factors are
 * taken from the invoc0_tf_* values when the shader key says invocation 0
 * defines them, and are otherwise loaded from LDS after a wait and an
 * optional barrier. They are stored to the tess factor ring and, when the
 * key says TES reads them, to the off-chip ring as well.
 *
 * The primitive mode is validated before any IR is emitted, so an unknown
 * mode returns without leaving a conditional block open.
 *
 * \param ctx              shader context
 * \param key              epilog key; only tcs_epilog.noop_s_barrier is read
 * \param rel_patch_id     relative patch ID (i32)
 * \param invocation_id    TCS invocation ID (i32)
 * \param tcs_out_current_patch_data_offset
 *                         LDS address of the patch's per-patch outputs; only
 *                         used when the factors are loaded from LDS
 * \param invoc0_tf_outer  outer tess factors of invocation 0
 * \param invoc0_tf_inner  inner tess factors of invocation 0
 */
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
                                  LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
{
   struct si_shader *shader = ctx->shader;
   LLVMBuilderRef builder = ctx->ac.builder;
   unsigned stride, outer_comps, inner_comps;

   /* Determine the layout of one tess factor element in the buffer.
    * This emits no IR, so it is done first: bailing out here cannot leave
    * the conditional below without its matching ac_build_endif.
    */
   switch (shader->key.ge.part.tcs.epilog.prim_mode) {
   case TESS_PRIMITIVE_ISOLINES:
      stride = 2; /* 2 dwords, 1 vec2 store */
      outer_comps = 2;
      inner_comps = 0;
      break;
   case TESS_PRIMITIVE_TRIANGLES:
      stride = 4; /* 4 dwords, 1 vec4 store */
      outer_comps = 3;
      inner_comps = 1;
      break;
   case TESS_PRIMITIVE_QUADS:
      stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
      outer_comps = 4;
      inner_comps = 2;
      break;
   default:
      assert(0);
      return;
   }

   const bool tf_in_vgprs = shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def;

   /* Add a barrier before loading tess factors from LDS. */
   if (!tf_in_vgprs) {
      ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);

      if (!key->tcs_epilog.noop_s_barrier)
         ac_build_s_barrier(&ctx->ac, ctx->stage);
   }

   /* Do this only for invocation 0, because the tess levels are per-patch,
    * not per-vertex.
    *
    * This can't jump, because invocation 0 executes this. It should
    * at least mask out the loads and stores for other invocations.
    */
   ac_build_ifcc(&ctx->ac,
                 LLVMBuildICmp(builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);

   /* out[] is the packed sequence stored to the tess factor ring;
    * outer[]/inner[] are kept separately for the off-chip stores.
    */
   LLVMValueRef out[6], inner[4], outer[4];

   for (unsigned i = 0; i < 4; i++) {
      inner[i] = LLVMGetUndef(ctx->ac.i32);
      outer[i] = LLVMGetUndef(ctx->ac.i32);
   }

   if (tf_in_vgprs) {
      /* Tess factors are in VGPRs. */
      for (unsigned i = 0; i < outer_comps; i++)
         outer[i] = out[i] = invoc0_tf_outer[i];
      for (unsigned i = 0; i < inner_comps; i++)
         inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
   } else {
      /* Load tess_inner and tess_outer from LDS.
       * Any invocation can write them, so we can't get them from a temporary.
       */
      unsigned tess_inner_index = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
      unsigned tess_outer_index = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);

      LLVMValueRef lds_base = tcs_out_current_patch_data_offset;
      LLVMValueRef lds_inner = LLVMBuildAdd(builder, lds_base,
                                            LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");
      LLVMValueRef lds_outer = LLVMBuildAdd(builder, lds_base,
                                            LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");

      for (unsigned i = 0; i < outer_comps; i++)
         outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
      for (unsigned i = 0; i < inner_comps; i++)
         inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
   }

   if (shader->key.ge.part.tcs.epilog.prim_mode == TESS_PRIMITIVE_ISOLINES) {
      /* For isolines, the hardware expects tess factors in the
       * reverse order from what NIR specifies.
       */
      LLVMValueRef tmp = out[0];
      out[0] = out[1];
      out[1] = tmp;
   }

   /* Convert the outputs to vectors for stores. */
   LLVMValueRef vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
   LLVMValueRef vec1 = NULL;

   if (stride > 4)
      vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);

   /* Get the buffer. */
   LLVMValueRef buffer = get_tess_ring_descriptor(ctx, TESS_FACTOR_RING);

   /* Get the offset. */
   LLVMValueRef tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset);
   LLVMValueRef byteoffset =
      LLVMBuildMul(builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
   unsigned offset = 0;

   /* Store the dynamic HS control word. Only the first patch writes it, and
    * it shifts the tess factors that follow by one dword.
    */
   if (ctx->screen->info.gfx_level <= GFX8) {
      ac_build_ifcc(&ctx->ac,
                    LLVMBuildICmp(builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
      ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0),
                                  NULL, LLVMConstInt(ctx->ac.i32, offset, 0), tf_base,
                                  ACCESS_COHERENT);
      ac_build_endif(&ctx->ac, 6504);
      offset += 4;
   }

   /* Store the tessellation factors. */
   ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL,
                               LLVMBuildAdd(builder, byteoffset,
                                            LLVMConstInt(ctx->ac.i32, offset, 0), ""),
                               tf_base, ACCESS_COHERENT);
   offset += 16;
   if (vec1)
      ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL,
                                  LLVMBuildAdd(builder, byteoffset,
                                               LLVMConstInt(ctx->ac.i32, offset, 0), ""),
                                  tf_base, ACCESS_COHERENT);

   /* Store the tess factors into the offchip buffer if TES reads them. */
   if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
      LLVMValueRef buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING);
      LLVMValueRef base = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);

      unsigned param_outer = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
      LLVMValueRef tf_outer_offset =
         get_tcs_tes_buffer_address(ctx, rel_patch_id, LLVMConstInt(ctx->ac.i32, param_outer, 0));

      LLVMValueRef outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps);

      ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, tf_outer_offset,
                                  base, ACCESS_COHERENT);
      if (inner_comps) {
         unsigned param_inner = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
         LLVMValueRef tf_inner_offset =
            get_tcs_tes_buffer_address(ctx, rel_patch_id, LLVMConstInt(ctx->ac.i32, param_inner, 0));

         LLVMValueRef inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps);
         ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL,
                                     tf_inner_offset, base, ACCESS_COHERENT);
      }
   }

   ac_build_endif(&ctx->ac, 6503);
}
352
353 /* This only writes the tessellation factor levels. */
si_llvm_tcs_build_end(struct si_shader_context * ctx)354 void si_llvm_tcs_build_end(struct si_shader_context *ctx)
355 {
356 LLVMBuilderRef builder = ctx->ac.builder;
357 LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
358
359 rel_patch_id = si_get_rel_patch_id(ctx);
360 invocation_id = si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 8, 5);
361 tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
362
363 if (ctx->screen->info.gfx_level >= GFX9) {
364 LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
365 LLVMValueRef values[2];
366
367 ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
368
369 values[0] = rel_patch_id;
370 values[1] = LLVMGetUndef(ctx->ac.i32);
371 rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
372
373 values[0] = tf_lds_offset;
374 values[1] = LLVMGetUndef(ctx->ac.i32);
375 tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
376
377 values[0] = invocation_id;
378 values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
379 invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
380 }
381
382 /* Return epilog parameters from this function. */
383 LLVMValueRef ret = ctx->return_value;
384 unsigned vgpr;
385
386 if (ctx->screen->info.gfx_level >= GFX9) {
387 ret =
388 si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
389 ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
390 /* Tess offchip and tess factor offsets are at the beginning. */
391 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
392 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
393 vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR + 1;
394 } else {
395 ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
396 ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, GFX6_SGPR_TCS_OFFCHIP_ADDR);
397 /* Tess offchip and tess factor offsets are after user SGPRs. */
398 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
399 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
400 vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
401 }
402
403 /* VGPRs */
404 rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
405 invocation_id = ac_to_float(&ctx->ac, invocation_id);
406 tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
407
408 /* Leave a hole corresponding to the two input VGPRs. This ensures that
409 * the invocation_id output does not alias the tcs_rel_ids input,
410 * which saves a V_MOV on gfx9.
411 */
412 vgpr += 2;
413
414 ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
415 ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
416
417 struct si_shader_info *info = &ctx->shader->selector->info;
418 if (info->tessfactors_are_def_in_all_invocs) {
419 vgpr++; /* skip the tess factor LDS offset */
420
421 /* get tess factor driver location */
422 int outer_loc = -1;
423 int inner_loc = -1;
424 for (int i = 0; i < info->num_outputs; i++) {
425 unsigned semantic = info->output_semantic[i];
426 if (semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
427 outer_loc = i;
428 else if (semantic == VARYING_SLOT_TESS_LEVEL_INNER)
429 inner_loc = i;
430 }
431
432 for (unsigned i = 0; i < 6; i++) {
433 int loc = i < 4 ? outer_loc : inner_loc;
434 LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) :
435 LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[loc * 4 + i % 4], "");
436 value = ac_to_float(&ctx->ac, value);
437 ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
438 }
439 } else {
440 ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
441 }
442 ctx->return_value = ret;
443 }
444
si_llvm_ls_build_end(struct si_shader_context * ctx)445 void si_llvm_ls_build_end(struct si_shader_context *ctx)
446 {
447 struct si_shader *shader = ctx->shader;
448 bool same_thread_count = shader->key.ge.opt.same_patch_vertices;
449
450 /* Only need return value when merged shader on part mode or mono mode with same thread count. */
451 if (ctx->screen->info.gfx_level < GFX9 || (shader->is_monolithic && !same_thread_count))
452 return;
453
454 if (!ctx->shader->is_monolithic)
455 ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
456
457 LLVMValueRef ret = ctx->return_value;
458
459 ret = si_insert_input_ptr(ctx, ret, ctx->args->other_const_and_shader_buffers, 0);
460 ret = si_insert_input_ptr(ctx, ret, ctx->args->other_samplers_and_images, 1);
461 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
462 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.merged_wave_info, 3);
463 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
464 if (ctx->screen->info.gfx_level <= GFX10_3)
465 ret = si_insert_input_ret(ctx, ret, ctx->args->ac.scratch_offset, 5);
466
467 ret = si_insert_input_ptr(ctx, ret, ctx->args->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
468 ret = si_insert_input_ptr(ctx, ret, ctx->args->bindless_samplers_and_images,
469 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
470
471 ret = si_insert_input_ret(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
472
473 ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
474 ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
475
476 unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
477 ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_patch_id, vgpr++);
478 ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_rel_ids, vgpr++);
479
480 if (same_thread_count) {
481 /* Same thread count is set only when mono mode. */
482 assert(shader->is_monolithic);
483
484 struct si_shader_info *info = &shader->selector->info;
485 LLVMValueRef *addrs = ctx->abi.outputs;
486
487 for (unsigned i = 0; i < info->num_outputs; i++) {
488 unsigned semantic = info->output_semantic[i];
489 int param = si_shader_io_get_unique_index(semantic);
490
491 if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param)))
492 continue;
493
494 for (unsigned chan = 0; chan < 4; chan++) {
495 if (!(info->output_usagemask[i] & (1 << chan)))
496 continue;
497
498 LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
499
500 ret = LLVMBuildInsertValue(ctx->ac.builder, ret, value, vgpr + param * 4 + chan, "");
501 }
502 }
503 }
504
505 ctx->return_value = ret;
506 }
507
508 /**
509 * Compile the TCS epilog function. This writes tessellation factors to memory
510 * based on the output primitive type of the tessellator (determined by TES).
511 */
si_llvm_build_tcs_epilog(struct si_shader_context * ctx,union si_shader_part_key * key)512 void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
513 {
514 struct ac_arg rel_patch_id;
515 struct ac_arg invocation_id;
516 struct ac_arg tcs_out_current_patch_data_offset;
517 struct ac_arg tess_factors[6];
518 si_get_tcs_epilog_args(ctx->screen->info.gfx_level, ctx->args, &rel_patch_id, &invocation_id,
519 &tcs_out_current_patch_data_offset, tess_factors);
520
521 /* Create the function. */
522 si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.gfx_level >= GFX7 ? 128 : 0);
523 ac_declare_lds_as_pointer(&ctx->ac);
524
525 LLVMValueRef invoc0_tess_factors[6];
526 for (unsigned i = 0; i < 6; i++)
527 invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
528
529 si_write_tess_factors(ctx, key, ac_get_arg(&ctx->ac, rel_patch_id),
530 ac_get_arg(&ctx->ac, invocation_id),
531 ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
532 invoc0_tess_factors, invoc0_tess_factors + 4);
533
534 LLVMBuildRetVoid(ctx->ac.builder);
535 }
536
si_llvm_init_tcs_callbacks(struct si_shader_context * ctx)537 void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
538 {
539 ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
540 }
541