/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6 
7 #include "si_pipe.h"
8 #include "si_shader_internal.h"
9 #include "si_shader_llvm.h"
10 #include "sid.h"
11 
si_get_rel_patch_id(struct si_shader_context * ctx)12 LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx)
13 {
14    switch (ctx->stage) {
15    case MESA_SHADER_TESS_CTRL:
16       return si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 0, 8);
17 
18    case MESA_SHADER_TESS_EVAL:
19       return ctx->abi.tes_rel_patch_id_replaced ?
20          ctx->abi.tes_rel_patch_id_replaced :
21          ac_get_arg(&ctx->ac, ctx->args->ac.tes_rel_patch_id);
22 
23    default:
24       assert(0);
25       return NULL;
26    }
27 }
28 
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
49 
get_tcs_out_patch0_patch_data_offset(struct si_shader_context * ctx)50 static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
51 {
52    return si_unpack_param(ctx, ctx->args->vs_state_bits, 10, 14);
53 }
54 
get_tcs_out_current_patch_data_offset(struct si_shader_context * ctx)55 static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
56 {
57    LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
58    unsigned patch_dw_stride = si_get_tcs_out_patch_stride(&ctx->shader->selector->info);
59    LLVMValueRef patch_stride = LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
60    LLVMValueRef rel_patch_id = si_get_rel_patch_id(ctx);
61 
62    return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
63 }
64 
65 /* The offchip buffer layout for TCS->TES is
66  *
67  * - attribute 0 of patch 0 vertex 0
68  * - attribute 0 of patch 0 vertex 1
69  * - attribute 0 of patch 0 vertex 2
70  *   ...
71  * - attribute 0 of patch 1 vertex 0
72  * - attribute 0 of patch 1 vertex 1
73  *   ...
74  * - attribute 1 of patch 0 vertex 0
75  * - attribute 1 of patch 0 vertex 1
76  *   ...
77  * - per patch attribute 0 of patch 0
78  * - per patch attribute 0 of patch 1
79  *   ...
80  *
81  * Note that every attribute has 4 components.
82  */
get_tcs_tes_buffer_address(struct si_shader_context * ctx,LLVMValueRef rel_patch_id,LLVMValueRef param_index)83 static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
84                                                LLVMValueRef rel_patch_id,
85                                                LLVMValueRef param_index)
86 {
87    LLVMValueRef base_addr, num_patches;
88    LLVMValueRef param_stride, constant16;
89 
90    num_patches = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 0, 6);
91    num_patches = LLVMBuildAdd(ctx->ac.builder, num_patches, ctx->ac.i32_1, "");
92 
93    constant16 = LLVMConstInt(ctx->ac.i32, 16, 0);
94    base_addr = rel_patch_id;
95    param_stride = num_patches;
96 
97    base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
98    base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
99 
100    LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->args->tcs_offchip_layout, 16, 16);
101    return LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");
102 }
103 
104 /**
105  * Load from LSHS LDS storage.
106  *
107  * \param type     output value type
108  * \param swizzle  offset (typically 0..3); it can be ~0, which loads a vec4
109  * \param dw_addr  address in dwords
110  */
lshs_lds_load(struct si_shader_context * ctx,LLVMTypeRef type,unsigned swizzle,LLVMValueRef dw_addr)111 static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
112                                   LLVMValueRef dw_addr)
113 {
114    LLVMValueRef value;
115 
116    if (swizzle == ~0) {
117       LLVMValueRef values[4];
118 
119       for (unsigned chan = 0; chan < 4; chan++)
120          values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
121 
122       return ac_build_gather_values(&ctx->ac, values, 4);
123    }
124 
125    dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
126    value = ac_lds_load(&ctx->ac, dw_addr);
127    return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
128 }
129 
/* Selects which tessellation ring get_tess_ring_descriptor() describes. */
enum si_tess_ring
{
   /* Ring that receives the tessellation factors. */
   TESS_FACTOR_RING = 0,
   /* Off-chip ring holding the TCS->TES data. */
   TESS_OFFCHIP_RING = 1,
};
135 
get_tess_ring_descriptor(struct si_shader_context * ctx,enum si_tess_ring ring)136 static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
137 {
138    LLVMBuilderRef builder = ctx->ac.builder;
139    LLVMValueRef addr = ac_get_arg(&ctx->ac, ctx->args->tes_offchip_addr);
140 
141    if (ring == TESS_FACTOR_RING) {
142       unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size;
143       addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
144    }
145 
146    uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
147                     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
148 
149    if (ctx->screen->info.gfx_level >= GFX11)
150       rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
151                S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
152    else if (ctx->screen->info.gfx_level >= GFX10)
153       rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
154                S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
155    else
156       rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
157                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
158 
159    LLVMValueRef desc[4];
160    desc[0] = addr;
161    desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
162    desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
163    desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
164 
165    return ac_build_gather_values(&ctx->ac, desc, 4);
166 }
167 
/**
 * ABI callback that loads a TCS input directly from function parameters.
 *
 * Only valid when the shader key has opt.same_patch_vertices set and the
 * access is not indirectly indexed (param_index must be NULL); both are
 * asserted. vertex_index and load_input are not used.
 *
 * \param type             type each loaded channel is bitcast to
 * \param driver_location  index into the selector's input table
 * \param component        first channel to load
 * \param num_components   number of consecutive channels to load
 */
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
                                             LLVMValueRef vertex_index, LLVMValueRef param_index,
                                             unsigned driver_location, unsigned component,
                                             unsigned num_components, bool load_input)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;

   assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);

   /* Load the TCS input from a VGPR: the parameters follow tcs_rel_ids, four
    * per unique I/O index.
    */
   uint8_t semantic = info->input[driver_location].semantic;
   unsigned first_param = ctx->args->ac.tcs_rel_ids.arg_index + 1 +
      si_shader_io_get_unique_index(semantic) * 4;

   LLVMValueRef channels[4];
   for (unsigned n = 0; n < num_components; n++) {
      unsigned chan = component + n;
      LLVMValueRef param = LLVMGetParam(ctx->main_fn.value, first_param + chan);

      channels[chan] = LLVMBuildBitCast(ctx->ac.builder, param, type, "");
   }

   return ac_build_varying_gather_values(&ctx->ac, channels, num_components, component);
}
191 
/**
 * Emit the code that stores the tessellation factors of the current patch.
 *
 * All loads and stores are wrapped in an "invocation_id == 0" conditional,
 * because the tess levels are per-patch, not per-vertex. The factors are
 * written to the tess factor ring and, if the epilog key says TES reads them,
 * also to the off-chip ring.
 *
 * \param key            shader part key; tcs_epilog.noop_s_barrier is read
 * \param rel_patch_id   relative patch ID
 * \param invocation_id  TCS invocation ID
 * \param tcs_out_current_patch_data_offset
 *                       LDS address of this patch's per-patch outputs; only
 *                       used when the factors have to be loaded from LDS
 * \param invoc0_tf_outer, invoc0_tf_inner
 *                       factors passed in by the caller; only used when the
 *                       epilog key has invoc0_tess_factors_are_def set
 */
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
                                  LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
{
   struct si_shader *shader = ctx->shader;
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMTypeRef i32 = ctx->ac.i32;
   const bool tf_passed_in = shader->key.ge.part.tcs.epilog.invoc0_tess_factors_are_def;

   /* Add a barrier before loading tess factors from LDS. */
   if (!tf_passed_in) {
      ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);

      if (!key->tcs_epilog.noop_s_barrier)
         ac_build_s_barrier(&ctx->ac, ctx->stage);
   }

   /* Do this only for invocation 0, because the tess levels are per-patch,
    * not per-vertex.
    *
    * This can't jump, because invocation 0 executes this. It should
    * at least mask out the loads and stores for other invocations.
    */
   LLVMValueRef is_invoc0 = LLVMBuildICmp(builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, "");
   ac_build_ifcc(&ctx->ac, is_invoc0, 6503);

   /* Determine the layout of one tess factor element in the buffer. */
   unsigned num_outer, num_inner;

   switch (shader->key.ge.part.tcs.epilog.prim_mode) {
   case TESS_PRIMITIVE_ISOLINES:
      num_outer = 2;
      num_inner = 0;
      break;
   case TESS_PRIMITIVE_TRIANGLES:
      num_outer = 3;
      num_inner = 1;
      break;
   case TESS_PRIMITIVE_QUADS:
      num_outer = 4;
      num_inner = 2;
      break;
   default:
      assert(0);
      return;
   }

   /* Element size in dwords: 2 (one vec2 store), 4 (one vec4 store) or
    * 6 (vec4 + vec2 stores).
    */
   const unsigned tf_dwords = num_outer + num_inner;

   LLVMValueRef tf_outer[4], tf_inner[4];
   LLVMValueRef tf_packed[6]; /* outer factors followed by inner factors */

   for (unsigned c = 0; c < 4; c++) {
      tf_inner[c] = LLVMGetUndef(i32);
      tf_outer[c] = LLVMGetUndef(i32);
   }

   if (tf_passed_in) {
      /* Tess factors are in VGPRs. */
      for (unsigned c = 0; c < num_outer; c++)
         tf_outer[c] = tf_packed[c] = invoc0_tf_outer[c];
      for (unsigned c = 0; c < num_inner; c++)
         tf_inner[c] = tf_packed[num_outer + c] = invoc0_tf_inner[c];
   } else {
      /* Load tess_inner and tess_outer from LDS.
       * Any invocation can write them, so we can't get them from a temporary.
       */
      unsigned inner_slot = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
      unsigned outer_slot = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);

      LLVMValueRef inner_addr = LLVMBuildAdd(builder, tcs_out_current_patch_data_offset,
                                             LLVMConstInt(i32, inner_slot * 4, 0), "");
      LLVMValueRef outer_addr = LLVMBuildAdd(builder, tcs_out_current_patch_data_offset,
                                             LLVMConstInt(i32, outer_slot * 4, 0), "");

      for (unsigned c = 0; c < num_outer; c++)
         tf_outer[c] = tf_packed[c] = lshs_lds_load(ctx, i32, c, outer_addr);
      for (unsigned c = 0; c < num_inner; c++)
         tf_inner[c] = tf_packed[num_outer + c] = lshs_lds_load(ctx, i32, c, inner_addr);
   }

   if (shader->key.ge.part.tcs.epilog.prim_mode == TESS_PRIMITIVE_ISOLINES) {
      /* For isolines, the hardware expects tess factors in the
       * reverse order from what NIR specifies.
       */
      LLVMValueRef first = tf_packed[0];

      tf_packed[0] = tf_packed[1];
      tf_packed[1] = first;
   }

   /* Convert the outputs to vectors for stores. */
   LLVMValueRef store_lo = ac_build_gather_values(&ctx->ac, tf_packed, MIN2(tf_dwords, 4));
   LLVMValueRef store_hi = NULL;

   if (tf_dwords > 4)
      store_hi = ac_build_gather_values(&ctx->ac, tf_packed + 4, tf_dwords - 4);

   /* Get the buffer. */
   LLVMValueRef tf_ring = get_tess_ring_descriptor(ctx, TESS_FACTOR_RING);

   /* Get the offset. */
   LLVMValueRef tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset);
   LLVMValueRef patch_byte_offset =
      LLVMBuildMul(builder, rel_patch_id, LLVMConstInt(i32, 4 * tf_dwords, 0), "");
   unsigned const_offset = 0;

   /* Store the dynamic HS control word. Only relative patch 0 writes it, and
    * it shifts all tess factors by 4 bytes.
    */
   if (ctx->screen->info.gfx_level <= GFX8) {
      LLVMValueRef is_patch0 = LLVMBuildICmp(builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, "");

      ac_build_ifcc(&ctx->ac, is_patch0, 6504);
      ac_build_buffer_store_dword(&ctx->ac, tf_ring, LLVMConstInt(i32, 0x80000000, 0),
                                  NULL, LLVMConstInt(i32, const_offset, 0), tf_base,
                                  ACCESS_COHERENT);
      ac_build_endif(&ctx->ac, 6504);
      const_offset += 4;
   }

   /* Store the tessellation factors. */
   LLVMValueRef lo_offset =
      LLVMBuildAdd(builder, patch_byte_offset, LLVMConstInt(i32, const_offset, 0), "");
   ac_build_buffer_store_dword(&ctx->ac, tf_ring, store_lo, NULL, lo_offset, tf_base,
                               ACCESS_COHERENT);
   const_offset += 16;

   if (store_hi) {
      LLVMValueRef hi_offset =
         LLVMBuildAdd(builder, patch_byte_offset, LLVMConstInt(i32, const_offset, 0), "");
      ac_build_buffer_store_dword(&ctx->ac, tf_ring, store_hi, NULL, hi_offset, tf_base,
                                  ACCESS_COHERENT);
   }

   /* Store the tess factors into the offchip buffer if TES reads them. */
   if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) {
      LLVMValueRef offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING);
      LLVMValueRef offchip_base = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);

      unsigned outer_param = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
      LLVMValueRef outer_offset =
         get_tcs_tes_buffer_address(ctx, rel_patch_id, LLVMConstInt(i32, outer_param, 0));
      LLVMValueRef outer_vec = ac_build_gather_values(&ctx->ac, tf_outer, num_outer);

      ac_build_buffer_store_dword(&ctx->ac, offchip_ring, outer_vec, NULL, outer_offset,
                                  offchip_base, ACCESS_COHERENT);

      if (num_inner) {
         unsigned inner_param = ac_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
         LLVMValueRef inner_offset =
            get_tcs_tes_buffer_address(ctx, rel_patch_id, LLVMConstInt(i32, inner_param, 0));
         LLVMValueRef inner_vec = ac_build_gather_values(&ctx->ac, tf_inner, num_inner);

         ac_build_buffer_store_dword(&ctx->ac, offchip_ring, inner_vec, NULL, inner_offset,
                                     offchip_base, ACCESS_COHERENT);
      }
   }

   ac_build_endif(&ctx->ac, 6503);
}
352 
353 /* This only writes the tessellation factor levels. */
si_llvm_tcs_build_end(struct si_shader_context * ctx)354 void si_llvm_tcs_build_end(struct si_shader_context *ctx)
355 {
356    LLVMBuilderRef builder = ctx->ac.builder;
357    LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
358 
359    rel_patch_id = si_get_rel_patch_id(ctx);
360    invocation_id = si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 8, 5);
361    tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
362 
363    if (ctx->screen->info.gfx_level >= GFX9) {
364       LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
365       LLVMValueRef values[2];
366 
367       ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
368 
369       values[0] = rel_patch_id;
370       values[1] = LLVMGetUndef(ctx->ac.i32);
371       rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
372 
373       values[0] = tf_lds_offset;
374       values[1] = LLVMGetUndef(ctx->ac.i32);
375       tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
376 
377       values[0] = invocation_id;
378       values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
379       invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
380    }
381 
382    /* Return epilog parameters from this function. */
383    LLVMValueRef ret = ctx->return_value;
384    unsigned vgpr;
385 
386    if (ctx->screen->info.gfx_level >= GFX9) {
387       ret =
388          si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
389       ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
390       /* Tess offchip and tess factor offsets are at the beginning. */
391       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
392       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
393       vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR + 1;
394    } else {
395       ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
396       ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, GFX6_SGPR_TCS_OFFCHIP_ADDR);
397       /* Tess offchip and tess factor offsets are after user SGPRs. */
398       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
399       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
400       vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
401    }
402 
403    /* VGPRs */
404    rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
405    invocation_id = ac_to_float(&ctx->ac, invocation_id);
406    tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
407 
408    /* Leave a hole corresponding to the two input VGPRs. This ensures that
409     * the invocation_id output does not alias the tcs_rel_ids input,
410     * which saves a V_MOV on gfx9.
411     */
412    vgpr += 2;
413 
414    ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
415    ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
416 
417    struct si_shader_info *info = &ctx->shader->selector->info;
418    if (info->tessfactors_are_def_in_all_invocs) {
419       vgpr++; /* skip the tess factor LDS offset */
420 
421       /* get tess factor driver location */
422       int outer_loc = -1;
423       int inner_loc = -1;
424       for (int i = 0; i < info->num_outputs; i++) {
425          unsigned semantic = info->output_semantic[i];
426          if (semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
427             outer_loc = i;
428          else if (semantic == VARYING_SLOT_TESS_LEVEL_INNER)
429             inner_loc = i;
430       }
431 
432       for (unsigned i = 0; i < 6; i++) {
433          int loc = i < 4 ? outer_loc : inner_loc;
434          LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) :
435             LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[loc * 4 + i % 4], "");
436          value = ac_to_float(&ctx->ac, value);
437          ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
438       }
439    } else {
440       ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
441    }
442    ctx->return_value = ret;
443 }
444 
si_llvm_ls_build_end(struct si_shader_context * ctx)445 void si_llvm_ls_build_end(struct si_shader_context *ctx)
446 {
447    struct si_shader *shader = ctx->shader;
448    bool same_thread_count = shader->key.ge.opt.same_patch_vertices;
449 
450    /* Only need return value when merged shader on part mode or mono mode with same thread count. */
451    if (ctx->screen->info.gfx_level < GFX9 || (shader->is_monolithic && !same_thread_count))
452       return;
453 
454    if (!ctx->shader->is_monolithic)
455       ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
456 
457    LLVMValueRef ret = ctx->return_value;
458 
459    ret = si_insert_input_ptr(ctx, ret, ctx->args->other_const_and_shader_buffers, 0);
460    ret = si_insert_input_ptr(ctx, ret, ctx->args->other_samplers_and_images, 1);
461    ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2);
462    ret = si_insert_input_ret(ctx, ret, ctx->args->ac.merged_wave_info, 3);
463    ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4);
464    if (ctx->screen->info.gfx_level <= GFX10_3)
465       ret = si_insert_input_ret(ctx, ret, ctx->args->ac.scratch_offset, 5);
466 
467    ret = si_insert_input_ptr(ctx, ret, ctx->args->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
468    ret = si_insert_input_ptr(ctx, ret, ctx->args->bindless_samplers_and_images,
469                              8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
470 
471    ret = si_insert_input_ret(ctx, ret, ctx->args->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
472 
473    ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
474    ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
475 
476    unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
477    ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_patch_id, vgpr++);
478    ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_rel_ids, vgpr++);
479 
480    if (same_thread_count) {
481       /* Same thread count is set only when mono mode. */
482       assert(shader->is_monolithic);
483 
484       struct si_shader_info *info = &shader->selector->info;
485       LLVMValueRef *addrs = ctx->abi.outputs;
486 
487       for (unsigned i = 0; i < info->num_outputs; i++) {
488          unsigned semantic = info->output_semantic[i];
489          int param = si_shader_io_get_unique_index(semantic);
490 
491          if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param)))
492             continue;
493 
494          for (unsigned chan = 0; chan < 4; chan++) {
495             if (!(info->output_usagemask[i] & (1 << chan)))
496                continue;
497 
498             LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
499 
500             ret = LLVMBuildInsertValue(ctx->ac.builder, ret, value, vgpr + param * 4 + chan, "");
501          }
502       }
503    }
504 
505    ctx->return_value = ret;
506 }
507 
508 /**
509  * Compile the TCS epilog function. This writes tessellation factors to memory
510  * based on the output primitive type of the tessellator (determined by TES).
511  */
si_llvm_build_tcs_epilog(struct si_shader_context * ctx,union si_shader_part_key * key)512 void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
513 {
514    struct ac_arg rel_patch_id;
515    struct ac_arg invocation_id;
516    struct ac_arg tcs_out_current_patch_data_offset;
517    struct ac_arg tess_factors[6];
518    si_get_tcs_epilog_args(ctx->screen->info.gfx_level, ctx->args, &rel_patch_id, &invocation_id,
519                           &tcs_out_current_patch_data_offset, tess_factors);
520 
521    /* Create the function. */
522    si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.gfx_level >= GFX7 ? 128 : 0);
523    ac_declare_lds_as_pointer(&ctx->ac);
524 
525    LLVMValueRef invoc0_tess_factors[6];
526    for (unsigned i = 0; i < 6; i++)
527       invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
528 
529    si_write_tess_factors(ctx, key, ac_get_arg(&ctx->ac, rel_patch_id),
530                          ac_get_arg(&ctx->ac, invocation_id),
531                          ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
532                          invoc0_tess_factors, invoc0_tess_factors + 4);
533 
534    LLVMBuildRetVoid(ctx->ac.builder);
535 }
536 
si_llvm_init_tcs_callbacks(struct si_shader_context * ctx)537 void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
538 {
539    ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
540 }
541