• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2020 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "ac_nir.h"
26 #include "si_pipe.h"
27 #include "si_shader_internal.h"
28 #include "si_query.h"
29 #include "sid.h"
30 #include "util/u_memory.h"
31 
si_is_es_thread(struct si_shader_context * ctx)32 LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
33 {
34    /* Return true if the current thread should execute an ES thread. */
35    return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
36                         si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8), "");
37 }
38 
si_is_gs_thread(struct si_shader_context * ctx)39 LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
40 {
41    /* Return true if the current thread should execute a GS thread. */
42    return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
43                         si_unpack_param(ctx, ctx->args.merged_wave_info, 8, 8), "");
44 }
45 
/* Pass GS inputs from ES to GS on GFX9 (merged ES+GS shaders).
 *
 * Builds the ES epilog return value: every SGPR/VGPR input that the GS part
 * needs is inserted into the return aggregate at a fixed slot index, so the
 * slot numbers below form an ABI with the GS prolog and must not change.
 */
static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
{
   /* Non-monolithic shaders opened a wrap-if in the prolog; close it here. */
   if (!ctx->shader->is_monolithic)
      ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);

   LLVMValueRef ret = ctx->return_value;

   /* SGPRs 0-5: descriptors and wave info shared by the merged shader. */
   ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
   ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
   if (ctx->shader->key.ge.as_ngg)
      ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);
   else
      ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);
   ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
   /* Slot 5: GFX11 replaced the scratch offset with the attribute ring offset. */
   if (ctx->screen->info.gfx_level >= GFX11)
      ret = si_insert_input_ret(ctx, ret, ctx->args.gs_attr_offset, 5);
   else
      ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
   /* User SGPRs start at slot 8. */
   ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
   ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
                             8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
   if (ctx->screen->use_ngg) {
      ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
      ret = si_insert_input_ptr(ctx, ret, ctx->small_prim_cull_info, 8 + GFX9_SGPR_SMALL_PRIM_CULL_INFO);
      if (ctx->screen->info.gfx_level >= GFX11)
         ret = si_insert_input_ptr(ctx, ret, ctx->gs_attr_address, 8 + GFX9_SGPR_ATTRIBUTE_RING_ADDR);
   }

   /* VGPRs follow all user SGPRs; order must match the GS input VGPR layout. */
   unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;

   ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[0], vgpr++);
   ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[1], vgpr++);
   ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++);
   ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
   ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_vtx_offset[2], vgpr++);
   ctx->return_value = ret;
}
84 
si_llvm_es_build_end(struct si_shader_context * ctx)85 void si_llvm_es_build_end(struct si_shader_context *ctx)
86 {
87    if (ctx->screen->info.gfx_level >= GFX9)
88       si_set_es_return_value_for_gs(ctx);
89 }
90 
si_get_gs_wave_id(struct si_shader_context * ctx)91 static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
92 {
93    if (ctx->screen->info.gfx_level >= GFX9)
94       return si_unpack_param(ctx, ctx->args.merged_wave_info, 16, 8);
95    else
96       return ac_get_arg(&ctx->ac, ctx->args.gs_wave_id);
97 }
98 
ngg_get_emulated_counters_buf(struct si_shader_context * ctx)99 static LLVMValueRef ngg_get_emulated_counters_buf(struct si_shader_context *ctx)
100 {
101    LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
102 
103    return ac_build_load_to_sgpr(&ctx->ac, buf_ptr,
104                                 LLVMConstInt(ctx->ac.i32, SI_GS_QUERY_EMULATED_COUNTERS_BUF, false));
105 }
106 
/* Emit the GS epilog: flush pending stores, bump emulated pipeline-statistics
 * counters if enabled, and signal GS_DONE to the hardware.
 */
void si_llvm_gs_build_end(struct si_shader_context *ctx)
{
   struct si_shader_info UNUSED *info = &ctx->shader->selector->info;

   assert(info->num_outputs <= AC_LLVM_MAX_OUTPUTS);

   /* Make sure GSVS ring stores are visible before signaling completion. */
   if (ctx->screen->info.gfx_level >= GFX10)
      ac_build_waitcnt(&ctx->ac, AC_WAIT_VSTORE);

   if (ctx->screen->use_ngg) {
      /* Implement PIPE_STAT_QUERY_GS_PRIMITIVES for non-ngg draws because we can't
       * use pipeline statistics (they would be correct but when screen->use_ngg, we
       * can't know when the query is started if the next draw(s) will use ngg or not).
       */
      LLVMValueRef tmp = GET_FIELD(ctx, GS_STATE_PIPELINE_STATS_EMU);
      tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i1, "");
      ac_build_ifcc(&ctx->ac, tmp, 5229); /* if (GS_PIPELINE_STATS_EMU) */
      {
         /* Derive the primitive count from the emitted-vertex count based on
          * the declared output primitive type; clamp at 0 since fewer vertices
          * than a full primitive emits nothing.
          */
         LLVMValueRef prim = ctx->ac.i32_0;
         switch (ctx->shader->selector->info.base.gs.output_primitive) {
         case SHADER_PRIM_POINTS:
            prim = ctx->gs_emitted_vertices;
            break;
         case SHADER_PRIM_LINE_STRIP:
            prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, ctx->ac.i32_1, "");
            prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0);
            break;
         case SHADER_PRIM_TRIANGLE_STRIP:
            prim = LLVMBuildSub(ctx->ac.builder, ctx->gs_emitted_vertices, LLVMConstInt(ctx->ac.i32, 2, 0), "");
            prim = ac_build_imax(&ctx->ac, prim, ctx->ac.i32_0);
            break;
         }

         /* Atomically add the primitive count to the GS_PRIMITIVES counter. */
         LLVMValueRef args[] = {
            prim,
            ngg_get_emulated_counters_buf(ctx),
            LLVMConstInt(ctx->ac.i32,
                         si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4,
                         false),
            ctx->ac.i32_0,                            /* soffset */
            ctx->ac.i32_0,                            /* cachepolicy */
         };
         ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);

         /* Each GS thread counts as one invocation; reuse args, add 1. */
         args[0] = ctx->ac.i32_1;
         args[2] = LLVMConstInt(ctx->ac.i32,
                                si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_INVOCATIONS) * 4,
                                false);
         ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", ctx->ac.i32, args, 5, 0);
      }
      ac_build_endif(&ctx->ac, 5229);
   }

   /* Tell the hardware this GS wave is finished. */
   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx));

   /* Close the merged-shader wrap-if opened in the prolog on GFX9+. */
   if (ctx->screen->info.gfx_level >= GFX9)
      ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
}
165 
/* Emit one vertex from the geometry shader.
 *
 * Legacy (non-NGG) path: writes the vertex's enabled output components for
 * the given stream to the GSVS ring, then signals vertex emission with a
 * sendmsg. NGG delegates to gfx10_ngg_gs_emit_vertex.
 */
static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   if (ctx->shader->key.ge.as_ngg) {
      gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
      return;
   }

   struct si_shader_info *info = &ctx->shader->selector->info;
   struct si_shader *shader = ctx->shader;
   LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset);
   LLVMValueRef gs_next_vertex;
   LLVMValueRef can_emit;
   unsigned chan, offset;
   int i;

   /* Write vertex attribute values to GSVS ring */
   gs_next_vertex = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i32, ctx->gs_next_vertex[stream], "");

   /* If this thread has already emitted the declared maximum number of
    * vertices, skip the write: excessive vertex emissions are not
    * supposed to have any effect.
    *
    * If the shader has no writes to memory, kill it instead. This skips
    * further memory loads and may allow LLVM to skip to the end
    * altogether.
    */
   can_emit =
      LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
                    LLVMConstInt(ctx->ac.i32, shader->selector->info.base.gs.vertices_out, 0), "");

   bool use_kill = !info->base.writes_memory;
   if (use_kill) {
      ac_build_kill_if_false(&ctx->ac, can_emit);
   } else {
      /* Can't kill a shader with side effects; branch around the write. */
      ac_build_ifcc(&ctx->ac, can_emit, 6505);
   }

   /* Store each enabled component that belongs to this stream. The ring is
    * laid out component-major: each component occupies vertices_out dwords.
    */
   offset = 0;
   for (i = 0; i < info->num_outputs; i++) {
      for (chan = 0; chan < 4; chan++) {
         if (!(info->output_usagemask[i] & (1 << chan)) ||
             ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
            continue;

         LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
         LLVMValueRef voffset =
            LLVMConstInt(ctx->ac.i32, offset * shader->selector->info.base.gs.vertices_out, 0);
         offset++;

         /* dword index = component_base + vertex index; scale to bytes. */
         voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
         voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, 0), "");

         out_val = ac_to_integer(&ctx->ac, out_val);

         ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL,
                                     voffset, soffset, ac_glc | ac_slc | ac_swizzled);
      }
   }

   /* Advance the per-stream vertex counter. */
   gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->ac.i32_1, "");
   LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);

   /* Signal vertex emission if vertex data was written. */
   if (offset) {
      ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
                       si_get_gs_wave_id(ctx));

      /* Track emitted vertices for the emulated pipeline-statistics query. */
      ctx->gs_emitted_vertices = LLVMBuildAdd(ctx->ac.builder, ctx->gs_emitted_vertices,
                                              ctx->ac.i32_1, "vert");
   }

   if (!use_kill)
      ac_build_endif(&ctx->ac, 6505);
}
243 
244 /* Cut one primitive from the geometry shader */
si_llvm_emit_primitive(struct ac_shader_abi * abi,unsigned stream)245 static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)
246 {
247    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
248 
249    if (ctx->shader->key.ge.as_ngg) {
250       LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
251       return;
252    }
253 
254    /* Signal primitive cut */
255    ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
256                     si_get_gs_wave_id(ctx));
257 }
258 
/* Load (or declare) the ESGS ring for the current shader.
 *
 * Up to GFX8 the ring lives in memory and is accessed via a buffer
 * descriptor; for the ES writer side the descriptor is patched to enable
 * thread swizzling. From GFX9 the ring lives in LDS instead.
 */
void si_preload_esgs_ring(struct si_shader_context *ctx)
{
   LLVMBuilderRef builder = ctx->ac.builder;

   if (ctx->screen->info.gfx_level <= GFX8) {
      LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_ESGS, 0);
      LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);

      ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);

      if (ctx->stage != MESA_SHADER_GEOMETRY) {
         /* ES writer: patch dwords 1 and 3 of the descriptor to set up the
          * swizzled, per-thread access pattern.
          */
         LLVMValueRef desc1 = LLVMBuildExtractElement(builder, ctx->esgs_ring, ctx->ac.i32_1, "");
         LLVMValueRef desc3 = LLVMBuildExtractElement(builder, ctx->esgs_ring,
                                                      LLVMConstInt(ctx->ac.i32, 3, 0), "");
         desc1 = LLVMBuildOr(builder, desc1, LLVMConstInt(ctx->ac.i32,
                                                          S_008F04_SWIZZLE_ENABLE_GFX6(1), 0), "");
         desc3 = LLVMBuildOr(builder, desc3, LLVMConstInt(ctx->ac.i32,
                                                          S_008F0C_ELEMENT_SIZE(1) |
                                                          S_008F0C_INDEX_STRIDE(3) |
                                                          S_008F0C_ADD_TID_ENABLE(1), 0), "");

         /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
         if (ctx->screen->info.gfx_level == GFX8) {
            desc3 = LLVMBuildAnd(builder, desc3,
                                 LLVMConstInt(ctx->ac.i32, C_008F0C_DATA_FORMAT, 0), "");
         }

         ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc1, ctx->ac.i32_1, "");
         ctx->esgs_ring = LLVMBuildInsertElement(builder, ctx->esgs_ring, desc3,
                                                 LLVMConstInt(ctx->ac.i32, 3, 0), "");
      }
   } else {
      /* GFX9+: the ESGS ring is in LDS. */
      if (USE_LDS_SYMBOLS) {
         /* Declare the ESGS ring as an explicit LDS symbol. */
         si_llvm_declare_esgs_ring(ctx);
         ctx->ac.lds = ctx->esgs_ring;
      } else {
         ac_declare_lds_as_pointer(&ctx->ac);
         ctx->esgs_ring = ctx->ac.lds;
      }
   }
}
301 
/* Build per-stream GSVS ring descriptors for the GS writer.
 *
 * Starting from the base GSVS descriptor in the internal bindings, derive one
 * descriptor per vertex stream with the proper base offset, stride, record
 * count, and format/swizzle bits. No-op on GFX11+ (NGG only).
 */
void si_preload_gs_rings(struct si_shader_context *ctx)
{
   if (ctx->ac.gfx_level >= GFX11)
      return;

   const struct si_shader_selector *sel = ctx->shader->selector;
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, SI_RING_GSVS, 0);
   LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
   LLVMValueRef base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);

   /* The conceptual layout of the GSVS ring is
    *   v0c0 .. vLv0 v0c1 .. vLc1 ..
    * but the real memory layout is swizzled across
    * threads:
    *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
    *   t16v0c0 ..
    * Override the buffer descriptor accordingly.
    */
   LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
   uint64_t stream_offset = 0;

   for (unsigned stream = 0; stream < 4; ++stream) {
      unsigned num_components;
      unsigned stride;
      unsigned num_records;
      LLVMValueRef ring, tmp;

      num_components = sel->info.num_stream_output_components[stream];
      if (!num_components)
         continue;

      /* Per-thread stride: all components of all emitted vertices. */
      stride = 4 * num_components * sel->info.base.gs.vertices_out;

      /* Limit on the stride field for <= GFX7. */
      assert(stride < (1 << 14));

      num_records = ctx->ac.wave_size;

      /* Add this stream's base offset to the 48-bit base address
       * (dwords 0-1 viewed as i64).
       */
      ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
      tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_0, "");
      tmp = LLVMBuildAdd(builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), "");
      stream_offset += stride * ctx->ac.wave_size;

      /* Patch dword 1 (stride + swizzle) and dword 2 (record count). */
      ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_0, "");
      ring = LLVMBuildBitCast(builder, ring, ctx->ac.v4i32, "");
      tmp = LLVMBuildExtractElement(builder, ring, ctx->ac.i32_1, "");
      tmp = LLVMBuildOr(
         builder, tmp,
         LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride) | S_008F04_SWIZZLE_ENABLE_GFX6(1), 0), "");
      ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->ac.i32_1, "");
      ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, num_records, 0),
                                    LLVMConstInt(ctx->ac.i32, 2, 0), "");

      /* Dword 3: format and swizzle control, gfx-level dependent. */
      uint32_t rsrc3 =
         S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
         S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
         S_008F0C_ADD_TID_ENABLE(1);

      if (ctx->ac.gfx_level >= GFX10) {
         rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         /* If MUBUF && ADD_TID_ENABLE, DATA_FORMAT means STRIDE[14:17] on gfx8-9, so set 0. */
         unsigned data_format = ctx->ac.gfx_level == GFX8 || ctx->ac.gfx_level == GFX9 ?
                                   0 : V_008F0C_BUF_DATA_FORMAT_32;

         rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(data_format) |
                  S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
      }

      ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->ac.i32, rsrc3, false),
                                    LLVMConstInt(ctx->ac.i32, 3, 0), "");

      ctx->gsvs_ring[stream] = ring;
   }
}
381 
/* Generate code for the hardware VS shader stage to go with a geometry shader.
 *
 * The GS copy shader reads vertex data that the GS wrote to the GSVS ring,
 * performs streamout (when not using NGG streamout), and exports stream-0
 * outputs as a vertex shader would. Returns the compiled shader, or NULL on
 * allocation/compile failure. The caller owns the returned shader.
 */
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
                                             struct ac_llvm_compiler *compiler,
                                             struct si_shader_selector *gs_selector,
                                             const struct pipe_stream_output_info *so,
                                             struct util_debug_callback *debug)
{
   struct si_shader_context ctx;
   struct si_shader *shader;
   LLVMBuilderRef builder;
   struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];
   struct si_shader_info *gsinfo = &gs_selector->info;
   int i;

   shader = CALLOC_STRUCT(si_shader);
   if (!shader)
      return NULL;

   /* We can leave the fence as permanently signaled because the GS copy
    * shader only becomes visible globally after it has been compiled. */
   util_queue_fence_init(&shader->ready);

   shader->selector = gs_selector;
   shader->is_gs_copy_shader = true;
   shader->wave_size = si_determine_wave_size(sscreen, shader);

   STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
   memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
          sizeof(shader->info.vs_output_param_offset));

   /* Assign param-export slots for varyings that have at least one channel
    * on stream 0 (only stream 0 is exported to the parameter cache).
    */
   for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
      unsigned semantic = gsinfo->output_semantic[i];

      /* Skip if no channel writes to stream 0. */
      if (!nir_slot_is_varying(semantic) ||
          (gsinfo->output_streams[i] & 0x03 &&
           gsinfo->output_streams[i] & 0x0c &&
           gsinfo->output_streams[i] & 0x30 &&
           gsinfo->output_streams[i] & 0xc0))
         continue;

      shader->info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
      shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
   }

   /* The copy shader runs as a hardware VS. */
   si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
   ctx.shader = shader;
   ctx.stage = MESA_SHADER_VERTEX;
   ctx.so = *so;

   builder = ctx.ac.builder;

   /* Build the main function. */
   si_llvm_create_main_func(&ctx, false);

   LLVMValueRef buf_ptr = ac_get_arg(&ctx.ac, ctx.internal_bindings);
   ctx.gsvs_ring[0] =
      ac_build_load_to_sgpr(&ctx.ac, buf_ptr, LLVMConstInt(ctx.ac.i32, SI_RING_GSVS, 0));

   /* Byte offset of this vertex's slot in the ring. */
   LLVMValueRef voffset =
      LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id, LLVMConstInt(ctx.ac.i32, 4, 0), "");

   /* Fetch the vertex stream ID.*/
   LLVMValueRef stream_id;

   if (!sscreen->use_ngg_streamout && ctx.so.num_outputs)
      stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2);
   else
      stream_id = ctx.ac.i32_0;

   /* Fill in output information. */
   for (i = 0; i < gsinfo->num_outputs; ++i) {
      outputs[i].semantic = gsinfo->output_semantic[i];
      outputs[i].vertex_streams = gsinfo->output_streams[i];
   }

   /* Switch on the stream id: one basic block per active stream, joined at
    * a common end block.
    */
   LLVMBasicBlockRef end_bb;
   LLVMValueRef switch_inst;

   end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end");
   switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4);

   for (int stream = 0; stream < 4; stream++) {
      LLVMBasicBlockRef bb;
      unsigned offset;

      if (!gsinfo->num_stream_output_components[stream])
         continue;

      /* Streams > 0 only matter for streamout. */
      if (stream > 0 && !ctx.so.num_outputs)
         continue;

      bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");
      LLVMAddCase(switch_inst, LLVMConstInt(ctx.ac.i32, stream, 0), bb);
      LLVMPositionBuilderAtEnd(builder, bb);

      /* Fetch vertex data from GSVS ring */
      offset = 0;
      for (i = 0; i < gsinfo->num_outputs; ++i) {
         for (unsigned chan = 0; chan < 4; chan++) {
            if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
                ((outputs[i].vertex_streams >> (chan * 2)) & 0x3) != stream) {
               outputs[i].values[chan] = LLVMGetUndef(ctx.ac.f32);
               continue;
            }

            /* Each component block spans vertices_out * 16 threads * 4 bytes.
             * NOTE(review): the 16 presumably reflects the ring's 16-thread
             * swizzle granularity — confirm against si_preload_gs_rings.
             */
            LLVMValueRef soffset =
               LLVMConstInt(ctx.ac.i32, offset * gs_selector->info.base.gs.vertices_out * 16 * 4, 0);
            offset++;

            outputs[i].values[chan] =
               ac_build_buffer_load(&ctx.ac, ctx.gsvs_ring[0], 1, ctx.ac.i32_0, voffset, soffset,
                                    ctx.ac.f32, ac_glc | ac_slc, true, false);
         }
      }

      /* Streamout and exports. */
      if (!sscreen->use_ngg_streamout && ctx.so.num_outputs) {
         si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream);
      }

      if (stream == 0)
         si_llvm_build_vs_exports(&ctx, NULL, outputs, gsinfo->num_outputs);

      LLVMBuildBr(builder, end_bb);
   }

   LLVMPositionBuilderAtEnd(builder, end_bb);

   LLVMBuildRetVoid(ctx.ac.builder);

   ctx.stage = MESA_SHADER_GEOMETRY; /* override for shader dumping */
   si_llvm_optimize_module(&ctx);

   /* Compile, upload, and optionally dump the binary. */
   bool ok = false;
   if (si_compile_llvm(sscreen, &ctx.shader->binary, &ctx.shader->config, ctx.compiler, &ctx.ac,
                       debug, MESA_SHADER_GEOMETRY, "GS Copy Shader", false)) {
      assert(!ctx.shader->config.scratch_bytes_per_wave);
      if (!ctx.shader->config.scratch_bytes_per_wave)
         ok = si_shader_binary_upload(sscreen, ctx.shader, 0);

      if (si_can_dump_shader(sscreen, MESA_SHADER_GEOMETRY))
         fprintf(stderr, "GS Copy Shader:\n");
      si_shader_dump(sscreen, ctx.shader, debug, stderr, true);
   }

   si_llvm_dispose(&ctx);

   if (!ok) {
      FREE(shader);
      shader = NULL;
   } else {
      si_fix_resource_usage(sscreen, shader);
   }
   return shader;
}
538 
si_llvm_init_gs_callbacks(struct si_shader_context * ctx)539 void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
540 {
541    ctx->abi.emit_vertex = si_llvm_emit_vertex;
542    ctx->abi.emit_primitive = si_llvm_emit_primitive;
543 }
544