• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "radv_nir_to_llvm.h"
12 #include "nir/nir.h"
13 #include "radv_debug.h"
14 #include "radv_llvm_helper.h"
15 #include "radv_shader.h"
16 #include "radv_shader_args.h"
17 
18 #include "ac_binary.h"
19 #include "ac_llvm_build.h"
20 #include "ac_nir.h"
21 #include "ac_nir_to_llvm.h"
22 #include "ac_shader_abi.h"
23 #include "ac_shader_util.h"
24 #include "sid.h"
25 
26 struct radv_shader_context {
27    struct ac_llvm_context ac;
28    const struct nir_shader *shader;
29    struct ac_shader_abi abi;
30    const struct radv_nir_compiler_options *options;
31    const struct radv_shader_info *shader_info;
32    const struct radv_shader_args *args;
33 
34    gl_shader_stage stage;
35 
36    unsigned max_workgroup_size;
37    LLVMContextRef context;
38    struct ac_llvm_pointer main_function;
39 };
40 
41 static inline struct radv_shader_context *
radv_shader_context_from_abi(struct ac_shader_abi * abi)42 radv_shader_context_from_abi(struct ac_shader_abi *abi)
43 {
44    return container_of(abi, struct radv_shader_context, abi);
45 }
46 
47 static struct ac_llvm_pointer
create_llvm_function(struct ac_llvm_context * ctx,LLVMModuleRef module,LLVMBuilderRef builder,const struct ac_shader_args * args,enum ac_llvm_calling_convention convention,unsigned max_workgroup_size,const struct radv_nir_compiler_options * options)48 create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
49                      const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
50                      unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
51 {
52    struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
53 
54    if (options->info->address32_hi) {
55       ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits",
56                                            options->info->address32_hi);
57    }
58 
59    ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size);
60    ac_llvm_set_target_features(main_function.value, ctx, true);
61 
62    return main_function;
63 }
64 
65 static enum ac_llvm_calling_convention
get_llvm_calling_convention(LLVMValueRef func,gl_shader_stage stage)66 get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage)
67 {
68    switch (stage) {
69    case MESA_SHADER_VERTEX:
70    case MESA_SHADER_TESS_EVAL:
71       return AC_LLVM_AMDGPU_VS;
72       break;
73    case MESA_SHADER_GEOMETRY:
74       return AC_LLVM_AMDGPU_GS;
75       break;
76    case MESA_SHADER_TESS_CTRL:
77       return AC_LLVM_AMDGPU_HS;
78       break;
79    case MESA_SHADER_FRAGMENT:
80       return AC_LLVM_AMDGPU_PS;
81       break;
82    case MESA_SHADER_COMPUTE:
83       return AC_LLVM_AMDGPU_CS;
84       break;
85    default:
86       unreachable("Unhandle shader type");
87    }
88 }
89 
90 /* Returns whether the stage is a stage that can be directly before the GS */
91 static bool
is_pre_gs_stage(gl_shader_stage stage)92 is_pre_gs_stage(gl_shader_stage stage)
93 {
94    return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
95 }
96 
97 static void
create_function(struct radv_shader_context * ctx,gl_shader_stage stage,bool has_previous_stage)98 create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
99 {
100    if (ctx->ac.gfx_level >= GFX10) {
101       if (is_pre_gs_stage(stage) && ctx->shader_info->is_ngg) {
102          /* On GFX10+, VS and TES are merged into GS for NGG. */
103          stage = MESA_SHADER_GEOMETRY;
104          has_previous_stage = true;
105       }
106    }
107 
108    ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
109                                              get_llvm_calling_convention(ctx->main_function.value, stage),
110                                              ctx->max_workgroup_size, ctx->options);
111 
112    if (stage == MESA_SHADER_TESS_CTRL || (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) ||
113        ctx->shader_info->is_ngg ||
114        /* GFX9 has the ESGS ring buffer in LDS. */
115        (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
116       ac_declare_lds_as_pointer(&ctx->ac);
117    }
118 }
119 
120 static LLVMValueRef
radv_load_base_vertex(struct ac_shader_abi * abi,bool non_indexed_is_zero)121 radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
122 {
123    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
124    return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
125 }
126 
127 static LLVMValueRef
radv_load_rsrc(struct radv_shader_context * ctx,LLVMValueRef ptr,LLVMTypeRef type)128 radv_load_rsrc(struct radv_shader_context *ctx, LLVMValueRef ptr, LLVMTypeRef type)
129 {
130    if (ptr && LLVMTypeOf(ptr) == ctx->ac.i32) {
131       LLVMValueRef result;
132 
133       LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_CONST_32BIT);
134       ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, ptr_type, "");
135       LLVMSetMetadata(ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
136 
137       result = LLVMBuildLoad2(ctx->ac.builder, type, ptr, "");
138       LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
139 
140       return result;
141    }
142 
143    return ptr;
144 }
145 
146 static LLVMValueRef
radv_load_ubo(struct ac_shader_abi * abi,LLVMValueRef buffer_ptr)147 radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
148 {
149    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
150    return radv_load_rsrc(ctx, buffer_ptr, ctx->ac.v4i32);
151 }
152 
153 static LLVMValueRef
radv_load_ssbo(struct ac_shader_abi * abi,LLVMValueRef buffer_ptr,bool write,bool non_uniform)154 radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write, bool non_uniform)
155 {
156    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
157    return radv_load_rsrc(ctx, buffer_ptr, ctx->ac.v4i32);
158 }
159 
160 static LLVMValueRef
radv_get_sampler_desc(struct ac_shader_abi * abi,LLVMValueRef index,enum ac_descriptor_type desc_type)161 radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_descriptor_type desc_type)
162 {
163    struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
164 
165    /* 3 plane formats always have same size and format for plane 1 & 2, so
166     * use the tail from plane 1 so that we can store only the first 16 bytes
167     * of the last plane. */
168    if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
169       LLVMValueRef plane1_addr = LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
170       LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
171       LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);
172 
173       LLVMValueRef components[8];
174       for (unsigned i = 0; i < 4; ++i)
175          components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
176 
177       for (unsigned i = 4; i < 8; ++i)
178          components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i);
179       return ac_build_gather_values(&ctx->ac, components, 8);
180    }
181 
182    bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER;
183    return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
184 }
185 
186 static LLVMValueRef
radv_load_output(struct radv_shader_context * ctx,unsigned index,unsigned chan)187 radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
188 {
189    int idx = ac_llvm_reg_index_soa(index, chan);
190    LLVMValueRef output = ctx->abi.outputs[idx];
191    LLVMTypeRef type = ctx->abi.is_16bit[idx] ? ctx->ac.f16 : ctx->ac.f32;
192    return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
193 }
194 
195 static void
ac_llvm_finalize_module(struct radv_shader_context * ctx,struct ac_midend_optimizer * meo)196 ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
197 {
198    ac_llvm_optimize_module(meo, ctx->ac.module);
199    ac_llvm_context_dispose(&ctx->ac);
200 }
201 
202 /* Ensure that the esgs ring is declared.
203  *
204  * We declare it with 64KB alignment as a hint that the
205  * pointer value will always be 0.
206  */
207 static void
declare_esgs_ring(struct radv_shader_context * ctx)208 declare_esgs_ring(struct radv_shader_context *ctx)
209 {
210    assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
211 
212    LLVMValueRef esgs_ring =
213       LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), "esgs_ring", AC_ADDR_SPACE_LDS);
214    LLVMSetLinkage(esgs_ring, LLVMExternalLinkage);
215    LLVMSetAlignment(esgs_ring, 64 * 1024);
216 }
217 
218 static LLVMModuleRef
ac_translate_nir_to_llvm(struct ac_llvm_compiler * ac_llvm,const struct radv_nir_compiler_options * options,const struct radv_shader_info * info,struct nir_shader * const * shaders,int shader_count,const struct radv_shader_args * args)219 ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
220                          const struct radv_shader_info *info, struct nir_shader *const *shaders, int shader_count,
221                          const struct radv_shader_args *args)
222 {
223    struct radv_shader_context ctx = {0};
224    ctx.args = args;
225    ctx.options = options;
226    ctx.shader_info = info;
227 
228    enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
229 
230    if (shaders[0]->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
231       float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
232    }
233 
234    bool exports_mrtz = false;
235    bool exports_color_null = false;
236    if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT) {
237       exports_mrtz = info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask;
238       exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
239    }
240 
241    ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, float_mode, info->wave_size, info->ballot_bit_size,
242                         exports_color_null, exports_mrtz);
243 
244    uint32_t length = 1;
245    for (uint32_t i = 0; i < shader_count; i++)
246       if (shaders[i]->info.name)
247          length += strlen(shaders[i]->info.name) + 1;
248 
249    char *name = malloc(length);
250    if (name) {
251       uint32_t offset = 0;
252       for (uint32_t i = 0; i < shader_count; i++) {
253          if (!shaders[i]->info.name)
254             continue;
255 
256          strcpy(name + offset, shaders[i]->info.name);
257          offset += strlen(shaders[i]->info.name);
258          if (i != shader_count - 1)
259             name[offset++] = ',';
260       }
261 
262       LLVMSetSourceFileName(ctx.ac.module, name, offset);
263    }
264 
265    ctx.context = ctx.ac.context;
266 
267    ctx.max_workgroup_size = info->workgroup_size;
268 
269    create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
270 
271    ctx.abi.load_ubo = radv_load_ubo;
272    ctx.abi.load_ssbo = radv_load_ssbo;
273    ctx.abi.load_sampler_desc = radv_get_sampler_desc;
274    ctx.abi.clamp_shadow_reference = false;
275    ctx.abi.robust_buffer_access = options->robust_buffer_access_llvm;
276    ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
277 
278    bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
279    if (shader_count >= 2 || is_ngg)
280       ac_init_exec_full_mask(&ctx.ac);
281 
282    if (is_ngg) {
283       if (!info->is_ngg_passthrough)
284          declare_esgs_ring(&ctx);
285 
286       if (ctx.stage == MESA_SHADER_GEOMETRY) {
287          /* Scratch space used by NGG GS for repacking vertices at the end. */
288          LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8);
289          LLVMValueRef gs_ngg_scratch =
290             LLVMAddGlobalInAddressSpace(ctx.ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
291          LLVMSetInitializer(gs_ngg_scratch, LLVMGetUndef(ai32));
292          LLVMSetLinkage(gs_ngg_scratch, LLVMExternalLinkage);
293          LLVMSetAlignment(gs_ngg_scratch, 4);
294 
295          /* Vertex emit space used by NGG GS for storing all vertex attributes. */
296          LLVMValueRef gs_ngg_emit =
297             LLVMAddGlobalInAddressSpace(ctx.ac.module, LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
298          LLVMSetInitializer(gs_ngg_emit, LLVMGetUndef(ai32));
299          LLVMSetLinkage(gs_ngg_emit, LLVMExternalLinkage);
300          LLVMSetAlignment(gs_ngg_emit, 4);
301       }
302 
303       /* GFX10 hang workaround - there needs to be an s_barrier before gs_alloc_req always */
304       if (ctx.ac.gfx_level == GFX10 && shader_count == 1)
305          ac_build_s_barrier(&ctx.ac, shaders[0]->info.stage);
306    }
307 
308    for (int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
309       ctx.stage = shaders[shader_idx]->info.stage;
310       ctx.shader = shaders[shader_idx];
311 
312       if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {
313          /* Execute a barrier before the second shader in
314           * a merged shader.
315           *
316           * Execute the barrier inside the conditional block,
317           * so that empty waves can jump directly to s_endpgm,
318           * which will also signal the barrier.
319           *
320           * This is possible in gfx9, because an empty wave
321           * for the second shader does not participate in
322           * the epilogue. With NGG, empty waves may still
323           * be required to export data (e.g. GS output vertices),
324           * so we cannot let them exit early.
325           *
326           * If the shader is TCS and the TCS epilog is present
327           * and contains a barrier, it will wait there and then
328           * reach s_endpgm.
329           */
330          ac_build_waitcnt(&ctx.ac, AC_WAIT_DS);
331          ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage);
332       }
333 
334       bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && shader_idx == 1);
335       LLVMBasicBlockRef merge_block = NULL;
336 
337       if (check_merged_wave_info) {
338          LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
339          LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
340          merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
341 
342          LLVMValueRef count =
343             ac_unpack_param(&ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
344          LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
345          LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, "");
346          LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
347 
348          LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
349       }
350 
351       if (!ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx])) {
352          abort();
353       }
354 
355       if (check_merged_wave_info) {
356          LLVMBuildBr(ctx.ac.builder, merge_block);
357          LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
358       }
359    }
360 
361    LLVMBuildRetVoid(ctx.ac.builder);
362 
363    if (options->dump_preoptir) {
364       fprintf(stderr, "%s LLVM IR:\n\n", radv_get_shader_name(info, shaders[shader_count - 1]->info.stage));
365       ac_dump_module(ctx.ac.module);
366       fprintf(stderr, "\n");
367    }
368 
369    ac_llvm_finalize_module(&ctx, ac_llvm->meo);
370 
371    free(name);
372 
373    return ctx.ac.module;
374 }
375 
376 static void
ac_diagnostic_handler(LLVMDiagnosticInfoRef di,void * context)377 ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
378 {
379    unsigned *retval = (unsigned *)context;
380    LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
381    char *description = LLVMGetDiagInfoDescription(di);
382 
383    if (severity == LLVMDSError) {
384       *retval = 1;
385       fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
386    }
387 
388    LLVMDisposeMessage(description);
389 }
390 
391 static unsigned
radv_llvm_compile(LLVMModuleRef M,char ** pelf_buffer,size_t * pelf_size,struct ac_llvm_compiler * ac_llvm)392 radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, struct ac_llvm_compiler *ac_llvm)
393 {
394    unsigned retval = 0;
395    LLVMContextRef llvm_ctx;
396 
397    /* Setup Diagnostic Handler*/
398    llvm_ctx = LLVMGetModuleContext(M);
399 
400    LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, &retval);
401 
402    /* Compile IR*/
403    if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
404       retval = 1;
405    return retval;
406 }
407 
408 static void
ac_compile_llvm_module(struct ac_llvm_compiler * ac_llvm,LLVMModuleRef llvm_module,struct radv_shader_binary ** rbinary,const char * name,const struct radv_nir_compiler_options * options)409 ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, struct radv_shader_binary **rbinary,
410                        const char *name, const struct radv_nir_compiler_options *options)
411 {
412    char *elf_buffer = NULL;
413    size_t elf_size = 0;
414    char *llvm_ir_string = NULL;
415 
416    if (options->dump_shader) {
417       fprintf(stderr, "%s LLVM IR:\n\n", name);
418       ac_dump_module(llvm_module);
419       fprintf(stderr, "\n");
420    }
421 
422    if (options->record_ir) {
423       char *llvm_ir = LLVMPrintModuleToString(llvm_module);
424       llvm_ir_string = strdup(llvm_ir);
425       LLVMDisposeMessage(llvm_ir);
426    }
427 
428    int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
429    if (v) {
430       fprintf(stderr, "compile failed\n");
431    }
432 
433    LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
434    LLVMDisposeModule(llvm_module);
435    LLVMContextDispose(ctx);
436 
437    size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
438    size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
439    struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
440    memcpy(rbin->data, elf_buffer, elf_size);
441    if (llvm_ir_string)
442       memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);
443 
444    rbin->base.type = RADV_BINARY_TYPE_RTLD;
445    rbin->base.total_size = alloc_size;
446    rbin->elf_size = elf_size;
447    rbin->llvm_ir_size = llvm_ir_size;
448    *rbinary = &rbin->base;
449 
450    free(llvm_ir_string);
451    free(elf_buffer);
452 }
453 
454 static void
radv_compile_nir_shader(struct ac_llvm_compiler * ac_llvm,const struct radv_nir_compiler_options * options,const struct radv_shader_info * info,struct radv_shader_binary ** rbinary,const struct radv_shader_args * args,struct nir_shader * const * nir,int nir_count)455 radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
456                         const struct radv_shader_info *info, struct radv_shader_binary **rbinary,
457                         const struct radv_shader_args *args, struct nir_shader *const *nir, int nir_count)
458 {
459 
460    LLVMModuleRef llvm_module;
461 
462    llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, args);
463 
464    ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, radv_get_shader_name(info, nir[nir_count - 1]->info.stage),
465                           options);
466 }
467 
468 void
llvm_compile_shader(const struct radv_nir_compiler_options * options,const struct radv_shader_info * info,unsigned shader_count,struct nir_shader * const * shaders,struct radv_shader_binary ** binary,const struct radv_shader_args * args)469 llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
470                     unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
471                     const struct radv_shader_args *args)
472 {
473    enum ac_target_machine_options tm_options = 0;
474    struct ac_llvm_compiler ac_llvm;
475 
476    tm_options |= AC_TM_SUPPORTS_SPILL;
477    if (options->check_ir)
478       tm_options |= AC_TM_CHECK_IR;
479 
480    radv_init_llvm_compiler(&ac_llvm, options->info->family, tm_options, info->wave_size);
481 
482    radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count);
483 }
484