/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_nir_to_llvm.h"
#include "nir/nir.h"
#include "radv_debug.h"
#include "radv_llvm_helper.h"
#include "radv_shader.h"
#include "radv_shader_args.h"

#include "ac_binary.h"
#include "ac_llvm_build.h"
#include "ac_nir.h"
#include "ac_nir_to_llvm.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
#include "sid.h"

struct radv_shader_context {
   struct ac_llvm_context ac;
   const struct nir_shader *shader;
   struct ac_shader_abi abi;
   const struct radv_nir_compiler_options *options;
   const struct radv_shader_info *shader_info;
   const struct radv_shader_args *args;

   gl_shader_stage stage;

   unsigned max_workgroup_size;
   LLVMContextRef context;

   struct ac_llvm_pointer main_function;
};

static inline struct radv_shader_context *
radv_shader_context_from_abi(struct ac_shader_abi *abi)
{
   return container_of(abi, struct radv_shader_context, abi);
}

static struct ac_llvm_pointer
create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
                     const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
                     unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
{
   struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);

   if (options->info->address32_hi) {
      ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits",
                                           options->info->address32_hi);
   }

   ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size);
   ac_llvm_set_target_features(main_function.value, ctx, true);

   return main_function;
}

static enum ac_llvm_calling_convention
get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      return AC_LLVM_AMDGPU_VS;
   case MESA_SHADER_GEOMETRY:
      return AC_LLVM_AMDGPU_GS;
   case MESA_SHADER_TESS_CTRL:
      return AC_LLVM_AMDGPU_HS;
   case MESA_SHADER_FRAGMENT:
      return AC_LLVM_AMDGPU_PS;
   case MESA_SHADER_COMPUTE:
      return AC_LLVM_AMDGPU_CS;
   default:
      unreachable("Unhandled shader type");
   }
}

/* Returns whether the stage is a stage that can be directly before the GS. */
static bool
is_pre_gs_stage(gl_shader_stage stage)
{
   return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
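/* For reference (an assumption inferred from the merge rules applied in
 * create_function() below, not something this file spells out): on GFX9+
 * the API stages map onto hardware stages roughly as
 *
 *    VS + TCS         -> HS  (merged LS-HS shader)
 *    VS/TES + GS      -> GS  (merged ES-GS shader)
 *    VS/TES with NGG  -> GS  (NGG primitive shader)
 *
 * which is why the calling convention is chosen from the possibly
 * rewritten stage rather than from the NIR stage.
 */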
static void
create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
{
   if (ctx->ac.gfx_level >= GFX10) {
      if (is_pre_gs_stage(stage) && ctx->shader_info->is_ngg) {
         /* On GFX10+, VS and TES are merged into GS for NGG. */
         stage = MESA_SHADER_GEOMETRY;
         has_previous_stage = true;
      }
   }

   ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
                                             get_llvm_calling_convention(ctx->main_function.value, stage),
                                             ctx->max_workgroup_size, ctx->options);

   if (stage == MESA_SHADER_TESS_CTRL || (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) ||
       ctx->shader_info->is_ngg ||
       /* GFX9 has the ESGS ring buffer in LDS. */
       (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
      ac_declare_lds_as_pointer(&ctx->ac);
   }
}

static LLVMValueRef
radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
   return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
}

static LLVMValueRef
radv_load_rsrc(struct radv_shader_context *ctx, LLVMValueRef ptr, LLVMTypeRef type)
{
   if (ptr && LLVMTypeOf(ptr) == ctx->ac.i32) {
      LLVMValueRef result;

      LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_CONST_32BIT);
      ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, ptr_type, "");
      LLVMSetMetadata(ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);

      result = LLVMBuildLoad2(ctx->ac.builder, type, ptr, "");
      LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);

      return result;
   }

   return ptr;
}

static LLVMValueRef
radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
   return radv_load_rsrc(ctx, buffer_ptr, ctx->ac.v4i32);
}

static LLVMValueRef
radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write, bool non_uniform)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
   return radv_load_rsrc(ctx, buffer_ptr, ctx->ac.v4i32);
}

static LLVMValueRef
radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_descriptor_type desc_type)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);

   /* 3-plane formats always have the same size and format for planes 1 and 2,
    * so we reuse the tail of the plane-1 descriptor and only store the first
    * 16 bytes of the last plane's descriptor.
    */
   if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
      LLVMValueRef plane1_addr = LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
      LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
      LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);

      LLVMValueRef components[8];
      for (unsigned i = 0; i < 4; ++i)
         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);

      for (unsigned i = 4; i < 8; ++i)
         components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i);

      return ac_build_gather_values(&ctx->ac, components, 8);
   }

   bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER;
   return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
}

static LLVMValueRef
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
   int idx = ac_llvm_reg_index_soa(index, chan);
   LLVMValueRef output = ctx->abi.outputs[idx];
   LLVMTypeRef type = ctx->abi.is_16bit[idx] ? ctx->ac.f16 : ctx->ac.f32;
   return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
}

static void
ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
{
   ac_llvm_optimize_module(meo, ctx->ac.module);
   ac_llvm_context_dispose(&ctx->ac);
}
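/* For reference, the global declared by declare_esgs_ring() below should
 * come out roughly like this in the IR (a sketch; exact printing depends
 * on the LLVM version):
 *
 *    @esgs_ring = external addrspace(3) global [0 x i32], align 65536
 *
 * Zero-sized, externally-linked LDS globals are laid out by the backend;
 * the oversized alignment is what hints that the ring starts at offset 0.
 */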
/* Ensure that the esgs ring is declared.
 *
 * We declare it with 64KB alignment as a hint that the
 * pointer value will always be 0.
 */
static void
declare_esgs_ring(struct radv_shader_context *ctx)
{
   assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));

   LLVMValueRef esgs_ring =
      LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), "esgs_ring", AC_ADDR_SPACE_LDS);
   LLVMSetLinkage(esgs_ring, LLVMExternalLinkage);
   LLVMSetAlignment(esgs_ring, 64 * 1024);
}
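/* Translate one or more NIR shaders (a single stage, or the constituent
 * stages of a GFX9+ merged shader) into a single LLVM module with one
 * "main" entry point.
 */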
static LLVMModuleRef
ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
                         const struct radv_shader_info *info, struct nir_shader *const *shaders, int shader_count,
                         const struct radv_shader_args *args)
{
   struct radv_shader_context ctx = {0};
   ctx.args = args;
   ctx.options = options;
   ctx.shader_info = info;

   enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;

   if (shaders[0]->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
      float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
   }

   bool exports_mrtz = false;
   bool exports_color_null = false;
   if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT) {
      exports_mrtz = info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask;
      exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
   }

   ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, float_mode, info->wave_size, info->ballot_bit_size,
                        exports_color_null, exports_mrtz);

   uint32_t length = 1;
   for (int i = 0; i < shader_count; i++)
      if (shaders[i]->info.name)
         length += strlen(shaders[i]->info.name) + 1;

   char *name = malloc(length);
   if (name) {
      uint32_t offset = 0;
      for (int i = 0; i < shader_count; i++) {
         if (!shaders[i]->info.name)
            continue;

         strcpy(name + offset, shaders[i]->info.name);
         offset += strlen(shaders[i]->info.name);
         if (i != shader_count - 1)
            name[offset++] = ',';
      }

      LLVMSetSourceFileName(ctx.ac.module, name, offset);
   }

   ctx.context = ctx.ac.context;

   ctx.max_workgroup_size = info->workgroup_size;

   create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);

   ctx.abi.load_ubo = radv_load_ubo;
   ctx.abi.load_ssbo = radv_load_ssbo;
   ctx.abi.load_sampler_desc = radv_get_sampler_desc;
   ctx.abi.clamp_shadow_reference = false;
   ctx.abi.robust_buffer_access = options->robust_buffer_access_llvm;
   ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;

   bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
   if (shader_count >= 2 || is_ngg)
      ac_init_exec_full_mask(&ctx.ac);

   if (is_ngg) {
      if (!info->is_ngg_passthrough)
         declare_esgs_ring(&ctx);

      /* ctx.stage is not set until the per-shader loop below, so check the
       * stage of the last (hardware) shader directly.
       */
      if (shaders[shader_count - 1]->info.stage == MESA_SHADER_GEOMETRY) {
         /* Scratch space used by NGG GS for repacking vertices at the end. */
         LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8);
         LLVMValueRef gs_ngg_scratch =
            LLVMAddGlobalInAddressSpace(ctx.ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
         LLVMSetInitializer(gs_ngg_scratch, LLVMGetUndef(ai32));
         LLVMSetLinkage(gs_ngg_scratch, LLVMExternalLinkage);
         LLVMSetAlignment(gs_ngg_scratch, 4);

         /* Vertex emit space used by NGG GS for storing all vertex attributes.
          * No initializer: the global has type [0 x i32], so an [8 x i32]
          * undef initializer would be a type mismatch.
          */
         LLVMValueRef gs_ngg_emit =
            LLVMAddGlobalInAddressSpace(ctx.ac.module, LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
         LLVMSetLinkage(gs_ngg_emit, LLVMExternalLinkage);
         LLVMSetAlignment(gs_ngg_emit, 4);
      }

      /* GFX10 hang workaround - an s_barrier is always required before gs_alloc_req. */
      if (ctx.ac.gfx_level == GFX10 && shader_count == 1)
         ac_build_s_barrier(&ctx.ac, shaders[0]->info.stage);
   }

   for (int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
      ctx.stage = shaders[shader_idx]->info.stage;
      ctx.shader = shaders[shader_idx];

      if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {
         /* Execute a barrier before the second shader in
          * a merged shader.
          *
          * Execute the barrier inside the conditional block,
          * so that empty waves can jump directly to s_endpgm,
          * which will also signal the barrier.
          *
          * This is possible on GFX9, because an empty wave for
          * the second shader does not participate in the epilogue.
          * With NGG, empty waves may still be required to export
          * data (e.g. GS output vertices), so we cannot let them
          * exit early.
          *
          * If the shader is TCS and the TCS epilog is present
          * and contains a barrier, it will wait there and then
          * reach s_endpgm.
          */
         ac_build_waitcnt(&ctx.ac, AC_WAIT_DS);
         ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage);
      }

      bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && shader_idx == 1);
      LLVMBasicBlockRef merge_block = NULL;

      if (check_merged_wave_info) {
         LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
         LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
         merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");

         LLVMValueRef count =
            ac_unpack_param(&ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
         LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
         LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, "");
         LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);

         LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
      }

      if (!ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx]))
         abort();

      if (check_merged_wave_info) {
         LLVMBuildBr(ctx.ac.builder, merge_block);
         LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
      }
   }

   LLVMBuildRetVoid(ctx.ac.builder);

   if (options->dump_preoptir) {
      fprintf(stderr, "%s LLVM IR:\n\n", radv_get_shader_name(info, shaders[shader_count - 1]->info.stage));
      ac_dump_module(ctx.ac.module);
      fprintf(stderr, "\n");
   }

   ac_llvm_finalize_module(&ctx, ac_llvm->meo);

   free(name);

   return ctx.ac.module;
}

static void
ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
   unsigned *retval = (unsigned *)context;
   LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
   char *description = LLVMGetDiagInfoDescription(di);

   if (severity == LLVMDSError) {
      *retval = 1;
      fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
   }

   LLVMDisposeMessage(description);
}

static unsigned
radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size, struct ac_llvm_compiler *ac_llvm)
{
   unsigned retval = 0;
   LLVMContextRef llvm_ctx;

   /* Set up the diagnostic handler. */
   llvm_ctx = LLVMGetModuleContext(M);
   LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, &retval);

   /* Compile the IR. */
   if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
      retval = 1;

   return retval;
}
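/* Wrap the compiled ELF (and, when IR recording is enabled, the LLVM IR
 * string) in a radv_shader_binary_rtld. The LLVM module and its context
 * are consumed and disposed here.
 */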
static void
ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module,
                       struct radv_shader_binary **rbinary, const char *name,
                       const struct radv_nir_compiler_options *options)
{
   char *elf_buffer = NULL;
   size_t elf_size = 0;
   char *llvm_ir_string = NULL;

   if (options->dump_shader) {
      fprintf(stderr, "%s LLVM IR:\n\n", name);
      ac_dump_module(llvm_module);
      fprintf(stderr, "\n");
   }

   if (options->record_ir) {
      char *llvm_ir = LLVMPrintModuleToString(llvm_module);
      llvm_ir_string = strdup(llvm_ir);
      LLVMDisposeMessage(llvm_ir);
   }

   int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
   if (v) {
      fprintf(stderr, "compile failed\n");
   }

   LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
   LLVMDisposeModule(llvm_module);
   LLVMContextDispose(ctx);

   size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
   size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
   struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
   memcpy(rbin->data, elf_buffer, elf_size);
   if (llvm_ir_string)
      memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);

   rbin->base.type = RADV_BINARY_TYPE_RTLD;
   rbin->base.total_size = alloc_size;
   rbin->elf_size = elf_size;
   rbin->llvm_ir_size = llvm_ir_size;
   *rbinary = &rbin->base;

   free(llvm_ir_string);
   free(elf_buffer);
}

static void
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options,
                        const struct radv_shader_info *info, struct radv_shader_binary **rbinary,
                        const struct radv_shader_args *args, struct nir_shader *const *nir, int nir_count)
{
   LLVMModuleRef llvm_module;

   llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, args);

   ac_compile_llvm_module(ac_llvm, llvm_module, rbinary,
                          radv_get_shader_name(info, nir[nir_count - 1]->info.stage), options);
}

void
llvm_compile_shader(const struct radv_nir_compiler_options *options, const struct radv_shader_info *info,
                    unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary,
                    const struct radv_shader_args *args)
{
   enum ac_target_machine_options tm_options = 0;
   struct ac_llvm_compiler ac_llvm;

   tm_options |= AC_TM_SUPPORTS_SPILL;
   if (options->check_ir)
      tm_options |= AC_TM_CHECK_IR;

   radv_init_llvm_compiler(&ac_llvm, options->info->family, tm_options, info->wave_size);

   radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count);
}