/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include <llvm-c/Core.h>
#include <llvm/Analysis/CGSCCPassManager.h>
#include <llvm/Analysis/LoopAnalysisManager.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/LICM.h>
#include <llvm/Transforms/Scalar/LoopPassManager.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstring>

/* DO NOT REORDER THE HEADERS
 * The LLVM headers need to all be included before any Mesa header,
 * as they use the `restrict` keyword in ways that are incompatible
 * with our #define in include/c99_compat.h
 */
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "util/macros.h"

using namespace llvm;

class RunAtExitForStaticDestructors : public SDNode {
public:
   /* getSDVTList (protected) calls getValueTypeList (private), which contains
    * static variables.
    */
   RunAtExitForStaticDestructors() : SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
   {
   }
};

void ac_llvm_run_atexit_for_destructors(void)
{
   /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
    * implements by calling atexit there. Before that, u_queue registers its atexit
    * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
    * the LLVM destructors are called first while shader compiler threads may still be
    * running, which crashes in LLVM in SelectionDAG.cpp.
    *
    * The solution is to run the code that declares the LLVM static variables first,
    * so that atexit for LLVM is registered first and u_queue is registered after that,
    * which ensures that all u_queue threads are terminated before LLVM destructors are
    * called.
    *
    * This just executes the code that declares static variables.
    */
   RunAtExitForStaticDestructors();
}

bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
}

void ac_reset_llvm_all_options_occurrences()
{
   cl::ResetAllOptionOccurrences();
}

void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
   Argument *A = unwrap<Argument>(val);
   A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
}

void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
   Argument *A = unwrap<Argument>(val);
   A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
}

LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);

   unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
   unwrap(module)->setDataLayout(TM->createDataLayout());
   return module;
}

LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
   LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);

   FastMathFlags flags;

   switch (float_mode) {
   case AC_FLOAT_MODE_DEFAULT:
   case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
      break;

   case AC_FLOAT_MODE_DEFAULT_OPENGL:
      /* Allow optimizations to treat the sign of a zero argument or
       * result as insignificant.
       */
      flags.setNoSignedZeros(); /* nsz */

      /* Allow optimizations to use the reciprocal of an argument
       * rather than perform division.
       */
      flags.setAllowReciprocal(); /* arcp */

      unwrap(builder)->setFastMathFlags(flags);
      break;
   }

   return builder;
}
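/* Illustration (assumed example, not upstream code): with
 * AC_FLOAT_MODE_DEFAULT_OPENGL, every floating-point instruction emitted
 * through this builder carries the nsz and arcp flags, so LLVM is free to
 * rewrite, e.g.,
 *
 *    %q = fdiv arcp float %x, %y
 *
 * into a reciprocal plus a multiply:
 *
 *    %r = fdiv arcp float 1.000000e+00, %y
 *    %q = fmul arcp float %x, %r
 *
 * which the AMDGPU backend can then lower to the hardware reciprocal
 * instruction (v_rcp_f32).
 */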
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
      auto *b = unwrap(ctx->builder);
      FastMathFlags flags = b->getFastMathFlags();

      /* This disables the optimization of (x + 0), which is used
       * to convert negative zero to positive zero.
       */
      flags.setNoSignedZeros(false);
      b->setFastMathFlags(flags);
   }
}

void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
      auto *b = unwrap(ctx->builder);
      FastMathFlags flags = b->getFastMathFlags();

      flags.setNoSignedZeros();
      b->setFastMathFlags(flags);
   }
}

/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
 * better compatibility with C code.
 */
struct raw_memory_ostream : public raw_pwrite_stream {
   char *buffer;
   size_t written;
   size_t bufsize;

   raw_memory_ostream()
   {
      buffer = NULL;
      written = 0;
      bufsize = 0;
      SetUnbuffered();
   }

   ~raw_memory_ostream()
   {
      free(buffer);
   }

   void take(char *&out_buffer, size_t &out_size)
   {
      out_buffer = buffer;
      out_size = written;
      buffer = NULL;
      written = 0;
      bufsize = 0;
   }

   void flush() = delete;

   void write_impl(const char *ptr, size_t size) override
   {
      /* Abort on size_t overflow. */
      if (unlikely(written + size < written))
         abort();

      if (written + size > bufsize) {
         bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
         buffer = (char *)realloc(buffer, bufsize);
         if (!buffer) {
            fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
            abort();
         }
      }
      memcpy(buffer + written, ptr, size);
      written += size;
   }

   void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
   {
      assert(offset == (size_t)offset && offset + size >= offset &&
             offset + size <= written);
      memcpy(buffer + offset, ptr, size);
   }

   uint64_t current_pos() const override
   {
      return written;
   }
};
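/* Illustration (worked example of the growth policy above): write_impl()
 * grows the buffer geometrically by a factor of 4/3 (bufsize / 3 * 4), with a
 * 1024-byte floor. Appending 100 bytes to an empty stream allocates 1024;
 * once `written + size` first exceeds 1024, the next size is
 * MAX3(1024, written + size, 1364), i.e. at least 1024 / 3 * 4 = 1364.
 * The `written + size < written` check aborts on size_t overflow.
 */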
/* The middle-end optimization passes are run using
 * LLVM's new pass manager infrastructure.
 */
struct ac_midend_optimizer {
   TargetMachine *target_machine;
   PassBuilder pass_builder;
   TargetLibraryInfoImpl target_library_info;

   /* Should be declared in this order only,
    * so that they are destroyed in the correct order
    * due to inter-analysis-manager references.
    */
   LoopAnalysisManager loop_am;
   FunctionAnalysisManager function_am;
   CGSCCAnalysisManager cgscc_am;
   ModuleAnalysisManager module_am;

   /* Pass Managers */
   LoopPassManager loop_pm;
   FunctionPassManager function_pm;
   ModulePassManager module_pm;

   ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
       : target_machine(arg_target_machine),
         pass_builder(target_machine, PipelineTuningOptions(), {}),
         target_library_info(Triple(target_machine->getTargetTriple()))
   {
      /* Build the pipeline and optimize.
       * Any custom analyses should be registered
       * before LLVM's default analysis sets.
       */
      function_am.registerPass([&] { return TargetLibraryAnalysis(target_library_info); });

      pass_builder.registerModuleAnalyses(module_am);
      pass_builder.registerCGSCCAnalyses(cgscc_am);
      pass_builder.registerFunctionAnalyses(function_am);
      pass_builder.registerLoopAnalyses(loop_am);
      pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);

      if (arg_check_ir)
         module_pm.addPass(VerifierPass());

      /* Adding the inliner pass to the module pass manager directly ensures
       * that it is run on all functions first, which makes sure that the
       * following passes are only run on the remaining non-inlined functions,
       * avoiding useless work on dead inlined functions.
       */
      module_pm.addPass(AlwaysInlinerPass());

      /* The following set of passes run on an individual function/loop first
       * before proceeding to the next.
       */
#if LLVM_VERSION_MAJOR >= 16
      function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
      function_pm.addPass(SROAPass());
#endif
      loop_pm.addPass(LICMPass(LICMOptions()));
      /* The second argument enables MemorySSA for the loop pass adaptor. */
      function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
      function_pm.addPass(SimplifyCFGPass());
      /* EarlyCSE with MemorySSA. */
      function_pm.addPass(EarlyCSEPass(true));

      module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
   }

   void run(Module &module)
   {
      module_pm.run(module, module_am);

      /* After a run(), the results in the analysis managers
       * aren't useful to optimize a subsequent LLVM module.
       * If used, they can lead to unexpected crashes.
       * Hence, the results in the analysis managers
       * need to be invalidated and cleared before
       * running optimizations on a new LLVM module.
       */
      module_am.invalidate(module, PreservedAnalyses::none());
      module_am.clear();
      cgscc_am.clear();
      function_am.clear();
      loop_am.clear();
   }
};

/* The backend passes for optimizations, instruction selection,
 * and code generation in the LLVM compiler still require the
 * legacy::PassManager. The use of the legacy PM will be
 * deprecated when the new PM can handle backend passes.
 */
struct ac_backend_optimizer {
   raw_memory_ostream ostream; /* ELF shader binary stream */
   legacy::PassManager backend_pass_manager; /* for codegen only */

   ac_backend_optimizer(TargetMachine *arg_target_machine)
   {
      /* add backend passes */
      if (arg_target_machine->addPassesToEmitFile(backend_pass_manager, ostream, nullptr,
#if LLVM_VERSION_MAJOR >= 18
                                                  CodeGenFileType::ObjectFile)) {
#else
                                                  CGFT_ObjectFile)) {
#endif
         fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
      }
   }

   void run(Module &module, char *&out_buffer, size_t &out_size)
   {
      backend_pass_manager.run(module);
      ostream.take(out_buffer, out_size);
   }
};

ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm, bool check_ir)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return new ac_midend_optimizer(TM, check_ir);
}

void ac_destroy_midend_optimizer(ac_midend_optimizer *meo)
{
   delete meo;
}

bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
{
   if (!meo)
      return false;

   /* Runs all the middle-end optimizations, no code generation */
   meo->run(*unwrap(module));
   return true;
}

ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return new ac_backend_optimizer(TM);
}

void ac_destroy_backend_optimizer(ac_backend_optimizer *beo)
{
   delete beo;
}

bool ac_compile_module_to_elf(ac_backend_optimizer *beo, LLVMModuleRef module,
                              char **pelf_buffer, size_t *pelf_size)
{
   if (!beo)
      return false;

   /* Runs all backend optimizations and code generation */
   beo->run(*unwrap(module), *pelf_buffer, *pelf_size);
   return true;
}
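/* Illustration (assumed usage, error handling and declarations elided): a
 * driver-side compile typically chains the helpers above like this:
 *
 *    ac_midend_optimizer *meo = ac_create_midend_optimizer(tm, check_ir);
 *    ac_backend_optimizer *beo = ac_create_backend_optimizer(tm);
 *
 *    ac_llvm_optimize_module(meo, module);                   // mid-end IR passes
 *    ac_compile_module_to_elf(beo, module, &elf, &elf_size); // ISel + codegen
 *
 *    ac_destroy_midend_optimizer(meo);
 *    ac_destroy_backend_optimizer(beo);
 *    free(elf); // raw_memory_ostream allocates with realloc(), so free() is correct
 */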
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
                                 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
{
   AtomicRMWInst::BinOp binop;
   switch (op) {
   case LLVMAtomicRMWBinOpXchg:
      binop = AtomicRMWInst::Xchg;
      break;
   case LLVMAtomicRMWBinOpAdd:
      binop = AtomicRMWInst::Add;
      break;
   case LLVMAtomicRMWBinOpSub:
      binop = AtomicRMWInst::Sub;
      break;
   case LLVMAtomicRMWBinOpAnd:
      binop = AtomicRMWInst::And;
      break;
   case LLVMAtomicRMWBinOpNand:
      binop = AtomicRMWInst::Nand;
      break;
   case LLVMAtomicRMWBinOpOr:
      binop = AtomicRMWInst::Or;
      break;
   case LLVMAtomicRMWBinOpXor:
      binop = AtomicRMWInst::Xor;
      break;
   case LLVMAtomicRMWBinOpMax:
      binop = AtomicRMWInst::Max;
      break;
   case LLVMAtomicRMWBinOpMin:
      binop = AtomicRMWInst::Min;
      break;
   case LLVMAtomicRMWBinOpUMax:
      binop = AtomicRMWInst::UMax;
      break;
   case LLVMAtomicRMWBinOpUMin:
      binop = AtomicRMWInst::UMin;
      break;
   case LLVMAtomicRMWBinOpFAdd:
      binop = AtomicRMWInst::FAdd;
      break;
   default:
      unreachable("invalid LLVMAtomicRMWBinOp");
      break;
   }
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                  ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val), MaybeAlign(0),
                                    AtomicOrdering::SequentiallyConsistent, SSID));
}

LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
                                      LLVMValueRef cmp, LLVMValueRef val,
                                      const char *sync_scope)
{
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                  ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), unwrap(val), MaybeAlign(0),
                                        AtomicOrdering::SequentiallyConsistent,
                                        AtomicOrdering::SequentiallyConsistent, SSID));
}
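/* Illustration (assumed example): sync_scope is an LLVM sync-scope name such
 * as "workgroup" or "agent"; getOrInsertSyncScopeID() interns the string, and
 * the resulting IR looks like
 *
 *    %old = atomicrmw add ptr %p, i32 %v syncscope("workgroup") seq_cst
 *
 * The AMDGPU backend uses the scope to pick how widely the atomic result must
 * be made visible (wavefront, workgroup, agent, or system).
 */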