/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/LICM.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"

#include <cstring>

/* DO NOT REORDER THE HEADERS
 * The LLVM headers need to all be included before any Mesa header,
 * as they use the `restrict` keyword in ways that are incompatible
 * with our #define in include/c99_compat.h
 */

#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "util/macros.h"

using namespace llvm;

class RunAtExitForStaticDestructors : public SDNode
{
public:
   /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
   RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
   {
   }
};

void ac_llvm_run_atexit_for_destructors(void)
{
   /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
    * implements by calling atexit there. Before that, u_queue registers its atexit
    * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
    * the LLVM destructors are called first while shader compiler threads may still be
    * running, which crashes in LLVM in SelectionDAG.cpp.
    *
    * The solution is to run the code that declares the LLVM static variables first,
    * so that atexit for LLVM is registered first and u_queue is registered after that,
    * which ensures that all u_queue threads are terminated before LLVM destructors are
    * called.
    *
    * This just executes the code that declares static variables.
    */
   RunAtExitForStaticDestructors();
}
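
/* Usage sketch (illustrative only; the required call order follows from the
 * comment above, but si_init_compiler_queues() is a hypothetical caller, not
 * a real Mesa function):
 *
 *    void driver_init(void)
 *    {
 *       ac_llvm_run_atexit_for_destructors(); // LLVM's atexit registered first
 *       si_init_compiler_queues();            // u_queue's atexit registered after
 *    }
 */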

bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
}

void ac_reset_llvm_all_options_occurrences()
{
   cl::ResetAllOptionOccurrences();
}

void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
   Argument *A = unwrap<Argument>(val);
   A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
}

void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
   Argument *A = unwrap<Argument>(val);
   A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
}

LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);

   unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
   unwrap(module)->setDataLayout(TM->createDataLayout());
   return module;
}

LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
   LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);

   FastMathFlags flags;

   switch (float_mode) {
   case AC_FLOAT_MODE_DEFAULT:
   case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
      break;

   case AC_FLOAT_MODE_DEFAULT_OPENGL:
      /* Allow optimizations to treat the sign of a zero argument or
       * result as insignificant.
       */
      flags.setNoSignedZeros(); /* nsz */

      /* Allow optimizations to use the reciprocal of an argument
       * rather than perform division.
       */
      flags.setAllowReciprocal(); /* arcp */

      unwrap(builder)->setFastMathFlags(flags);
      break;
   }

   return builder;
}
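
/* Minimal usage sketch (hypothetical caller; assumes only the standard
 * LLVM-C context API):
 *
 *    LLVMContextRef ctx = LLVMContextCreate();
 *    LLVMBuilderRef builder = ac_create_builder(ctx, AC_FLOAT_MODE_DEFAULT_OPENGL);
 *    // IR emitted through "builder" now carries the nsz and arcp flags.
 *    LLVMDisposeBuilder(builder);
 *    LLVMContextDispose(ctx);
 */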

void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
      auto *b = unwrap(ctx->builder);
      FastMathFlags flags = b->getFastMathFlags();

      /* This disables the optimization of (x + 0), which is used
       * to convert negative zero to positive zero.
       */
      flags.setNoSignedZeros(false);
      b->setFastMathFlags(flags);
   }
}

void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
      auto *b = unwrap(ctx->builder);
      FastMathFlags flags = b->getFastMathFlags();

      flags.setNoSignedZeros();
      b->setFastMathFlags(flags);
   }
}

/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
 * better compatibility with C code. */
struct raw_memory_ostream : public raw_pwrite_stream {
   char *buffer;
   size_t written;
   size_t bufsize;

   raw_memory_ostream()
   {
      buffer = NULL;
      written = 0;
      bufsize = 0;
      SetUnbuffered();
   }

   ~raw_memory_ostream()
   {
      free(buffer);
   }

   void take(char *&out_buffer, size_t &out_size)
   {
      out_buffer = buffer;
      out_size = written;
      buffer = NULL;
      written = 0;
      bufsize = 0;
   }

   void flush() = delete;

   void write_impl(const char *ptr, size_t size) override
   {
      if (unlikely(written + size < written))
         abort();
      if (written + size > bufsize) {
         bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
         buffer = (char *)realloc(buffer, bufsize);
         if (!buffer) {
            fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
            abort();
         }
      }
      memcpy(buffer + written, ptr, size);
      written += size;
   }

   void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
   {
      assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
      memcpy(buffer + offset, ptr, size);
   }

   uint64_t current_pos() const override
   {
      return written;
   }
};
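
/* Sketch of the intended use (illustrative; mirrors how ac_backend_optimizer
 * drives the stream below):
 *
 *    raw_memory_ostream os;
 *    os << "ELF";            // write_impl() grows the malloc'ed buffer
 *    char *buf; size_t size;
 *    os.take(buf, size);     // caller now owns "buf" and must free() it
 *    free(buf);
 */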

/* The middle-end optimization passes are run using
 * LLVM's new pass manager infrastructure.
 */
struct ac_midend_optimizer
{
   TargetMachine *target_machine;
   PassBuilder pass_builder;
   TargetLibraryInfoImpl target_library_info;

   /* Should be declared in this order only,
    * so that they are destroyed in the correct order
    * due to inter-analysis-manager references.
    */
   LoopAnalysisManager loop_am;
   FunctionAnalysisManager function_am;
   CGSCCAnalysisManager cgscc_am;
   ModuleAnalysisManager module_am;

   /* Pass Managers */
   LoopPassManager loop_pm;
   FunctionPassManager function_pm;
   ModulePassManager module_pm;

   ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
      : target_machine(arg_target_machine),
        pass_builder(target_machine, PipelineTuningOptions(), {}),
        target_library_info(Triple(target_machine->getTargetTriple()))
   {
      /* Build the pipeline and optimize.
       * Any custom analyses should be registered
       * before LLVM's default analysis sets.
       */
      function_am.registerPass(
         [&] { return TargetLibraryAnalysis(target_library_info); }
      );

      pass_builder.registerModuleAnalyses(module_am);
      pass_builder.registerCGSCCAnalyses(cgscc_am);
      pass_builder.registerFunctionAnalyses(function_am);
      pass_builder.registerLoopAnalyses(loop_am);
      pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);

      if (arg_check_ir)
         module_pm.addPass(VerifierPass());

      /* Adding the inliner pass to the module pass manager directly
       * ensures that the pass is run on all functions first, which makes sure
       * that the following passes are only run on the remaining non-inline
       * functions, so it removes useless work done on dead inline functions.
       */
      module_pm.addPass(AlwaysInlinerPass());

      /* The following set of passes run on an individual function/loop first
       * before proceeding to the next.
       */
#if LLVM_VERSION_MAJOR >= 16
      function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
      /* LLVM 15 and older: SROAPass takes no options. */
      function_pm.addPass(SROAPass());
#endif

      loop_pm.addPass(LICMPass(LICMOptions()));
      function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
      function_pm.addPass(SimplifyCFGPass());
      function_pm.addPass(EarlyCSEPass(true));

      module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
   }

   void run(Module &module)
   {
      module_pm.run(module, module_am);

      /* After a run(), the results in the analysis managers
       * aren't useful to optimize a subsequent LLVM module.
       * If used, they can lead to unexpected crashes.
       * Hence, the results in the analysis managers
       * need to be invalidated and cleared before
       * running optimizations on a new LLVM module.
       */
      module_am.invalidate(module, PreservedAnalyses::none());
      module_am.clear();
      cgscc_am.clear();
      function_am.clear();
      loop_am.clear();
   }
};

/* The backend passes for optimizations, instruction selection,
 * and code generation in the LLVM compiler still require the
 * legacy::PassManager. The use of the legacy PM will be
 * deprecated when the new PM can handle backend passes.
 */
struct ac_backend_optimizer
{
   raw_memory_ostream ostream; /* ELF shader binary stream */
   legacy::PassManager backend_pass_manager; /* for codegen only */

   ac_backend_optimizer(TargetMachine *arg_target_machine)
   {
      /* add backend passes */
      if (arg_target_machine->addPassesToEmitFile(backend_pass_manager, ostream, nullptr,
#if LLVM_VERSION_MAJOR >= 18
                                             CodeGenFileType::ObjectFile)) {
#else
                                             CGFT_ObjectFile)) {
#endif
         fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
      }
   }

   void run(Module &module, char *&out_buffer, size_t &out_size)
   {
      backend_pass_manager.run(module);
      ostream.take(out_buffer, out_size);
   }
};

ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
                                                bool check_ir)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return new ac_midend_optimizer(TM, check_ir);
}

void ac_destroy_midend_optimiser(ac_midend_optimizer *meo)
{
   delete meo;
}

bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
{
   if (!meo)
      return false;

   /* Runs all the middle-end optimizations, no code generation */
   meo->run(*unwrap(module));
   return true;
}

ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm)
{
   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
   return new ac_backend_optimizer(TM);
}

void ac_destroy_backend_optimizer(ac_backend_optimizer *beo)
{
   delete beo;
}

bool ac_compile_module_to_elf(ac_backend_optimizer *beo, LLVMModuleRef module,
                              char **pelf_buffer, size_t *pelf_size)
{
   if (!beo)
      return false;

   /* Runs all backend optimizations and code generation */
   beo->run(*unwrap(module), *pelf_buffer, *pelf_size);
   return true;
}
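
/* End-to-end sketch of the compile path these helpers expose to C code
 * (hypothetical driver-side caller; "tm", "module" and "check_ir" must come
 * from the driver's own LLVM setup):
 *
 *    ac_midend_optimizer *meo = ac_create_midend_optimizer(tm, check_ir);
 *    ac_backend_optimizer *beo = ac_create_backend_optimizer(tm);
 *    char *elf = NULL; size_t elf_size = 0;
 *    if (ac_llvm_optimize_module(meo, module) &&
 *        ac_compile_module_to_elf(beo, module, &elf, &elf_size)) {
 *       // parse/upload the ELF binary, then free(elf)
 *    }
 *    ac_destroy_midend_optimiser(meo);
 *    ac_destroy_backend_optimizer(beo);
 */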

LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
                                 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
{
   AtomicRMWInst::BinOp binop;
   switch (op) {
   case LLVMAtomicRMWBinOpXchg:
      binop = AtomicRMWInst::Xchg;
      break;
   case LLVMAtomicRMWBinOpAdd:
      binop = AtomicRMWInst::Add;
      break;
   case LLVMAtomicRMWBinOpSub:
      binop = AtomicRMWInst::Sub;
      break;
   case LLVMAtomicRMWBinOpAnd:
      binop = AtomicRMWInst::And;
      break;
   case LLVMAtomicRMWBinOpNand:
      binop = AtomicRMWInst::Nand;
      break;
   case LLVMAtomicRMWBinOpOr:
      binop = AtomicRMWInst::Or;
      break;
   case LLVMAtomicRMWBinOpXor:
      binop = AtomicRMWInst::Xor;
      break;
   case LLVMAtomicRMWBinOpMax:
      binop = AtomicRMWInst::Max;
      break;
   case LLVMAtomicRMWBinOpMin:
      binop = AtomicRMWInst::Min;
      break;
   case LLVMAtomicRMWBinOpUMax:
      binop = AtomicRMWInst::UMax;
      break;
   case LLVMAtomicRMWBinOpUMin:
      binop = AtomicRMWInst::UMin;
      break;
   case LLVMAtomicRMWBinOpFAdd:
      binop = AtomicRMWInst::FAdd;
      break;
   default:
      unreachable("invalid LLVMAtomicRMWBinOp");
      break;
   }
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                        ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
                                          MaybeAlign(0),
                                          AtomicOrdering::SequentiallyConsistent, SSID));
}
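
/* Usage sketch (illustrative; "lds_ptr" and "one" stand in for values built
 * elsewhere with the ac_llvm_build helpers):
 *
 *    LLVMValueRef old =
 *       ac_build_atomic_rmw(ctx, LLVMAtomicRMWBinOpAdd, lds_ptr, one, "workgroup");
 *    // Emits a seq_cst "atomicrmw add" in the workgroup sync scope and
 *    // returns the previous value at lds_ptr.
 */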

LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
                                      LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
{
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                        ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
                                              unwrap(val),
                                              MaybeAlign(0),
                                              AtomicOrdering::SequentiallyConsistent,
                                              AtomicOrdering::SequentiallyConsistent, SSID));
}