• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6 
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"

#include <cstring>
#include <new>

/* DO NOT REORDER THE HEADERS
 * The LLVM headers need to all be included before any Mesa header,
 * as they use the `restrict` keyword in ways that are incompatible
 * with our #define in include/c99_compat.h
 */

#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "util/macros.h"
36 
37 using namespace llvm;
38 
39 class RunAtExitForStaticDestructors : public SDNode
40 {
41 public:
42    /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
RunAtExitForStaticDestructors()43    RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
44    {
45    }
46 };
47 
ac_llvm_run_atexit_for_destructors(void)48 void ac_llvm_run_atexit_for_destructors(void)
49 {
50    /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
51     * implements by calling atexit there. Before that, u_queue registers its atexit
52     * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
53     * the LLVM destructors are called first while shader compiler threads may still be
54     * running, which crashes in LLVM in SelectionDAG.cpp.
55     *
56     * The solution is to run the code that declares the LLVM static variables first,
57     * so that atexit for LLVM is registered first and u_queue is registered after that,
58     * which ensures that all u_queue threads are terminated before LLVM destructors are
59     * called.
60     *
61     * This just executes the code that declares static variables.
62     */
63    RunAtExitForStaticDestructors();
64 }
65 
ac_is_llvm_processor_supported(LLVMTargetMachineRef tm,const char * processor)66 bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
67 {
68    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
69    return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
70 }
71 
ac_reset_llvm_all_options_occurrences()72 void ac_reset_llvm_all_options_occurrences()
73 {
74    cl::ResetAllOptionOccurrences();
75 }
76 
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)77 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
78 {
79    Argument *A = unwrap<Argument>(val);
80    A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
81 }
82 
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)83 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
84 {
85    Argument *A = unwrap<Argument>(val);
86    A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
87 }
88 
ac_is_sgpr_param(LLVMValueRef arg)89 bool ac_is_sgpr_param(LLVMValueRef arg)
90 {
91    Argument *A = unwrap<Argument>(arg);
92    AttributeList AS = A->getParent()->getAttributes();
93    unsigned ArgNo = A->getArgNo();
94    return AS.hasParamAttr(ArgNo, Attribute::InReg);
95 }
96 
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)97 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
98 {
99    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
100    LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
101 
102    unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
103    unwrap(module)->setDataLayout(TM->createDataLayout());
104    return module;
105 }
106 
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)107 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
108 {
109    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
110 
111    FastMathFlags flags;
112 
113    switch (float_mode) {
114    case AC_FLOAT_MODE_DEFAULT:
115    case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
116       break;
117 
118    case AC_FLOAT_MODE_DEFAULT_OPENGL:
119       /* Allow optimizations to treat the sign of a zero argument or
120        * result as insignificant.
121        */
122       flags.setNoSignedZeros(); /* nsz */
123 
124       /* Allow optimizations to use the reciprocal of an argument
125        * rather than perform division.
126        */
127       flags.setAllowReciprocal(); /* arcp */
128 
129       unwrap(builder)->setFastMathFlags(flags);
130       break;
131    }
132 
133    return builder;
134 }
135 
ac_enable_signed_zeros(struct ac_llvm_context * ctx)136 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
137 {
138    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
139       auto *b = unwrap(ctx->builder);
140       FastMathFlags flags = b->getFastMathFlags();
141 
142       /* This disables the optimization of (x + 0), which is used
143        * to convert negative zero to positive zero.
144        */
145       flags.setNoSignedZeros(false);
146       b->setFastMathFlags(flags);
147    }
148 }
149 
ac_disable_signed_zeros(struct ac_llvm_context * ctx)150 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
151 {
152    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
153       auto *b = unwrap(ctx->builder);
154       FastMathFlags flags = b->getFastMathFlags();
155 
156       flags.setNoSignedZeros();
157       b->setFastMathFlags(flags);
158    }
159 }
160 
ac_create_target_library_info(const char * triple)161 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
162 {
163    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
164       new TargetLibraryInfoImpl(Triple(triple)));
165 }
166 
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)167 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
168 {
169    delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info);
170 }
171 
172 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
173  * better compatibility with C code. */
174 struct raw_memory_ostream : public raw_pwrite_stream {
175    char *buffer;
176    size_t written;
177    size_t bufsize;
178 
raw_memory_ostreamraw_memory_ostream179    raw_memory_ostream()
180    {
181       buffer = NULL;
182       written = 0;
183       bufsize = 0;
184       SetUnbuffered();
185    }
186 
~raw_memory_ostreamraw_memory_ostream187    ~raw_memory_ostream()
188    {
189       free(buffer);
190    }
191 
clearraw_memory_ostream192    void clear()
193    {
194       written = 0;
195    }
196 
takeraw_memory_ostream197    void take(char *&out_buffer, size_t &out_size)
198    {
199       out_buffer = buffer;
200       out_size = written;
201       buffer = NULL;
202       written = 0;
203       bufsize = 0;
204    }
205 
206    void flush() = delete;
207 
write_implraw_memory_ostream208    void write_impl(const char *ptr, size_t size) override
209    {
210       if (unlikely(written + size < written))
211          abort();
212       if (written + size > bufsize) {
213          bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
214          buffer = (char *)realloc(buffer, bufsize);
215          if (!buffer) {
216             fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
217             abort();
218          }
219       }
220       memcpy(buffer + written, ptr, size);
221       written += size;
222    }
223 
pwrite_implraw_memory_ostream224    void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
225    {
226       assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
227       memcpy(buffer + offset, ptr, size);
228    }
229 
current_posraw_memory_ostream230    uint64_t current_pos() const override
231    {
232       return written;
233    }
234 };
235 
236 /* The LLVM compiler is represented as a pass manager containing passes for
237  * optimizations, instruction selection, and code generation.
238  */
239 struct ac_compiler_passes {
240    raw_memory_ostream ostream;        /* ELF shader binary stream */
241    legacy::PassManager passmgr; /* list of passes */
242 };
243 
ac_create_llvm_passes(LLVMTargetMachineRef tm)244 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
245 {
246    struct ac_compiler_passes *p = new ac_compiler_passes();
247    if (!p)
248       return NULL;
249 
250    TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
251 
252    if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
253 #if LLVM_VERSION_MAJOR >= 18
254                                CodeGenFileType::ObjectFile)) {
255 #else
256                                CGFT_ObjectFile)) {
257 #endif
258       fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
259       delete p;
260       return NULL;
261    }
262    return p;
263 }
264 
/* Free a compiler created by ac_create_llvm_passes(). Deleting NULL is a
 * no-op, so a NULL argument is safe. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
269 
270 /* This returns false on failure. */
271 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
272                               char **pelf_buffer, size_t *pelf_size)
273 {
274    p->passmgr.run(*unwrap(module));
275    p->ostream.take(*pelf_buffer, *pelf_size);
276    return true;
277 }
278 
279 LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
280                                      bool check_ir)
281 {
282    LLVMPassManagerRef passmgr = LLVMCreatePassManager();
283    if (!passmgr)
284       return NULL;
285 
286    if (target_library_info)
287       LLVMAddTargetLibraryInfo(target_library_info, passmgr);
288 
289    if (check_ir)
290       unwrap(passmgr)->add(createVerifierPass());
291 
292    unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());
293 
294    /* Normally, the pass manager runs all passes on one function before
295     * moving onto another. Adding a barrier no-op pass forces the pass
296     * manager to run the inliner on all functions first, which makes sure
297     * that the following passes are only run on the remaining non-inline
298     * function, so it removes useless work done on dead inline functions.
299     */
300    unwrap(passmgr)->add(createBarrierNoopPass());
301 
302    #if LLVM_VERSION_MAJOR >= 16
303    unwrap(passmgr)->add(createSROAPass(true));
304    #else
305    unwrap(passmgr)->add(createSROAPass());
306    #endif
307    /* TODO: restore IPSCCP */
308    unwrap(passmgr)->add(createLICMPass());
309    unwrap(passmgr)->add(createCFGSimplificationPass());
310    /* This is recommended by the instruction combining pass. */
311    unwrap(passmgr)->add(createEarlyCSEPass(true));
312    unwrap(passmgr)->add(createInstructionCombiningPass());
313    return passmgr;
314 }
315 
316 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
317                                  LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
318 {
319    AtomicRMWInst::BinOp binop;
320    switch (op) {
321    case LLVMAtomicRMWBinOpXchg:
322       binop = AtomicRMWInst::Xchg;
323       break;
324    case LLVMAtomicRMWBinOpAdd:
325       binop = AtomicRMWInst::Add;
326       break;
327    case LLVMAtomicRMWBinOpSub:
328       binop = AtomicRMWInst::Sub;
329       break;
330    case LLVMAtomicRMWBinOpAnd:
331       binop = AtomicRMWInst::And;
332       break;
333    case LLVMAtomicRMWBinOpNand:
334       binop = AtomicRMWInst::Nand;
335       break;
336    case LLVMAtomicRMWBinOpOr:
337       binop = AtomicRMWInst::Or;
338       break;
339    case LLVMAtomicRMWBinOpXor:
340       binop = AtomicRMWInst::Xor;
341       break;
342    case LLVMAtomicRMWBinOpMax:
343       binop = AtomicRMWInst::Max;
344       break;
345    case LLVMAtomicRMWBinOpMin:
346       binop = AtomicRMWInst::Min;
347       break;
348    case LLVMAtomicRMWBinOpUMax:
349       binop = AtomicRMWInst::UMax;
350       break;
351    case LLVMAtomicRMWBinOpUMin:
352       binop = AtomicRMWInst::UMin;
353       break;
354    case LLVMAtomicRMWBinOpFAdd:
355       binop = AtomicRMWInst::FAdd;
356       break;
357    default:
358       unreachable("invalid LLVMAtomicRMWBinOp");
359       break;
360    }
361    unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
362    return wrap(unwrap(ctx->builder)
363                         ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
364                                           MaybeAlign(0),
365                                           AtomicOrdering::SequentiallyConsistent, SSID));
366 }
367 
368 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
369                                       LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
370 {
371    unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
372    return wrap(unwrap(ctx->builder)
373                         ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
374                                               unwrap(val),
375                                               MaybeAlign(0),
376                                               AtomicOrdering::SequentiallyConsistent,
377                                               AtomicOrdering::SequentiallyConsistent, SSID));
378 }
379