1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include <llvm-c/Core.h>
8 #include <llvm/Analysis/TargetLibraryInfo.h>
9 #include <llvm/IR/IRBuilder.h>
10 #include <llvm/IR/LegacyPassManager.h>
11 #include <llvm/IR/Verifier.h>
12 #include <llvm/Target/TargetMachine.h>
13 #include <llvm/MC/MCSubtargetInfo.h>
14 #include <llvm/Support/CommandLine.h>
15 #include <llvm/Transforms/IPO.h>
16 #include <llvm/Transforms/Scalar.h>
17 #include <llvm/Transforms/Utils.h>
18 #include <llvm/CodeGen/Passes.h>
19 #include <llvm/Transforms/IPO/AlwaysInliner.h>
20 #include <llvm/Transforms/InstCombine/InstCombine.h>
21 #include <llvm/Transforms/IPO/SCCP.h>
22 #include "llvm/CodeGen/SelectionDAGNodes.h"
23
24 #include <cstring>
25
26 /* DO NOT REORDER THE HEADERS
27 * The LLVM headers need to all be included before any Mesa header,
28 * as they use the `restrict` keyword in ways that are incompatible
29 * with our #define in include/c99_compat.h
30 */
31
32 #include "ac_binary.h"
33 #include "ac_llvm_util.h"
34 #include "ac_llvm_build.h"
35 #include "util/macros.h"
36
37 using namespace llvm;
38
39 class RunAtExitForStaticDestructors : public SDNode
40 {
41 public:
42 /* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
RunAtExitForStaticDestructors()43 RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
44 {
45 }
46 };
47
ac_llvm_run_atexit_for_destructors(void)48 void ac_llvm_run_atexit_for_destructors(void)
49 {
50 /* LLVM >= 16 registers static variable destructors on the first compile, which gcc
51 * implements by calling atexit there. Before that, u_queue registers its atexit
52 * handler to kill all threads. Since exit() runs atexit handlers in the reverse order,
53 * the LLVM destructors are called first while shader compiler threads may still be
54 * running, which crashes in LLVM in SelectionDAG.cpp.
55 *
56 * The solution is to run the code that declares the LLVM static variables first,
57 * so that atexit for LLVM is registered first and u_queue is registered after that,
58 * which ensures that all u_queue threads are terminated before LLVM destructors are
59 * called.
60 *
61 * This just executes the code that declares static variables.
62 */
63 RunAtExitForStaticDestructors();
64 }
65
ac_is_llvm_processor_supported(LLVMTargetMachineRef tm,const char * processor)66 bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
67 {
68 TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
69 return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
70 }
71
ac_reset_llvm_all_options_occurrences()72 void ac_reset_llvm_all_options_occurrences()
73 {
74 cl::ResetAllOptionOccurrences();
75 }
76
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)77 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
78 {
79 Argument *A = unwrap<Argument>(val);
80 A->addAttr(Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
81 }
82
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)83 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
84 {
85 Argument *A = unwrap<Argument>(val);
86 A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(bytes)));
87 }
88
ac_is_sgpr_param(LLVMValueRef arg)89 bool ac_is_sgpr_param(LLVMValueRef arg)
90 {
91 Argument *A = unwrap<Argument>(arg);
92 AttributeList AS = A->getParent()->getAttributes();
93 unsigned ArgNo = A->getArgNo();
94 return AS.hasParamAttr(ArgNo, Attribute::InReg);
95 }
96
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)97 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
98 {
99 TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
100 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
101
102 unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
103 unwrap(module)->setDataLayout(TM->createDataLayout());
104 return module;
105 }
106
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)107 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
108 {
109 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
110
111 FastMathFlags flags;
112
113 switch (float_mode) {
114 case AC_FLOAT_MODE_DEFAULT:
115 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
116 break;
117
118 case AC_FLOAT_MODE_DEFAULT_OPENGL:
119 /* Allow optimizations to treat the sign of a zero argument or
120 * result as insignificant.
121 */
122 flags.setNoSignedZeros(); /* nsz */
123
124 /* Allow optimizations to use the reciprocal of an argument
125 * rather than perform division.
126 */
127 flags.setAllowReciprocal(); /* arcp */
128
129 unwrap(builder)->setFastMathFlags(flags);
130 break;
131 }
132
133 return builder;
134 }
135
ac_enable_signed_zeros(struct ac_llvm_context * ctx)136 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
137 {
138 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
139 auto *b = unwrap(ctx->builder);
140 FastMathFlags flags = b->getFastMathFlags();
141
142 /* This disables the optimization of (x + 0), which is used
143 * to convert negative zero to positive zero.
144 */
145 flags.setNoSignedZeros(false);
146 b->setFastMathFlags(flags);
147 }
148 }
149
ac_disable_signed_zeros(struct ac_llvm_context * ctx)150 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
151 {
152 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
153 auto *b = unwrap(ctx->builder);
154 FastMathFlags flags = b->getFastMathFlags();
155
156 flags.setNoSignedZeros();
157 b->setFastMathFlags(flags);
158 }
159 }
160
ac_create_target_library_info(const char * triple)161 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
162 {
163 return reinterpret_cast<LLVMTargetLibraryInfoRef>(
164 new TargetLibraryInfoImpl(Triple(triple)));
165 }
166
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)167 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
168 {
169 delete reinterpret_cast<TargetLibraryInfoImpl *>(library_info);
170 }
171
172 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
173 * better compatibility with C code. */
174 struct raw_memory_ostream : public raw_pwrite_stream {
175 char *buffer;
176 size_t written;
177 size_t bufsize;
178
raw_memory_ostreamraw_memory_ostream179 raw_memory_ostream()
180 {
181 buffer = NULL;
182 written = 0;
183 bufsize = 0;
184 SetUnbuffered();
185 }
186
~raw_memory_ostreamraw_memory_ostream187 ~raw_memory_ostream()
188 {
189 free(buffer);
190 }
191
clearraw_memory_ostream192 void clear()
193 {
194 written = 0;
195 }
196
takeraw_memory_ostream197 void take(char *&out_buffer, size_t &out_size)
198 {
199 out_buffer = buffer;
200 out_size = written;
201 buffer = NULL;
202 written = 0;
203 bufsize = 0;
204 }
205
206 void flush() = delete;
207
write_implraw_memory_ostream208 void write_impl(const char *ptr, size_t size) override
209 {
210 if (unlikely(written + size < written))
211 abort();
212 if (written + size > bufsize) {
213 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
214 buffer = (char *)realloc(buffer, bufsize);
215 if (!buffer) {
216 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
217 abort();
218 }
219 }
220 memcpy(buffer + written, ptr, size);
221 written += size;
222 }
223
pwrite_implraw_memory_ostream224 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
225 {
226 assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
227 memcpy(buffer + offset, ptr, size);
228 }
229
current_posraw_memory_ostream230 uint64_t current_pos() const override
231 {
232 return written;
233 }
234 };
235
/* The LLVM compiler is represented as a pass manager containing passes for
 * optimizations, instruction selection, and code generation.
 */
struct ac_compiler_passes {
   raw_memory_ostream ostream; /* ELF shader binary stream */
   legacy::PassManager passmgr; /* list of passes */
};
243
ac_create_llvm_passes(LLVMTargetMachineRef tm)244 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
245 {
246 struct ac_compiler_passes *p = new ac_compiler_passes();
247 if (!p)
248 return NULL;
249
250 TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
251
252 if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
253 #if LLVM_VERSION_MAJOR >= 18
254 CodeGenFileType::ObjectFile)) {
255 #else
256 CGFT_ObjectFile)) {
257 #endif
258 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
259 delete p;
260 return NULL;
261 }
262 return p;
263 }
264
/* Free a pass manager created by ac_create_llvm_passes. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
269
/* Run the codegen passes on "module" and hand the resulting ELF binary to the
 * caller. The caller takes ownership of *pelf_buffer (malloc'ed; free() it).
 * This returns false on failure.
 */
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
                              char **pelf_buffer, size_t *pelf_size)
{
   p->passmgr.run(*unwrap(module));
   /* take() transfers the buffer out of the stream and resets it for reuse. */
   p->ostream.take(*pelf_buffer, *pelf_size);
   return true;
}
278
/* Build the legacy optimization pass manager used for shader IR.
 * target_library_info may be NULL; check_ir adds an IR verifier pass first.
 * Returns NULL on failure. The pass order below is deliberate.
 */
LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
                                     bool check_ir)
{
   LLVMPassManagerRef passmgr = LLVMCreatePassManager();
   if (!passmgr)
      return NULL;

   if (target_library_info)
      LLVMAddTargetLibraryInfo(target_library_info, passmgr);

   if (check_ir)
      unwrap(passmgr)->add(createVerifierPass());

   unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());

   /* Normally, the pass manager runs all passes on one function before
    * moving onto another. Adding a barrier no-op pass forces the pass
    * manager to run the inliner on all functions first, which makes sure
    * that the following passes are only run on the remaining non-inline
    * function, so it removes useless work done on dead inline functions.
    */
   unwrap(passmgr)->add(createBarrierNoopPass());

   /* LLVM 16 changed the createSROAPass signature (bool = preserve CFG). */
#if LLVM_VERSION_MAJOR >= 16
   unwrap(passmgr)->add(createSROAPass(true));
#else
   unwrap(passmgr)->add(createSROAPass());
#endif
   /* TODO: restore IPSCCP */
   unwrap(passmgr)->add(createLICMPass());
   unwrap(passmgr)->add(createCFGSimplificationPass());
   /* This is recommended by the instruction combining pass. */
   unwrap(passmgr)->add(createEarlyCSEPass(true));
   unwrap(passmgr)->add(createInstructionCombiningPass());
   return passmgr;
}
315
/* Emit an atomicrmw instruction with a named sync scope.
 * The C API (LLVMBuildAtomicRMW) cannot express non-default sync scopes,
 * hence this C++ helper. Ordering is always sequentially consistent.
 */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
                                 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
{
   /* Translate the C-API binop enum to the C++ one. */
   AtomicRMWInst::BinOp binop;
   switch (op) {
   case LLVMAtomicRMWBinOpXchg:
      binop = AtomicRMWInst::Xchg;
      break;
   case LLVMAtomicRMWBinOpAdd:
      binop = AtomicRMWInst::Add;
      break;
   case LLVMAtomicRMWBinOpSub:
      binop = AtomicRMWInst::Sub;
      break;
   case LLVMAtomicRMWBinOpAnd:
      binop = AtomicRMWInst::And;
      break;
   case LLVMAtomicRMWBinOpNand:
      binop = AtomicRMWInst::Nand;
      break;
   case LLVMAtomicRMWBinOpOr:
      binop = AtomicRMWInst::Or;
      break;
   case LLVMAtomicRMWBinOpXor:
      binop = AtomicRMWInst::Xor;
      break;
   case LLVMAtomicRMWBinOpMax:
      binop = AtomicRMWInst::Max;
      break;
   case LLVMAtomicRMWBinOpMin:
      binop = AtomicRMWInst::Min;
      break;
   case LLVMAtomicRMWBinOpUMax:
      binop = AtomicRMWInst::UMax;
      break;
   case LLVMAtomicRMWBinOpUMin:
      binop = AtomicRMWInst::UMin;
      break;
   case LLVMAtomicRMWBinOpFAdd:
      binop = AtomicRMWInst::FAdd;
      break;
   default:
      unreachable("invalid LLVMAtomicRMWBinOp");
      break;
   }
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                  ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val),
                                    MaybeAlign(0),
                                    AtomicOrdering::SequentiallyConsistent, SSID));
}
367
/* Emit a cmpxchg instruction with a named sync scope.
 * Like ac_build_atomic_rmw, this exists because the C API cannot express
 * non-default sync scopes. Both success and failure orderings are
 * sequentially consistent.
 */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
                                      LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
{
   unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   return wrap(unwrap(ctx->builder)
                  ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp),
                                        unwrap(val),
                                        MaybeAlign(0),
                                        AtomicOrdering::SequentiallyConsistent,
                                        AtomicOrdering::SequentiallyConsistent, SSID));
}
379