/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
25
26 #include <llvm-c/Core.h>
27 #include <llvm/Analysis/TargetLibraryInfo.h>
28 #include <llvm/IR/IRBuilder.h>
29 #include <llvm/IR/LegacyPassManager.h>
30 #include <llvm/Target/TargetMachine.h>
31 #include <llvm/MC/MCSubtargetInfo.h>
32 #include <llvm/Support/CommandLine.h>
33 #include <llvm/Transforms/IPO.h>
34
35 #include <cstring>
36
/* DO NOT REORDER THE HEADERS
 * The LLVM headers need to all be included before any Mesa header,
 * as they use the `restrict` keyword in ways that are incompatible
 * with our #define in include/c99_compat.h
 */
42
43 #include "ac_binary.h"
44 #include "ac_llvm_util.h"
45 #include "ac_llvm_build.h"
46 #include "util/macros.h"
47
ac_is_llvm_processor_supported(LLVMTargetMachineRef tm,const char * processor)48 bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
49 {
50 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
51 return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
52 }
53
ac_reset_llvm_all_options_occurences()54 void ac_reset_llvm_all_options_occurences()
55 {
56 llvm::cl::ResetAllOptionOccurrences();
57 }
58
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)59 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
60 {
61 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
62 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
63 }
64
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)65 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
66 {
67 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
68 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
69 }
70
ac_is_sgpr_param(LLVMValueRef arg)71 bool ac_is_sgpr_param(LLVMValueRef arg)
72 {
73 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
74 llvm::AttributeList AS = A->getParent()->getAttributes();
75 unsigned ArgNo = A->getArgNo();
76 return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg);
77 }
78
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)79 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
80 {
81 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
82 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
83
84 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
85 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
86 return module;
87 }
88
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)89 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
90 {
91 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
92
93 llvm::FastMathFlags flags;
94
95 switch (float_mode) {
96 case AC_FLOAT_MODE_DEFAULT:
97 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
98 break;
99
100 case AC_FLOAT_MODE_DEFAULT_OPENGL:
101 /* Allow optimizations to treat the sign of a zero argument or
102 * result as insignificant.
103 */
104 flags.setNoSignedZeros(); /* nsz */
105
106 /* Allow optimizations to use the reciprocal of an argument
107 * rather than perform division.
108 */
109 flags.setAllowReciprocal(); /* arcp */
110
111 llvm::unwrap(builder)->setFastMathFlags(flags);
112 break;
113 }
114
115 return builder;
116 }
117
ac_enable_signed_zeros(struct ac_llvm_context * ctx)118 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
119 {
120 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
121 auto *b = llvm::unwrap(ctx->builder);
122 llvm::FastMathFlags flags = b->getFastMathFlags();
123
124 /* This disables the optimization of (x + 0), which is used
125 * to convert negative zero to positive zero.
126 */
127 flags.setNoSignedZeros(false);
128 b->setFastMathFlags(flags);
129 }
130 }
131
ac_disable_signed_zeros(struct ac_llvm_context * ctx)132 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
133 {
134 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
135 auto *b = llvm::unwrap(ctx->builder);
136 llvm::FastMathFlags flags = b->getFastMathFlags();
137
138 flags.setNoSignedZeros();
139 b->setFastMathFlags(flags);
140 }
141 }
142
ac_create_target_library_info(const char * triple)143 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
144 {
145 return reinterpret_cast<LLVMTargetLibraryInfoRef>(
146 new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
147 }
148
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)149 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
150 {
151 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
152 }
153
154 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
155 * better compatibility with C code. */
156 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
157 char *buffer;
158 size_t written;
159 size_t bufsize;
160
raw_memory_ostreamraw_memory_ostream161 raw_memory_ostream()
162 {
163 buffer = NULL;
164 written = 0;
165 bufsize = 0;
166 SetUnbuffered();
167 }
168
~raw_memory_ostreamraw_memory_ostream169 ~raw_memory_ostream()
170 {
171 free(buffer);
172 }
173
clearraw_memory_ostream174 void clear()
175 {
176 written = 0;
177 }
178
takeraw_memory_ostream179 void take(char *&out_buffer, size_t &out_size)
180 {
181 out_buffer = buffer;
182 out_size = written;
183 buffer = NULL;
184 written = 0;
185 bufsize = 0;
186 }
187
188 void flush() = delete;
189
write_implraw_memory_ostream190 void write_impl(const char *ptr, size_t size) override
191 {
192 if (unlikely(written + size < written))
193 abort();
194 if (written + size > bufsize) {
195 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
196 buffer = (char *)realloc(buffer, bufsize);
197 if (!buffer) {
198 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
199 abort();
200 }
201 }
202 memcpy(buffer + written, ptr, size);
203 written += size;
204 }
205
pwrite_implraw_memory_ostream206 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
207 {
208 assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
209 memcpy(buffer + offset, ptr, size);
210 }
211
current_posraw_memory_ostream212 uint64_t current_pos() const override
213 {
214 return written;
215 }
216 };
217
218 /* The LLVM compiler is represented as a pass manager containing passes for
219 * optimizations, instruction selection, and code generation.
220 */
221 struct ac_compiler_passes {
222 raw_memory_ostream ostream; /* ELF shader binary stream */
223 llvm::legacy::PassManager passmgr; /* list of passes */
224 };
225
ac_create_llvm_passes(LLVMTargetMachineRef tm)226 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
227 {
228 struct ac_compiler_passes *p = new ac_compiler_passes();
229 if (!p)
230 return NULL;
231
232 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
233
234 if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
235 llvm::CGFT_ObjectFile)) {
236 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
237 delete p;
238 return NULL;
239 }
240 return p;
241 }
242
/* Delete a pass object allocated with `new`, as ac_create_llvm_passes() does. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
247
248 /* This returns false on failure. */
ac_compile_module_to_elf(struct ac_compiler_passes * p,LLVMModuleRef module,char ** pelf_buffer,size_t * pelf_size)249 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
250 char **pelf_buffer, size_t *pelf_size)
251 {
252 p->passmgr.run(*llvm::unwrap(module));
253 p->ostream.take(*pelf_buffer, *pelf_size);
254 return true;
255 }
256
ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)257 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
258 {
259 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
260 }
261
ac_build_atomic_rmw(struct ac_llvm_context * ctx,LLVMAtomicRMWBinOp op,LLVMValueRef ptr,LLVMValueRef val,const char * sync_scope)262 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
263 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
264 {
265 llvm::AtomicRMWInst::BinOp binop;
266 switch (op) {
267 case LLVMAtomicRMWBinOpXchg:
268 binop = llvm::AtomicRMWInst::Xchg;
269 break;
270 case LLVMAtomicRMWBinOpAdd:
271 binop = llvm::AtomicRMWInst::Add;
272 break;
273 case LLVMAtomicRMWBinOpSub:
274 binop = llvm::AtomicRMWInst::Sub;
275 break;
276 case LLVMAtomicRMWBinOpAnd:
277 binop = llvm::AtomicRMWInst::And;
278 break;
279 case LLVMAtomicRMWBinOpNand:
280 binop = llvm::AtomicRMWInst::Nand;
281 break;
282 case LLVMAtomicRMWBinOpOr:
283 binop = llvm::AtomicRMWInst::Or;
284 break;
285 case LLVMAtomicRMWBinOpXor:
286 binop = llvm::AtomicRMWInst::Xor;
287 break;
288 case LLVMAtomicRMWBinOpMax:
289 binop = llvm::AtomicRMWInst::Max;
290 break;
291 case LLVMAtomicRMWBinOpMin:
292 binop = llvm::AtomicRMWInst::Min;
293 break;
294 case LLVMAtomicRMWBinOpUMax:
295 binop = llvm::AtomicRMWInst::UMax;
296 break;
297 case LLVMAtomicRMWBinOpUMin:
298 binop = llvm::AtomicRMWInst::UMin;
299 break;
300 case LLVMAtomicRMWBinOpFAdd:
301 binop = llvm::AtomicRMWInst::FAdd;
302 break;
303 default:
304 unreachable("invalid LLVMAtomicRMWBinOp");
305 break;
306 }
307 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
308 return llvm::wrap(llvm::unwrap(ctx->builder)
309 ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
310 #if LLVM_VERSION_MAJOR >= 13
311 llvm::MaybeAlign(0),
312 #endif
313 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
314 }
315
ac_build_atomic_cmp_xchg(struct ac_llvm_context * ctx,LLVMValueRef ptr,LLVMValueRef cmp,LLVMValueRef val,const char * sync_scope)316 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
317 LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
318 {
319 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
320 return llvm::wrap(llvm::unwrap(ctx->builder)
321 ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
322 llvm::unwrap(val),
323 #if LLVM_VERSION_MAJOR >= 13
324 llvm::MaybeAlign(0),
325 #endif
326 llvm::AtomicOrdering::SequentiallyConsistent,
327 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
328 }
329