1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO.h>

#include <cstring>
#include <new>
34
35 /* DO NOT REORDER THE HEADERS
36 * The LLVM headers need to all be included before any Mesa header,
37 * as they use the `restrict` keyword in ways that are incompatible
38 * with our #define in include/c99_compat.h
39 */
40
41 #include "ac_binary.h"
42 #include "ac_llvm_util.h"
43 #include "ac_llvm_build.h"
44 #include "util/macros.h"
45
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)46 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
47 {
48 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
49 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
50 }
51
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)52 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
53 {
54 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
55 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
56 }
57
ac_is_sgpr_param(LLVMValueRef arg)58 bool ac_is_sgpr_param(LLVMValueRef arg)
59 {
60 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
61 llvm::AttributeList AS = A->getParent()->getAttributes();
62 unsigned ArgNo = A->getArgNo();
63 return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg);
64 }
65
ac_llvm_get_called_value(LLVMValueRef call)66 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
67 {
68 return LLVMGetCalledValue(call);
69 }
70
ac_llvm_is_function(LLVMValueRef v)71 bool ac_llvm_is_function(LLVMValueRef v)
72 {
73 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
74 }
75
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)76 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
77 {
78 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
79 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
80
81 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
82 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
83 return module;
84 }
85
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)86 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
87 {
88 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
89
90 llvm::FastMathFlags flags;
91
92 switch (float_mode) {
93 case AC_FLOAT_MODE_DEFAULT:
94 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
95 break;
96
97 case AC_FLOAT_MODE_DEFAULT_OPENGL:
98 /* Allow optimizations to treat the sign of a zero argument or
99 * result as insignificant.
100 */
101 flags.setNoSignedZeros(); /* nsz */
102
103 /* Allow optimizations to use the reciprocal of an argument
104 * rather than perform division.
105 */
106 flags.setAllowReciprocal(); /* arcp */
107
108 llvm::unwrap(builder)->setFastMathFlags(flags);
109 break;
110 }
111
112 return builder;
113 }
114
ac_enable_signed_zeros(struct ac_llvm_context * ctx)115 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
116 {
117 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
118 auto *b = llvm::unwrap(ctx->builder);
119 llvm::FastMathFlags flags = b->getFastMathFlags();
120
121 /* This disables the optimization of (x + 0), which is used
122 * to convert negative zero to positive zero.
123 */
124 flags.setNoSignedZeros(false);
125 b->setFastMathFlags(flags);
126 }
127 }
128
ac_disable_signed_zeros(struct ac_llvm_context * ctx)129 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
130 {
131 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
132 auto *b = llvm::unwrap(ctx->builder);
133 llvm::FastMathFlags flags = b->getFastMathFlags();
134
135 flags.setNoSignedZeros();
136 b->setFastMathFlags(flags);
137 }
138 }
139
ac_create_target_library_info(const char * triple)140 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
141 {
142 return reinterpret_cast<LLVMTargetLibraryInfoRef>(
143 new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
144 }
145
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)146 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
147 {
148 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
149 }
150
151 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
152 * better compatibility with C code. */
153 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
154 char *buffer;
155 size_t written;
156 size_t bufsize;
157
raw_memory_ostreamraw_memory_ostream158 raw_memory_ostream()
159 {
160 buffer = NULL;
161 written = 0;
162 bufsize = 0;
163 SetUnbuffered();
164 }
165
~raw_memory_ostreamraw_memory_ostream166 ~raw_memory_ostream()
167 {
168 free(buffer);
169 }
170
clearraw_memory_ostream171 void clear()
172 {
173 written = 0;
174 }
175
takeraw_memory_ostream176 void take(char *&out_buffer, size_t &out_size)
177 {
178 out_buffer = buffer;
179 out_size = written;
180 buffer = NULL;
181 written = 0;
182 bufsize = 0;
183 }
184
185 void flush() = delete;
186
write_implraw_memory_ostream187 void write_impl(const char *ptr, size_t size) override
188 {
189 if (unlikely(written + size < written))
190 abort();
191 if (written + size > bufsize) {
192 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
193 buffer = (char *)realloc(buffer, bufsize);
194 if (!buffer) {
195 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
196 abort();
197 }
198 }
199 memcpy(buffer + written, ptr, size);
200 written += size;
201 }
202
pwrite_implraw_memory_ostream203 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
204 {
205 assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
206 memcpy(buffer + offset, ptr, size);
207 }
208
current_posraw_memory_ostream209 uint64_t current_pos() const override
210 {
211 return written;
212 }
213 };
214
215 /* The LLVM compiler is represented as a pass manager containing passes for
216 * optimizations, instruction selection, and code generation.
217 */
218 struct ac_compiler_passes {
219 raw_memory_ostream ostream; /* ELF shader binary stream */
220 llvm::legacy::PassManager passmgr; /* list of passes */
221 };
222
ac_create_llvm_passes(LLVMTargetMachineRef tm)223 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
224 {
225 struct ac_compiler_passes *p = new ac_compiler_passes();
226 if (!p)
227 return NULL;
228
229 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
230
231 if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
232 llvm::CGFT_ObjectFile)) {
233 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
234 delete p;
235 return NULL;
236 }
237 return p;
238 }
239
/* Destroy a pass container created by ac_create_llvm_passes().
 *
 * This runs the destructors of the pass manager and of the output stream,
 * which frees any ELF buffer still held by the stream.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
244
245 /* This returns false on failure. */
ac_compile_module_to_elf(struct ac_compiler_passes * p,LLVMModuleRef module,char ** pelf_buffer,size_t * pelf_size)246 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
247 char **pelf_buffer, size_t *pelf_size)
248 {
249 p->passmgr.run(*llvm::unwrap(module));
250 p->ostream.take(*pelf_buffer, *pelf_size);
251 return true;
252 }
253
ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)254 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
255 {
256 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
257 }
258
ac_enable_global_isel(LLVMTargetMachineRef tm)259 void ac_enable_global_isel(LLVMTargetMachineRef tm)
260 {
261 reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
262 }
263
ac_build_atomic_rmw(struct ac_llvm_context * ctx,LLVMAtomicRMWBinOp op,LLVMValueRef ptr,LLVMValueRef val,const char * sync_scope)264 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
265 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
266 {
267 llvm::AtomicRMWInst::BinOp binop;
268 switch (op) {
269 case LLVMAtomicRMWBinOpXchg:
270 binop = llvm::AtomicRMWInst::Xchg;
271 break;
272 case LLVMAtomicRMWBinOpAdd:
273 binop = llvm::AtomicRMWInst::Add;
274 break;
275 case LLVMAtomicRMWBinOpSub:
276 binop = llvm::AtomicRMWInst::Sub;
277 break;
278 case LLVMAtomicRMWBinOpAnd:
279 binop = llvm::AtomicRMWInst::And;
280 break;
281 case LLVMAtomicRMWBinOpNand:
282 binop = llvm::AtomicRMWInst::Nand;
283 break;
284 case LLVMAtomicRMWBinOpOr:
285 binop = llvm::AtomicRMWInst::Or;
286 break;
287 case LLVMAtomicRMWBinOpXor:
288 binop = llvm::AtomicRMWInst::Xor;
289 break;
290 case LLVMAtomicRMWBinOpMax:
291 binop = llvm::AtomicRMWInst::Max;
292 break;
293 case LLVMAtomicRMWBinOpMin:
294 binop = llvm::AtomicRMWInst::Min;
295 break;
296 case LLVMAtomicRMWBinOpUMax:
297 binop = llvm::AtomicRMWInst::UMax;
298 break;
299 case LLVMAtomicRMWBinOpUMin:
300 binop = llvm::AtomicRMWInst::UMin;
301 break;
302 case LLVMAtomicRMWBinOpFAdd:
303 binop = llvm::AtomicRMWInst::FAdd;
304 break;
305 default:
306 unreachable("invalid LLVMAtomicRMWBinOp");
307 break;
308 }
309 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
310 return llvm::wrap(llvm::unwrap(ctx->builder)
311 ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
312 #if LLVM_VERSION_MAJOR >= 13
313 llvm::MaybeAlign(0),
314 #endif
315 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
316 }
317
ac_build_atomic_cmp_xchg(struct ac_llvm_context * ctx,LLVMValueRef ptr,LLVMValueRef cmp,LLVMValueRef val,const char * sync_scope)318 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
319 LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
320 {
321 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
322 return llvm::wrap(llvm::unwrap(ctx->builder)
323 ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
324 llvm::unwrap(val),
325 #if LLVM_VERSION_MAJOR >= 13
326 llvm::MaybeAlign(0),
327 #endif
328 llvm::AtomicOrdering::SequentiallyConsistent,
329 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
330 }
331