1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26 #include <llvm-c/Core.h>
27 #include <llvm/Analysis/TargetLibraryInfo.h>
28 #include <llvm/IR/IRBuilder.h>
29 #include <llvm/IR/LegacyPassManager.h>
30 #include <llvm/Target/TargetMachine.h>
31 #include <llvm/Transforms/IPO.h>
32
33 #include <cstring>
34
35 /* DO NOT REORDER THE HEADERS
36 * The LLVM headers need to all be included before any Mesa header,
37 * as they use the `restrict` keyword in ways that are incompatible
38 * with our #define in include/c99_compat.h
39 */
40
41 #include "ac_binary.h"
42 #include "ac_llvm_util.h"
43 #include "ac_llvm_build.h"
44 #include "util/macros.h"
45
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)46 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
47 {
48 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
49 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
50 }
51
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)52 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
53 {
54 #if LLVM_VERSION_MAJOR >= 10
55 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
56 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
57 #else
58 /* Avoid unused parameter warnings. */
59 (void)val;
60 (void)bytes;
61 #endif
62 }
63
ac_is_sgpr_param(LLVMValueRef arg)64 bool ac_is_sgpr_param(LLVMValueRef arg)
65 {
66 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
67 llvm::AttributeList AS = A->getParent()->getAttributes();
68 unsigned ArgNo = A->getArgNo();
69 return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
70 }
71
ac_llvm_get_called_value(LLVMValueRef call)72 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
73 {
74 return LLVMGetCalledValue(call);
75 }
76
ac_llvm_is_function(LLVMValueRef v)77 bool ac_llvm_is_function(LLVMValueRef v)
78 {
79 return LLVMGetValueKind(v) == LLVMFunctionValueKind;
80 }
81
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)82 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
83 {
84 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
85 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
86
87 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
88 llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
89 return module;
90 }
91
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)92 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
93 {
94 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
95
96 llvm::FastMathFlags flags;
97
98 switch (float_mode) {
99 case AC_FLOAT_MODE_DEFAULT:
100 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
101 break;
102
103 case AC_FLOAT_MODE_DEFAULT_OPENGL:
104 /* Allow optimizations to treat the sign of a zero argument or
105 * result as insignificant.
106 */
107 flags.setNoSignedZeros(); /* nsz */
108
109 /* Allow optimizations to use the reciprocal of an argument
110 * rather than perform division.
111 */
112 flags.setAllowReciprocal(); /* arcp */
113
114 llvm::unwrap(builder)->setFastMathFlags(flags);
115 break;
116 }
117
118 return builder;
119 }
120
ac_enable_signed_zeros(struct ac_llvm_context * ctx)121 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
122 {
123 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
124 auto *b = llvm::unwrap(ctx->builder);
125 llvm::FastMathFlags flags = b->getFastMathFlags();
126
127 /* This disables the optimization of (x + 0), which is used
128 * to convert negative zero to positive zero.
129 */
130 flags.setNoSignedZeros(false);
131 b->setFastMathFlags(flags);
132 }
133 }
134
ac_disable_signed_zeros(struct ac_llvm_context * ctx)135 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
136 {
137 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
138 auto *b = llvm::unwrap(ctx->builder);
139 llvm::FastMathFlags flags = b->getFastMathFlags();
140
141 flags.setNoSignedZeros();
142 b->setFastMathFlags(flags);
143 }
144 }
145
ac_create_target_library_info(const char * triple)146 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
147 {
148 return reinterpret_cast<LLVMTargetLibraryInfoRef>(
149 new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
150 }
151
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)152 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
153 {
154 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
155 }
156
157 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
158 * better compatibility with C code. */
159 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
160 char *buffer;
161 size_t written;
162 size_t bufsize;
163
raw_memory_ostreamraw_memory_ostream164 raw_memory_ostream()
165 {
166 buffer = NULL;
167 written = 0;
168 bufsize = 0;
169 SetUnbuffered();
170 }
171
~raw_memory_ostreamraw_memory_ostream172 ~raw_memory_ostream()
173 {
174 free(buffer);
175 }
176
clearraw_memory_ostream177 void clear()
178 {
179 written = 0;
180 }
181
takeraw_memory_ostream182 void take(char *&out_buffer, size_t &out_size)
183 {
184 out_buffer = buffer;
185 out_size = written;
186 buffer = NULL;
187 written = 0;
188 bufsize = 0;
189 }
190
191 void flush() = delete;
192
write_implraw_memory_ostream193 void write_impl(const char *ptr, size_t size) override
194 {
195 if (unlikely(written + size < written))
196 abort();
197 if (written + size > bufsize) {
198 bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
199 buffer = (char *)realloc(buffer, bufsize);
200 if (!buffer) {
201 fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
202 abort();
203 }
204 }
205 memcpy(buffer + written, ptr, size);
206 written += size;
207 }
208
pwrite_implraw_memory_ostream209 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
210 {
211 assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
212 memcpy(buffer + offset, ptr, size);
213 }
214
current_posraw_memory_ostream215 uint64_t current_pos() const override
216 {
217 return written;
218 }
219 };
220
221 /* The LLVM compiler is represented as a pass manager containing passes for
222 * optimizations, instruction selection, and code generation.
223 */
224 struct ac_compiler_passes {
225 raw_memory_ostream ostream; /* ELF shader binary stream */
226 llvm::legacy::PassManager passmgr; /* list of passes */
227 };
228
ac_create_llvm_passes(LLVMTargetMachineRef tm)229 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
230 {
231 struct ac_compiler_passes *p = new ac_compiler_passes();
232 if (!p)
233 return NULL;
234
235 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
236
237 if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
238 #if LLVM_VERSION_MAJOR >= 10
239 llvm::CGFT_ObjectFile)) {
240 #else
241 llvm::TargetMachine::CGFT_ObjectFile)) {
242 #endif
243 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
244 delete p;
245 return NULL;
246 }
247 return p;
248 }
249
/* Delete a pass pipeline object; this is the counterpart of the `new` in
 * ac_create_llvm_passes().
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
254
255 /* This returns false on failure. */
256 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
257 char **pelf_buffer, size_t *pelf_size)
258 {
259 p->passmgr.run(*llvm::unwrap(module));
260 p->ostream.take(*pelf_buffer, *pelf_size);
261 return true;
262 }
263
264 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
265 {
266 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
267 }
268
269 void ac_enable_global_isel(LLVMTargetMachineRef tm)
270 {
271 reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
272 }
273
274 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
275 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
276 {
277 llvm::AtomicRMWInst::BinOp binop;
278 switch (op) {
279 case LLVMAtomicRMWBinOpXchg:
280 binop = llvm::AtomicRMWInst::Xchg;
281 break;
282 case LLVMAtomicRMWBinOpAdd:
283 binop = llvm::AtomicRMWInst::Add;
284 break;
285 case LLVMAtomicRMWBinOpSub:
286 binop = llvm::AtomicRMWInst::Sub;
287 break;
288 case LLVMAtomicRMWBinOpAnd:
289 binop = llvm::AtomicRMWInst::And;
290 break;
291 case LLVMAtomicRMWBinOpNand:
292 binop = llvm::AtomicRMWInst::Nand;
293 break;
294 case LLVMAtomicRMWBinOpOr:
295 binop = llvm::AtomicRMWInst::Or;
296 break;
297 case LLVMAtomicRMWBinOpXor:
298 binop = llvm::AtomicRMWInst::Xor;
299 break;
300 case LLVMAtomicRMWBinOpMax:
301 binop = llvm::AtomicRMWInst::Max;
302 break;
303 case LLVMAtomicRMWBinOpMin:
304 binop = llvm::AtomicRMWInst::Min;
305 break;
306 case LLVMAtomicRMWBinOpUMax:
307 binop = llvm::AtomicRMWInst::UMax;
308 break;
309 case LLVMAtomicRMWBinOpUMin:
310 binop = llvm::AtomicRMWInst::UMin;
311 break;
312 #if LLVM_VERSION_MAJOR >= 10
313 case LLVMAtomicRMWBinOpFAdd:
314 binop = llvm::AtomicRMWInst::FAdd;
315 break;
316 #endif
317 default:
318 unreachable("invalid LLVMAtomicRMWBinOp");
319 break;
320 }
321 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
322 return llvm::wrap(llvm::unwrap(ctx->builder)
323 ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
324 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
325 }
326
327 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
328 LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
329 {
330 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
331 return llvm::wrap(llvm::unwrap(ctx->builder)
332 ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
333 llvm::unwrap(val),
334 llvm::AtomicOrdering::SequentiallyConsistent,
335 llvm::AtomicOrdering::SequentiallyConsistent, SSID));
336 }
337