• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
19  *
20  * The above copyright notice and this permission notice (including the
21  * next paragraph) shall be included in all copies or substantial portions
22  * of the Software.
23  *
24  */
25 
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO.h>

#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>
34 
35 /* DO NOT REORDER THE HEADERS
36  * The LLVM headers need to all be included before any Mesa header,
37  * as they use the `restrict` keyword in ways that are incompatible
38  * with our #define in include/c99_compat.h
39  */
40 
41 #include "ac_binary.h"
42 #include "ac_llvm_util.h"
43 #include "ac_llvm_build.h"
44 #include "util/macros.h"
45 
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)46 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
47 {
48    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
49    A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
50 }
51 
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)52 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
53 {
54 #if LLVM_VERSION_MAJOR >= 10
55    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
56    A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
57 #else
58    /* Avoid unused parameter warnings. */
59    (void)val;
60    (void)bytes;
61 #endif
62 }
63 
ac_is_sgpr_param(LLVMValueRef arg)64 bool ac_is_sgpr_param(LLVMValueRef arg)
65 {
66    llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
67    llvm::AttributeList AS = A->getParent()->getAttributes();
68    unsigned ArgNo = A->getArgNo();
69    return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
70 }
71 
ac_llvm_get_called_value(LLVMValueRef call)72 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
73 {
74    return LLVMGetCalledValue(call);
75 }
76 
ac_llvm_is_function(LLVMValueRef v)77 bool ac_llvm_is_function(LLVMValueRef v)
78 {
79    return LLVMGetValueKind(v) == LLVMFunctionValueKind;
80 }
81 
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)82 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
83 {
84    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
85    LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
86 
87    llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
88    llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
89    return module;
90 }
91 
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)92 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
93 {
94    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
95 
96    llvm::FastMathFlags flags;
97 
98    switch (float_mode) {
99    case AC_FLOAT_MODE_DEFAULT:
100    case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
101       break;
102 
103    case AC_FLOAT_MODE_DEFAULT_OPENGL:
104       /* Allow optimizations to treat the sign of a zero argument or
105        * result as insignificant.
106        */
107       flags.setNoSignedZeros(); /* nsz */
108 
109       /* Allow optimizations to use the reciprocal of an argument
110        * rather than perform division.
111        */
112       flags.setAllowReciprocal(); /* arcp */
113 
114       llvm::unwrap(builder)->setFastMathFlags(flags);
115       break;
116    }
117 
118    return builder;
119 }
120 
ac_enable_signed_zeros(struct ac_llvm_context * ctx)121 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
122 {
123    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
124       auto *b = llvm::unwrap(ctx->builder);
125       llvm::FastMathFlags flags = b->getFastMathFlags();
126 
127       /* This disables the optimization of (x + 0), which is used
128        * to convert negative zero to positive zero.
129        */
130       flags.setNoSignedZeros(false);
131       b->setFastMathFlags(flags);
132    }
133 }
134 
ac_disable_signed_zeros(struct ac_llvm_context * ctx)135 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
136 {
137    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
138       auto *b = llvm::unwrap(ctx->builder);
139       llvm::FastMathFlags flags = b->getFastMathFlags();
140 
141       flags.setNoSignedZeros();
142       b->setFastMathFlags(flags);
143    }
144 }
145 
ac_create_target_library_info(const char * triple)146 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
147 {
148    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
149       new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
150 }
151 
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)152 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
153 {
154    delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
155 }
156 
157 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
158  * better compatibility with C code. */
159 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
160    char *buffer;
161    size_t written;
162    size_t bufsize;
163 
raw_memory_ostreamraw_memory_ostream164    raw_memory_ostream()
165    {
166       buffer = NULL;
167       written = 0;
168       bufsize = 0;
169       SetUnbuffered();
170    }
171 
~raw_memory_ostreamraw_memory_ostream172    ~raw_memory_ostream()
173    {
174       free(buffer);
175    }
176 
clearraw_memory_ostream177    void clear()
178    {
179       written = 0;
180    }
181 
takeraw_memory_ostream182    void take(char *&out_buffer, size_t &out_size)
183    {
184       out_buffer = buffer;
185       out_size = written;
186       buffer = NULL;
187       written = 0;
188       bufsize = 0;
189    }
190 
191    void flush() = delete;
192 
write_implraw_memory_ostream193    void write_impl(const char *ptr, size_t size) override
194    {
195       if (unlikely(written + size < written))
196          abort();
197       if (written + size > bufsize) {
198          bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
199          buffer = (char *)realloc(buffer, bufsize);
200          if (!buffer) {
201             fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
202             abort();
203          }
204       }
205       memcpy(buffer + written, ptr, size);
206       written += size;
207    }
208 
pwrite_implraw_memory_ostream209    void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
210    {
211       assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
212       memcpy(buffer + offset, ptr, size);
213    }
214 
current_posraw_memory_ostream215    uint64_t current_pos() const override
216    {
217       return written;
218    }
219 };
220 
221 /* The LLVM compiler is represented as a pass manager containing passes for
222  * optimizations, instruction selection, and code generation.
223  */
224 struct ac_compiler_passes {
225    raw_memory_ostream ostream;        /* ELF shader binary stream */
226    llvm::legacy::PassManager passmgr; /* list of passes */
227 };
228 
ac_create_llvm_passes(LLVMTargetMachineRef tm)229 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
230 {
231    struct ac_compiler_passes *p = new ac_compiler_passes();
232    if (!p)
233       return NULL;
234 
235    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
236 
237    if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
238 #if LLVM_VERSION_MAJOR >= 10
239                                llvm::CGFT_ObjectFile)) {
240 #else
241                                llvm::TargetMachine::CGFT_ObjectFile)) {
242 #endif
243       fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
244       delete p;
245       return NULL;
246    }
247    return p;
248 }
249 
/* Delete a pass pipeline object; deleting a NULL pointer is a no-op. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
254 
255 /* This returns false on failure. */
256 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
257                               char **pelf_buffer, size_t *pelf_size)
258 {
259    p->passmgr.run(*llvm::unwrap(module));
260    p->ostream.take(*pelf_buffer, *pelf_size);
261    return true;
262 }
263 
264 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
265 {
266    llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
267 }
268 
269 void ac_enable_global_isel(LLVMTargetMachineRef tm)
270 {
271    reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
272 }
273 
274 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
275                                  LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
276 {
277    llvm::AtomicRMWInst::BinOp binop;
278    switch (op) {
279    case LLVMAtomicRMWBinOpXchg:
280       binop = llvm::AtomicRMWInst::Xchg;
281       break;
282    case LLVMAtomicRMWBinOpAdd:
283       binop = llvm::AtomicRMWInst::Add;
284       break;
285    case LLVMAtomicRMWBinOpSub:
286       binop = llvm::AtomicRMWInst::Sub;
287       break;
288    case LLVMAtomicRMWBinOpAnd:
289       binop = llvm::AtomicRMWInst::And;
290       break;
291    case LLVMAtomicRMWBinOpNand:
292       binop = llvm::AtomicRMWInst::Nand;
293       break;
294    case LLVMAtomicRMWBinOpOr:
295       binop = llvm::AtomicRMWInst::Or;
296       break;
297    case LLVMAtomicRMWBinOpXor:
298       binop = llvm::AtomicRMWInst::Xor;
299       break;
300    case LLVMAtomicRMWBinOpMax:
301       binop = llvm::AtomicRMWInst::Max;
302       break;
303    case LLVMAtomicRMWBinOpMin:
304       binop = llvm::AtomicRMWInst::Min;
305       break;
306    case LLVMAtomicRMWBinOpUMax:
307       binop = llvm::AtomicRMWInst::UMax;
308       break;
309    case LLVMAtomicRMWBinOpUMin:
310       binop = llvm::AtomicRMWInst::UMin;
311       break;
312 #if LLVM_VERSION_MAJOR >= 10
313    case LLVMAtomicRMWBinOpFAdd:
314       binop = llvm::AtomicRMWInst::FAdd;
315       break;
316 #endif
317    default:
318       unreachable("invalid LLVMAtomicRMWBinOp");
319       break;
320    }
321    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
322    return llvm::wrap(llvm::unwrap(ctx->builder)
323                         ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
324                                           llvm::AtomicOrdering::SequentiallyConsistent, SSID));
325 }
326 
327 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
328                                       LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
329 {
330    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
331    return llvm::wrap(llvm::unwrap(ctx->builder)
332                         ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
333                                               llvm::unwrap(val),
334                                               llvm::AtomicOrdering::SequentiallyConsistent,
335                                               llvm::AtomicOrdering::SequentiallyConsistent, SSID));
336 }
337