• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
19  *
20  * The above copyright notice and this permission notice (including the
21  * next paragraph) shall be included in all copies or substantial portions
22  * of the Software.
23  *
24  */
25 
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>

#include <cstring>
#include <new>
36 
37 /* DO NOT REORDER THE HEADERS
38  * The LLVM headers need to all be included before any Mesa header,
39  * as they use the `restrict` keyword in ways that are incompatible
40  * with our #define in include/c99_compat.h
41  */
42 
43 #include "ac_binary.h"
44 #include "ac_llvm_util.h"
45 #include "ac_llvm_build.h"
46 #include "util/macros.h"
47 
ac_is_llvm_processor_supported(LLVMTargetMachineRef tm,const char * processor)48 bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
49 {
50    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
51    return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
52 }
53 
ac_reset_llvm_all_options_occurences()54 void ac_reset_llvm_all_options_occurences()
55 {
56    llvm::cl::ResetAllOptionOccurrences();
57 }
58 
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)59 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
60 {
61    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
62    A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
63 }
64 
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)65 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
66 {
67    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
68    A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
69 }
70 
ac_is_sgpr_param(LLVMValueRef arg)71 bool ac_is_sgpr_param(LLVMValueRef arg)
72 {
73    llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
74    llvm::AttributeList AS = A->getParent()->getAttributes();
75    unsigned ArgNo = A->getArgNo();
76    return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg);
77 }
78 
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)79 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
80 {
81    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
82    LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
83 
84    llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
85    llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
86    return module;
87 }
88 
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)89 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
90 {
91    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
92 
93    llvm::FastMathFlags flags;
94 
95    switch (float_mode) {
96    case AC_FLOAT_MODE_DEFAULT:
97    case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
98       break;
99 
100    case AC_FLOAT_MODE_DEFAULT_OPENGL:
101       /* Allow optimizations to treat the sign of a zero argument or
102        * result as insignificant.
103        */
104       flags.setNoSignedZeros(); /* nsz */
105 
106       /* Allow optimizations to use the reciprocal of an argument
107        * rather than perform division.
108        */
109       flags.setAllowReciprocal(); /* arcp */
110 
111       llvm::unwrap(builder)->setFastMathFlags(flags);
112       break;
113    }
114 
115    return builder;
116 }
117 
ac_enable_signed_zeros(struct ac_llvm_context * ctx)118 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
119 {
120    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
121       auto *b = llvm::unwrap(ctx->builder);
122       llvm::FastMathFlags flags = b->getFastMathFlags();
123 
124       /* This disables the optimization of (x + 0), which is used
125        * to convert negative zero to positive zero.
126        */
127       flags.setNoSignedZeros(false);
128       b->setFastMathFlags(flags);
129    }
130 }
131 
ac_disable_signed_zeros(struct ac_llvm_context * ctx)132 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
133 {
134    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
135       auto *b = llvm::unwrap(ctx->builder);
136       llvm::FastMathFlags flags = b->getFastMathFlags();
137 
138       flags.setNoSignedZeros();
139       b->setFastMathFlags(flags);
140    }
141 }
142 
ac_create_target_library_info(const char * triple)143 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
144 {
145    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
146       new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
147 }
148 
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)149 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
150 {
151    delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
152 }
153 
154 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
155  * better compatibility with C code. */
156 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
157    char *buffer;
158    size_t written;
159    size_t bufsize;
160 
raw_memory_ostreamraw_memory_ostream161    raw_memory_ostream()
162    {
163       buffer = NULL;
164       written = 0;
165       bufsize = 0;
166       SetUnbuffered();
167    }
168 
~raw_memory_ostreamraw_memory_ostream169    ~raw_memory_ostream()
170    {
171       free(buffer);
172    }
173 
clearraw_memory_ostream174    void clear()
175    {
176       written = 0;
177    }
178 
takeraw_memory_ostream179    void take(char *&out_buffer, size_t &out_size)
180    {
181       out_buffer = buffer;
182       out_size = written;
183       buffer = NULL;
184       written = 0;
185       bufsize = 0;
186    }
187 
188    void flush() = delete;
189 
write_implraw_memory_ostream190    void write_impl(const char *ptr, size_t size) override
191    {
192       if (unlikely(written + size < written))
193          abort();
194       if (written + size > bufsize) {
195          bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
196          buffer = (char *)realloc(buffer, bufsize);
197          if (!buffer) {
198             fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
199             abort();
200          }
201       }
202       memcpy(buffer + written, ptr, size);
203       written += size;
204    }
205 
pwrite_implraw_memory_ostream206    void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
207    {
208       assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
209       memcpy(buffer + offset, ptr, size);
210    }
211 
current_posraw_memory_ostream212    uint64_t current_pos() const override
213    {
214       return written;
215    }
216 };
217 
218 /* The LLVM compiler is represented as a pass manager containing passes for
219  * optimizations, instruction selection, and code generation.
220  */
221 struct ac_compiler_passes {
222    raw_memory_ostream ostream;        /* ELF shader binary stream */
223    llvm::legacy::PassManager passmgr; /* list of passes */
224 };
225 
ac_create_llvm_passes(LLVMTargetMachineRef tm)226 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
227 {
228    struct ac_compiler_passes *p = new ac_compiler_passes();
229    if (!p)
230       return NULL;
231 
232    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
233 
234    if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
235                                llvm::CGFT_ObjectFile)) {
236       fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
237       delete p;
238       return NULL;
239    }
240    return p;
241 }
242 
/* Delete a pass object, running the destructors of its pass manager and
 * memory stream. Passing NULL is harmless (plain "delete" semantics).
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
247 
248 /* This returns false on failure. */
ac_compile_module_to_elf(struct ac_compiler_passes * p,LLVMModuleRef module,char ** pelf_buffer,size_t * pelf_size)249 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
250                               char **pelf_buffer, size_t *pelf_size)
251 {
252    p->passmgr.run(*llvm::unwrap(module));
253    p->ostream.take(*pelf_buffer, *pelf_size);
254    return true;
255 }
256 
ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)257 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
258 {
259    llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
260 }
261 
ac_build_atomic_rmw(struct ac_llvm_context * ctx,LLVMAtomicRMWBinOp op,LLVMValueRef ptr,LLVMValueRef val,const char * sync_scope)262 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
263                                  LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
264 {
265    llvm::AtomicRMWInst::BinOp binop;
266    switch (op) {
267    case LLVMAtomicRMWBinOpXchg:
268       binop = llvm::AtomicRMWInst::Xchg;
269       break;
270    case LLVMAtomicRMWBinOpAdd:
271       binop = llvm::AtomicRMWInst::Add;
272       break;
273    case LLVMAtomicRMWBinOpSub:
274       binop = llvm::AtomicRMWInst::Sub;
275       break;
276    case LLVMAtomicRMWBinOpAnd:
277       binop = llvm::AtomicRMWInst::And;
278       break;
279    case LLVMAtomicRMWBinOpNand:
280       binop = llvm::AtomicRMWInst::Nand;
281       break;
282    case LLVMAtomicRMWBinOpOr:
283       binop = llvm::AtomicRMWInst::Or;
284       break;
285    case LLVMAtomicRMWBinOpXor:
286       binop = llvm::AtomicRMWInst::Xor;
287       break;
288    case LLVMAtomicRMWBinOpMax:
289       binop = llvm::AtomicRMWInst::Max;
290       break;
291    case LLVMAtomicRMWBinOpMin:
292       binop = llvm::AtomicRMWInst::Min;
293       break;
294    case LLVMAtomicRMWBinOpUMax:
295       binop = llvm::AtomicRMWInst::UMax;
296       break;
297    case LLVMAtomicRMWBinOpUMin:
298       binop = llvm::AtomicRMWInst::UMin;
299       break;
300    case LLVMAtomicRMWBinOpFAdd:
301       binop = llvm::AtomicRMWInst::FAdd;
302       break;
303    default:
304       unreachable("invalid LLVMAtomicRMWBinOp");
305       break;
306    }
307    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
308    return llvm::wrap(llvm::unwrap(ctx->builder)
309                         ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
310 #if LLVM_VERSION_MAJOR >= 13
311                                           llvm::MaybeAlign(0),
312 #endif
313                                           llvm::AtomicOrdering::SequentiallyConsistent, SSID));
314 }
315 
ac_build_atomic_cmp_xchg(struct ac_llvm_context * ctx,LLVMValueRef ptr,LLVMValueRef cmp,LLVMValueRef val,const char * sync_scope)316 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
317                                       LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
318 {
319    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
320    return llvm::wrap(llvm::unwrap(ctx->builder)
321                         ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
322                                               llvm::unwrap(val),
323 #if LLVM_VERSION_MAJOR >= 13
324                                               llvm::MaybeAlign(0),
325 #endif
326                                               llvm::AtomicOrdering::SequentiallyConsistent,
327                                               llvm::AtomicOrdering::SequentiallyConsistent, SSID));
328 }
329