// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "LLVMAsm.hpp"
#include "Routine.hpp"

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
__pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#ifdef _MSC_VER
__pragma(warning(pop))
#endif

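// Note: these are the Windows stack-probe helpers. LLVM-generated code calls
// them for functions with large stack frames, so the JIT must be able to
// resolve them; the Resolver below maps the "chkstk" symbol to the one that
// matches the target.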
#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#ifdef __ARM_EABI__
extern "C" signed __aeabi_idivmod();
#endif

#if __has_feature(memory_sanitizer)

// TODO(b/155148722): Remove when we no longer unpoison all writes.
#	if !REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
#		include "sanitizer/msan_interface.h"
#	endif

#	include <dlfcn.h>  // dlsym()

// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
// the 'shadow' values of function arguments and return values. The LLVM JIT can't
// access TLS directly, but it calls __emutls_get_address() to obtain the address.
// Typically, it would be passed a pointer to an __emutls_control structure with a
// name starting with "__emutls_v." that represents the TLS. Both the address of
// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
// the symbol resolver, which can be overridden.
// We take advantage of this by substituting __emutls_get_address() with our own
// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v.
// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
// of the real TLS variable corresponding to the enum, in statically compiled C++.
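//
// Illustratively (a sketch of the resolution flow, not code in this file):
// instrumented JIT code performs the equivalent of
//     __emutls_get_address(&__emutls_v.__msan_param_tls)
// which the resolver rewires to
//     rr::getTLSAddress(reinterpret_cast<void *>(MSanTLS::param))
// returning &__msan_param_tls, the real TLS array defined by the MSan runtime.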

// Forward declare the real TLS variables used by MemorySanitizer. These are
// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
extern __thread unsigned long long __msan_param_tls[];
extern __thread unsigned long long __msan_retval_tls[];
extern __thread unsigned long long __msan_va_arg_tls[];
extern __thread unsigned long long __msan_va_arg_overflow_size_tls;

namespace rr {

// Values start at 1, presumably so that the fake __emutls_v. address (the
// 'control' pointer passed to getTLSAddress) is never null.
enum class MSanTLS
{
	param = 1,            // __msan_param_tls
	retval,               // __msan_retval_tls
	va_arg,               // __msan_va_arg_tls
	va_arg_overflow_size  // __msan_va_arg_overflow_size_tls
};

static void *getTLSAddress(void *control)
{
	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
	switch(tlsIndex)
	{
	case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
	case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
	case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
	case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
	default:
		UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", static_cast<int>(tlsIndex));
		return nullptr;
	}
}

}  // namespace rr
#endif

namespace {

// TODO(b/174587935): Eliminate command-line parsing.
bool parseCommandLineOptionsOnce(int argc, const char *const *argv)
{
	// Use a static immediately invoked lambda to make this thread safe
	static auto initialized = [=]() {
		return llvm::cl::ParseCommandLineOptions(argc, argv);
	}();

	return initialized;
}
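
// Note: C++11 static-local initialization ("magic statics") guarantees the
// lambda above runs exactly once, even under concurrent callers; later calls,
// including ones passing different arguments, just return the cached result.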

// JITGlobals is a singleton that holds all the immutable machine specific
// information for the host device.
class JITGlobals
{
public:
	static JITGlobals *get();

	llvm::orc::JITTargetMachineBuilder getTargetMachineBuilder(rr::Optimization::Level optLevel) const;
	const llvm::DataLayout &getDataLayout() const;
	const llvm::Triple &getTargetTriple() const;

private:
	JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout);

	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);

	const llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder;
	const llvm::DataLayout dataLayout;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = [] {
		const char *argv[] = {
			"Reactor",
#if defined(__i386__) || defined(__x86_64__)
			"-x86-asm-syntax=intel",  // Use Intel syntax rather than the default AT&T
#endif
			"-warn-stack-size=524288"  // Warn when a function uses more than 512 KiB of stack memory
		};

		parseCommandLineOptionsOnce(sizeof(argv) / sizeof(argv[0]), argv);

		llvm::InitializeNativeTarget();
		llvm::InitializeNativeTargetAsmPrinter();
		llvm::InitializeNativeTargetAsmParser();

		// TODO(b/171236524): JITTargetMachineBuilder::detectHost() currently uses the target triple of the host,
		// rather than a valid triple for the current process. Once fixed, we can use that function instead.
		llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder(llvm::Triple(LLVM_DEFAULT_TARGET_TRIPLE));

		// Retrieve host CPU name and sub-target features and add them to builder.
		// Relocation model, code model and codegen opt level are kept to default values.
		llvm::StringMap<bool> cpuFeatures;
		bool ok = llvm::sys::getHostCPUFeatures(cpuFeatures);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
		ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
		(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

		for(auto &feature : cpuFeatures)
		{
			jitTargetMachineBuilder.getFeatures().AddFeature(feature.first(), feature.second);
		}

#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		jitTargetMachineBuilder.setCPU(std::string(llvm::sys::getHostCPUName()));
#else
		jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
#endif

		// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
		ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
		                                            jitTargetMachineBuilder.getOptions().EmulatedTLS));

		auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
		ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");

		return JITGlobals(std::move(jitTargetMachineBuilder), std::move(dataLayout.get()));
	}();

	return &instance;
}

llvm::orc::JITTargetMachineBuilder JITGlobals::getTargetMachineBuilder(rr::Optimization::Level optLevel) const
{
	llvm::orc::JITTargetMachineBuilder out = jitTargetMachineBuilder;
	out.setCodeGenOptLevel(toLLVM(optLevel));

	return out;
}

const llvm::DataLayout &JITGlobals::getDataLayout() const
{
	return dataLayout;
}

const llvm::Triple &JITGlobals::getTargetTriple() const
{
	return jitTargetMachineBuilder.getTargetTriple();
}

JITGlobals::JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout)
    : jitTargetMachineBuilder(jitTargetMachineBuilder)
    , dataLayout(dataLayout)
{
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	// TODO(b/173257647): MemorySanitizer instrumentation produces IR which takes
	// a lot longer to process by the machine code optimization passes. Disabling
	// them has a negligible effect on code quality but compiles much faster.
	if(__has_feature(memory_sanitizer))
	{
		return llvm::CodeGenOpt::None;
	}

	switch(level)
	{
	case rr::Optimization::Level::None: return llvm::CodeGenOpt::None;
	case rr::Optimization::Level::Less: return llvm::CodeGenOpt::Less;
	case rr::Optimization::Level::Default: return llvm::CodeGenOpt::Default;
	case rr::Optimization::Level::Aggressive: return llvm::CodeGenOpt::Aggressive;
	default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}

	return llvm::CodeGenOpt::Default;
}

class MemoryMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to the page size. The bit masking assumes the
		// page size is a power of two, which holds on all supported platforms.
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags)
	{
		// Round down base address to align with a page boundary. This matches
		// DefaultMMapper behavior.
		void *addr = block.base();
		size_t size = block.allocatedSize();
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
	{
		size_t size = block.allocatedSize();

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}
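// For example, alignUp(13, 8) == 16 and alignUp(16, 8) == 16. Alignment is
// assumed to be non-zero but, unlike the page-size bit masking above, it does
// not need to be a power of two.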

// Allocates size bytes aligned to the given alignment. The one-byte offset
// from the start of the underlying new[] allocation is stored immediately
// before the returned pointer, so that alignedFree() can recover the original
// allocation. The alignment must be below 256 so that the offset fits in a
// single byte (hence the ASSERT):
//
//   allocation ... [offset byte][returned aligned pointer -> size bytes]
//
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);  // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of a looped compare-and-swap: read the current
	// value, compute the new value, and retry if another thread modified *ptr
	// in between. Returns the previous value, like the __sync_fetch_and_*
	// builtins.
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
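
// For example, the fetch-and-min/max forwarders further below build on this:
//     sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); })
// behaves like a hypothetical __sync_fetch_and_umin_4 builtin.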
#endif

#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
class ExternalSymbolGenerator : public llvm::orc::DefinitionGenerator
#else
class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
#endif
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
			case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
			case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
			case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
			case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
			default:
				UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
			case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
			case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
			case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
			case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
			default:
				UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};

	static void nop() {}
	static void neverCalled() { UNREACHABLE("Should never be called"); }

	static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
	static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
	// Forwarders, since we can't take the address of builtins.
	static void sync_synchronize() { __sync_synchronize(); }
	static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
	static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
	static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
	static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
	static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
	static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
	static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

	static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
	}
	static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
	}
#endif

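	// Resolver maps unmangled symbol names to host function addresses. Names
	// are registered without any leading underscore; tryToGenerate() below
	// trims leading underscores from lookups (LLVM prepends one on macOS), so
	// the same table works across platforms.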
	class Resolver
	{
	public:
		using FunctionMap = llvm::StringMap<void *>;

		FunctionMap functions;

		Resolver()
		{
#ifdef ENABLE_RR_PRINT
			functions.try_emplace("rr::DebugPrintf", reinterpret_cast<void *>(rr::DebugPrintf));
#endif
			functions.try_emplace("nop", reinterpret_cast<void *>(nop));
			functions.try_emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.try_emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.try_emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.try_emplace("printf", reinterpret_cast<void *>(printf));
			functions.try_emplace("puts", reinterpret_cast<void *>(puts));
			functions.try_emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.try_emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.try_emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.try_emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.try_emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.try_emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.try_emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.try_emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.try_emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.try_emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.try_emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.try_emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.try_emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.try_emplace("powf", reinterpret_cast<void *>(powf));
			functions.try_emplace("expf", reinterpret_cast<void *>(expf));
			functions.try_emplace("logf", reinterpret_cast<void *>(logf));
			functions.try_emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.try_emplace("log2f", reinterpret_cast<void *>(log2f));

			// The double-precision entries are cast to select the double overload,
			// since <cmath> also declares float and long double overloads of these names.
			functions.try_emplace("fmod", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(fmod)));
			functions.try_emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.try_emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.try_emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.try_emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.try_emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.try_emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.try_emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.try_emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.try_emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.try_emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.try_emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.try_emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.try_emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.try_emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.try_emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.try_emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.try_emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.try_emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.try_emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
			functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));

			functions.try_emplace("memset", reinterpret_cast<void *>(memset));

#ifdef __APPLE__
			functions.try_emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.try_emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ARM_EABI__
			functions.try_emplace("aeabi_idivmod", reinterpret_cast<void *>(__aeabi_idivmod));
#endif
#ifdef __ANDROID__
			functions.try_emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(neverCalled));
			functions.try_emplace("sync_synchronize", reinterpret_cast<void *>(sync_synchronize));
			functions.try_emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(sync_fetch_and_add_4));
			functions.try_emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(sync_fetch_and_and_4));
			functions.try_emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(sync_fetch_and_or_4));
			functions.try_emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(sync_fetch_and_xor_4));
			functions.try_emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(sync_fetch_and_sub_4));
			functions.try_emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(sync_lock_test_and_set_4));
			functions.try_emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(sync_val_compare_and_swap_4));
			functions.try_emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(sync_fetch_and_max_4));
			functions.try_emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(sync_fetch_and_min_4));
			functions.try_emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(sync_fetch_and_umax_4));
			functions.try_emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(sync_fetch_and_umin_4));

#	if defined(__i386__)
			// TODO(b/172974501): Workaround for an x86-32 issue where an R_386_PC32 relocation is used
			// when calling a C function from Reactor code whose address is not associated with any symbol
			// (since it's an absolute constant), but which still invokes the symbol resolver for "".
			functions.try_emplace("", nullptr);
#	endif
#endif
#if __has_feature(memory_sanitizer)

			// TODO(b/155148722): Remove when we no longer unpoison all writes.
#	if !REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
			functions.try_emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
#	endif

			functions.try_emplace("emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
			functions.try_emplace("emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
			functions.try_emplace("emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
			functions.try_emplace("emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
			functions.try_emplace("emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));
#endif
		}
	};

	llvm::Error tryToGenerate(
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
	    llvm::orc::LookupState &state,
#endif
	    llvm::orc::LookupKind kind,
	    llvm::orc::JITDylib &dylib,
	    llvm::orc::JITDylibLookupFlags flags,
	    const llvm::orc::SymbolLookupSet &set) override
	{
		static Resolver resolver;

		llvm::orc::SymbolMap symbols;

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		std::string missing;
#endif  // !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)

		for(auto symbol : set)
		{
			auto name = symbol.first;

			// Trim off any underscores from the start of the symbol. LLVM likes
			// to prepend these on macOS.
			auto trimmed = (*name).drop_while([](char c) { return c == '_'; });

			auto it = resolver.functions.find(trimmed.str());
			if(it != resolver.functions.end())
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(it->second)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}

#if __has_feature(memory_sanitizer)
			// MemorySanitizer uses a dynamically linked runtime, and instrumented routines
			// reference some symbols from that library. Look them up dynamically in the
			// default namespace. Note this approach should not be used for other symbols:
			// they might not be visible (e.g. due to static linking), we may wish to provide
			// an alternate implementation, and it could be a security vulnerability.

			void *address = dlsym(RTLD_DEFAULT, (*symbol.first).data());

			if(address)
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(address)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}
#endif

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
			missing += (missing.empty() ? "'" : ", '") + (*name).str() + "'";
#endif
		}

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		// Missing functions will likely make the module fail in non-obvious ways.
		if(!missing.empty())
		{
			WARN("Missing external functions: %s", missing.c_str());
		}
#endif

		if(symbols.empty())
		{
			return llvm::Error::success();
		}

		return dylib.define(llvm::orc::absoluteSymbols(std::move(symbols)));
	}
};

// As we must support different LLVM versions, add a generic Unwrap for functions that return Expected<T> or the actual T.
// TODO(b/165000222): Remove after LLVM 11 upgrade.
template<typename T>
auto &Unwrap(llvm::Expected<T> &&v)
{
	return v.get();
}
template<typename T>
auto &Unwrap(T &&v)
{
	return v;
}
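
// For example, session.createJITDylib() returns llvm::orc::JITDylib & in older
// LLVM versions but llvm::Expected<llvm::orc::JITDylib &> in LLVM 11, and
// Unwrap(session.createJITDylib("<routine>")) yields the JITDylib reference in
// both cases (see its use in JITRoutine below).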

// Sets *fatal to true if a diagnostic is received which makes a routine invalid or unusable.
struct FatalDiagnosticsHandler : public llvm::DiagnosticHandler
{
	FatalDiagnosticsHandler(bool *fatal)
	    : fatal(fatal)
	{}

	bool handleDiagnostics(const llvm::DiagnosticInfo &info) override
	{
		switch(info.getSeverity())
		{
		case llvm::DS_Error:
			ASSERT_MSG(false, "LLVM JIT compilation failure");
			*fatal = true;
			break;
		case llvm::DS_Warning:
			if(info.getKind() == llvm::DK_StackSize)
			{
				// Stack size limit exceeded
				*fatal = true;
			}
			break;
		case llvm::DS_Remark:
			break;
		case llvm::DS_Note:
			break;
		}

		return true;  // Diagnostic handled, don't let LLVM print it.
	}

	bool *fatal;
};

// JITRoutine is a rr::Routine that holds an LLVM JIT session, compiler and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    std::unique_ptr<llvm::LLVMContext> context,
	    const char *name,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : name(name)
	    , objectLayer(session, []() {
		    static MemoryMapper memoryMapper;
		    return std::make_unique<llvm::SectionMemoryManager>(&memoryMapper);
	    })
	    , addresses(count)
	{
		bool fatalCompileIssue = false;
		context->setDiagnosticHandler(std::make_unique<FatalDiagnosticsHandler>(&fatalCompileIssue), true);

#ifdef ENABLE_RR_DEBUG_INFO
		// TODO(b/165000222): Update this on next LLVM roll.
		// https://github.com/llvm/llvm-project/commit/98f2bb4461072347dcca7d2b1b9571b3a6525801
		// introduces RTDyldObjectLinkingLayer::registerJITEventListener().
		// The current API does not appear to have any way to bind the
		// rr::DebugInfo::NotifyFreeingObject event.
		objectLayer.setNotifyLoaded([](llvm::orc::VModuleKey,
		                               const llvm::object::ObjectFile &obj,
		                               const llvm::RuntimeDyld::LoadedObjectInfo &l) {
			static std::atomic<uint64_t> unique_key{ 0 };
			rr::DebugInfo::NotifyObjectEmitted(unique_key++, obj, l);
		});
#endif  // ENABLE_RR_DEBUG_INFO

		if(JITGlobals::get()->getTargetTriple().isOSBinFormatCOFF())
		{
			// Hack to support symbol visibility in COFF.
			// Matches the hack in llvm::orc::LLJIT::createObjectLinkingLayer().
			// See documentation on these functions for more detail.
			objectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
			objectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
		}

		llvm::SmallVector<llvm::orc::SymbolStringPtr, 8> functionNames(count);
		llvm::orc::MangleAndInterner mangle(session, JITGlobals::get()->getDataLayout());

		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];

			if(!func->hasName())
			{
				func->setName("f" + llvm::Twine(i).str());
			}

			functionNames[i] = mangle(func->getName());
		}

#ifdef ENABLE_RR_EMIT_ASM_FILE
		const auto asmFilename = rr::AsmFile::generateFilename(name);
		rr::AsmFile::emitAsmFile(asmFilename, JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel()), *module);
#endif

		// Once the module is passed to the compileLayer, the llvm::Functions are freed.
		// Make sure funcs are not referenced after this point.
		funcs = nullptr;

		llvm::orc::IRCompileLayer compileLayer(session, objectLayer, std::make_unique<llvm::orc::ConcurrentIRCompiler>(JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel())));
		llvm::orc::JITDylib &dylib(Unwrap(session.createJITDylib("<routine>")));
		dylib.addGenerator(std::make_unique<ExternalSymbolGenerator>());

		llvm::cantFail(compileLayer.add(dylib, llvm::orc::ThreadSafeModule(std::move(module), std::move(context))));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			fatalCompileIssue = false;  // May be set to true by session.lookup()

			// This is where the actual compilation happens.
			auto symbol = session.lookup({ &dylib }, functionNames[i]);

			ASSERT_MSG(symbol, "Failed to lookup address of routine function %d: %s",
			           (int)i, llvm::toString(symbol.takeError()).c_str());

			if(fatalCompileIssue)
			{
				addresses[i] = nullptr;
			}
			else  // Successful compilation
			{
				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(symbol->getAddress()));
			}
		}

#ifdef ENABLE_RR_EMIT_ASM_FILE
		rr::AsmFile::fixupAsmFile(asmFilename, addresses);
#endif
	}

	~JITRoutine()
	{
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		if(auto err = session.endSession())
		{
			session.reportError(std::move(err));
		}
#endif
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}

private:
	std::string name;
	llvm::orc::ExecutionSession session;
	llvm::orc::RTDyldObjectLinkingLayer objectLayer;
	std::vector<const void *> addresses;
};

}  // anonymous namespace

namespace rr {

JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , context(new llvm::LLVMContext())
    , module(new llvm::Module("", *context))
    , builder(new llvm::IRBuilder<>(*context))
{
	module->setTargetTriple(LLVM_DEFAULT_TARGET_TRIPLE);
	module->setDataLayout(JITGlobals::get()->getDataLayout());
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	llvm::legacy::PassManager passManager;

#if REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
	if(__has_feature(memory_sanitizer))
	{
		passManager.add(llvm::createMemorySanitizerLegacyPassPass());
	}
#endif

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
		case rr::Optimization::Pass::Disabled: break;
		case rr::Optimization::Pass::CFGSimplification: passManager.add(llvm::createCFGSimplificationPass()); break;
		case rr::Optimization::Pass::LICM: passManager.add(llvm::createLICMPass()); break;
		case rr::Optimization::Pass::AggressiveDCE: passManager.add(llvm::createAggressiveDCEPass()); break;
		case rr::Optimization::Pass::GVN: passManager.add(llvm::createGVNPass()); break;
		case rr::Optimization::Pass::InstructionCombining: passManager.add(llvm::createInstructionCombiningPass()); break;
		case rr::Optimization::Pass::Reassociate: passManager.add(llvm::createReassociatePass()); break;
		case rr::Optimization::Pass::DeadStoreElimination: passManager.add(llvm::createDeadStoreEliminationPass()); break;
		case rr::Optimization::Pass::SCCP: passManager.add(llvm::createSCCPPass()); break;
		case rr::Optimization::Pass::ScalarReplAggregates: passManager.add(llvm::createSROAPass()); break;
		case rr::Optimization::Pass::EarlyCSEPass: passManager.add(llvm::createEarlyCSEPass()); break;
		default:
			UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager.run(*module);
}

std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(const char *name, llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), std::move(context), name, funcs, count, cfg);
}

}  // namespace rr