// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "LLVMAsm.hpp"
#include "PragmaInternals.hpp"
#include "Routine.hpp"

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
__pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#ifdef _MSC_VER
__pragma(warning(pop))
#endif

#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#ifdef __ARM_EABI__
extern "C" signed __aeabi_idivmod();
#endif

#if __has_feature(memory_sanitizer)

// TODO(b/155148722): Remove when we no longer unpoison any writes.
#	include "sanitizer/msan_interface.h"

#	include <dlfcn.h>  // dlsym()

// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
// the 'shadow' values of function arguments and return values. The LLVM JIT can't
// access TLS directly, but it calls __emutls_get_address() to obtain the address.
// Typically, it would be passed a pointer to an __emutls_control structure with a
// name starting with "__emutls_v." that represents the TLS. Both the address of
// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
// the symbol resolver, which can be overridden.
// We take advantage of this by substituting __emutls_get_address() with our own
// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v.
// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
// of the real TLS variable corresponding to the enum, in statically compiled C++.
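//
// Illustrative sketch (not literal generated code): a JIT-compiled load of a
// shadow value conceptually performs
//     void *shadow = __emutls_get_address(&__emutls_v.__msan_param_tls);
// which, with the substitutions above in place, becomes
//     void *shadow = rr::getTLSAddress(reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
// and returns &__msan_param_tls for the calling thread.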

// Forward declare the real TLS variables used by MemorySanitizer. These are
// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
extern __thread unsigned long long __msan_param_tls[];
extern __thread unsigned long long __msan_retval_tls[];
extern __thread unsigned long long __msan_va_arg_tls[];
extern __thread unsigned long long __msan_va_arg_overflow_size_tls;

namespace rr {

enum class MSanTLS
{
	param = 1,            // __msan_param_tls
	retval,               // __msan_retval_tls
	va_arg,               // __msan_va_arg_tls
	va_arg_overflow_size  // __msan_va_arg_overflow_size_tls
};

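// Resolves an MSanTLS enum value, smuggled through the 'control' pointer by the
// symbol resolver below, to the address of the corresponding real TLS variable.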
static void *getTLSAddress(void *control)
{
	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
	switch(tlsIndex)
	{
	case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
	case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
	case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
	case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
	default:
		UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", int(tlsIndex));
		return nullptr;
	}
}

}  // namespace rr
#endif

namespace {

// TODO(b/174587935): Eliminate command-line parsing.
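// Note: only the arguments passed on the first call are parsed; subsequent
// calls simply return the cached result of that initial parse.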
bool parseCommandLineOptionsOnce(int argc, const char *const *argv)
{
	// Use a static, immediately invoked lambda to make this thread-safe.
	static auto initialized = [=]() {
		return llvm::cl::ParseCommandLineOptions(argc, argv);
	}();

	return initialized;
}

// JITGlobals is a singleton that holds all the immutable, machine-specific
// information for the host device.
class JITGlobals
{
public:
	static JITGlobals *get();

	llvm::orc::JITTargetMachineBuilder getTargetMachineBuilder(rr::Optimization::Level optLevel) const;
	const llvm::DataLayout &getDataLayout() const;
	const llvm::Triple &getTargetTriple() const;

private:
	JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout);

	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);

	const llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder;
	const llvm::DataLayout dataLayout;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = [] {
		const char *argv[] = {
			"Reactor",
#if defined(__i386__) || defined(__x86_64__)
			"-x86-asm-syntax=intel",  // Use Intel syntax rather than the default AT&T
#endif
#if LLVM_VERSION_MAJOR <= 12
			"-warn-stack-size=524288"  // Warn when a function uses more than 512 KiB of stack memory
#else
		// TODO(b/191193823): TODO(ndesaulniers): Update this after
		// go/compilers/fc018ebb608ee0c1239b405460e49f1835ab6175
#	if LLVM_VERSION_MAJOR < 9999
#		error Implement stack size checks using the "warn-stack-size" function attribute.
#	endif
#endif
		};

		parseCommandLineOptionsOnce(sizeof(argv) / sizeof(argv[0]), argv);

		llvm::InitializeNativeTarget();
		llvm::InitializeNativeTargetAsmPrinter();
		llvm::InitializeNativeTargetAsmParser();

		// TODO(b/171236524): JITTargetMachineBuilder::detectHost() currently uses the target triple of the host,
		// rather than a valid triple for the current process. Once fixed, we can use that function instead.
		llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder(llvm::Triple(LLVM_DEFAULT_TARGET_TRIPLE));

		// Retrieve the host CPU name and sub-target features and add them to the builder.
		// The relocation model, code model, and codegen opt level are kept at their default values.
		llvm::StringMap<bool> cpuFeatures;
		bool ok = llvm::sys::getHostCPUFeatures(cpuFeatures);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
		ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
		(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

		for(auto &feature : cpuFeatures)
		{
			jitTargetMachineBuilder.getFeatures().AddFeature(feature.first(), feature.second);
		}

#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		jitTargetMachineBuilder.setCPU(std::string(llvm::sys::getHostCPUName()));
#else
		jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
#endif

		// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
		ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
		                                            jitTargetMachineBuilder.getOptions().EmulatedTLS));

		auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
		ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");

		return JITGlobals(std::move(jitTargetMachineBuilder), std::move(dataLayout.get()));
	}();

	return &instance;
}

llvm::orc::JITTargetMachineBuilder JITGlobals::getTargetMachineBuilder(rr::Optimization::Level optLevel) const
{
	llvm::orc::JITTargetMachineBuilder out = jitTargetMachineBuilder;
	out.setCodeGenOptLevel(toLLVM(optLevel));

	return out;
}

const llvm::DataLayout &JITGlobals::getDataLayout() const
{
	return dataLayout;
}

const llvm::Triple &JITGlobals::getTargetTriple() const
{
	return jitTargetMachineBuilder.getTargetTriple();
}

JITGlobals::JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout)
    : jitTargetMachineBuilder(jitTargetMachineBuilder)
    , dataLayout(dataLayout)
{
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	// TODO(b/173257647): MemorySanitizer instrumentation produces IR which takes
	// the machine code optimization passes a lot longer to process. Disabling
	// them has a negligible effect on code quality but compiles much faster.
	if(__has_feature(memory_sanitizer))
	{
		return llvm::CodeGenOpt::None;
	}

	switch(level)
	{
	case rr::Optimization::Level::None: return llvm::CodeGenOpt::None;
	case rr::Optimization::Level::Less: return llvm::CodeGenOpt::Less;
	case rr::Optimization::Level::Default: return llvm::CodeGenOpt::Default;
	case rr::Optimization::Level::Aggressive: return llvm::CodeGenOpt::Aggressive;
	default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}

	return llvm::CodeGenOpt::Default;
}

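// MemoryMapper bridges llvm::SectionMemoryManager's memory mapping callbacks to
// Reactor's cross-platform executable-memory primitives (rr::allocateMemoryPages,
// rr::protectMemoryPages, rr::deallocateMemoryPages).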
class MemoryMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to page size.
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags)
	{
		// Round down base address to align with a page boundary. This matches
		// DefaultMMapper behavior.
		void *addr = block.base();
		size_t size = block.allocatedSize();
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
	{
		size_t size = block.allocatedSize();

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

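// Rounds val up to the nearest multiple of alignment. Assumes alignment is
// non-zero; it does not need to be a power of two.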
template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}

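// Allocates 'size' bytes with the requested alignment. The byte immediately
// preceding the returned pointer stores the offset back to the start of the
// underlying allocation, so alignedFree() can recover and delete it. The
// alignment must be less than 256 so that this offset fits in a single byte.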
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);                                                                       // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

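// Generic backing implementations for the "atomic_load"/"atomic_store" symbols
// registered in the Resolver below. Since the symbol resolver trims leading
// underscores, these also appear to serve the __atomic_load/__atomic_store
// libcalls that LLVM may emit for atomics it does not lower inline.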
template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of looped CAS
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
#endif

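// Generates definitions for external symbols referenced by JIT-compiled code,
// mapping well-known function names to their host implementations.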
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
class ExternalSymbolGenerator : public llvm::orc::DefinitionGenerator
#else
class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
#endif
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
			case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
			case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
			case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
			case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
			default:
				UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
			case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
			case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
			case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
			case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
			default:
				UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};


	static void nop() {}
	static void neverCalled() { UNREACHABLE("Should never be called"); }

	static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
	static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
	// Forwarders, since we can't take the address of builtins.
	static void sync_synchronize() { __sync_synchronize(); }
	static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
	static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
	static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
	static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
	static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
	static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
	static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

	static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
	}
	static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
	}
#endif

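	// Maps external symbol names (with any leading underscores already trimmed
	// by the caller) to the host function addresses that satisfy them.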
	class Resolver
	{
	public:
		using FunctionMap = llvm::StringMap<void *>;

		FunctionMap functions;

		Resolver()
		{
#ifdef ENABLE_RR_PRINT
			functions.try_emplace("rr::DebugPrintf", reinterpret_cast<void *>(rr::DebugPrintf));
#endif
			functions.try_emplace("nop", reinterpret_cast<void *>(nop));
			functions.try_emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.try_emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.try_emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.try_emplace("printf", reinterpret_cast<void *>(printf));
			functions.try_emplace("puts", reinterpret_cast<void *>(puts));
			functions.try_emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.try_emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.try_emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.try_emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.try_emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.try_emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.try_emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.try_emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.try_emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.try_emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.try_emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.try_emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.try_emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.try_emplace("powf", reinterpret_cast<void *>(powf));
			functions.try_emplace("expf", reinterpret_cast<void *>(expf));
			functions.try_emplace("logf", reinterpret_cast<void *>(logf));
			functions.try_emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.try_emplace("log2f", reinterpret_cast<void *>(log2f));

			functions.try_emplace("fmod", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(fmod)));
			functions.try_emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.try_emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.try_emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.try_emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.try_emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.try_emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.try_emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.try_emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.try_emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.try_emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.try_emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.try_emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.try_emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.try_emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.try_emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.try_emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.try_emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.try_emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.try_emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
			functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));

			functions.try_emplace("memset", reinterpret_cast<void *>(memset));

#ifdef __APPLE__
			functions.try_emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.try_emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ARM_EABI__
			functions.try_emplace("aeabi_idivmod", reinterpret_cast<void *>(__aeabi_idivmod));
#endif
#ifdef __ANDROID__
			functions.try_emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(neverCalled));
			functions.try_emplace("sync_synchronize", reinterpret_cast<void *>(sync_synchronize));
			functions.try_emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(sync_fetch_and_add_4));
			functions.try_emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(sync_fetch_and_and_4));
			functions.try_emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(sync_fetch_and_or_4));
			functions.try_emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(sync_fetch_and_xor_4));
			functions.try_emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(sync_fetch_and_sub_4));
			functions.try_emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(sync_lock_test_and_set_4));
			functions.try_emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(sync_val_compare_and_swap_4));
			functions.try_emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(sync_fetch_and_max_4));
			functions.try_emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(sync_fetch_and_min_4));
			functions.try_emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(sync_fetch_and_umax_4));
			functions.try_emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(sync_fetch_and_umin_4));

#	if defined(__i386__)
			// TODO(b/172974501): Workaround for an x86-32 issue where an R_386_PC32 relocation is used
			// when calling a C function from Reactor code whose address is not associated with any symbol
			// (since it's an absolute constant), but which still invokes the symbol resolver for "".
			functions.try_emplace("", nullptr);
#	endif
#endif
#if __has_feature(memory_sanitizer)
			functions.try_emplace("emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
			functions.try_emplace("emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
			functions.try_emplace("emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
			functions.try_emplace("emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
			functions.try_emplace("emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));

			// TODO(b/155148722): Remove when we no longer unpoison any writes.
			functions.try_emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
			functions.try_emplace("msan_unpoison_param", reinterpret_cast<void *>(__msan_unpoison_param));
#endif
		}
	};

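	// Invoked by ORC when a lookup fails to find a symbol in the JITDylib.
	// Defines every requested symbol we can satisfy; any that remain missing
	// are reported in debug builds, since they will break the routine.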
	llvm::Error tryToGenerate(
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
	    llvm::orc::LookupState &state,
#endif
	    llvm::orc::LookupKind kind,
	    llvm::orc::JITDylib &dylib,
	    llvm::orc::JITDylibLookupFlags flags,
	    const llvm::orc::SymbolLookupSet &set) override
	{
		static Resolver resolver;

		llvm::orc::SymbolMap symbols;

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		std::string missing;
#endif  // !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)

		for(auto symbol : set)
		{
			auto name = symbol.first;

			// Trim off any underscores from the start of the symbol. LLVM likes
			// to prepend these on macOS.
			auto trimmed = (*name).drop_while([](char c) { return c == '_'; });

			auto it = resolver.functions.find(trimmed.str());
			if(it != resolver.functions.end())
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(it->second)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}

#if __has_feature(memory_sanitizer)
			// MemorySanitizer uses a dynamically linked runtime. Instrumented routines reference
			// some symbols from this library. Look them up dynamically in the default namespace.
			// Note this approach should not be used for other symbols, since they might not be
			// visible (e.g. due to static linking), we may wish to provide an alternate
			// implementation, and/or it would be a security vulnerability.

			void *address = dlsym(RTLD_DEFAULT, (*symbol.first).data());

			if(address)
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(address)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}
#endif

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
			missing += (missing.empty() ? "'" : ", '") + (*name).str() + "'";
#endif
		}

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		// Missing functions will likely make the module fail in non-obvious ways.
		if(!missing.empty())
		{
			WARN("Missing external functions: %s", missing.c_str());
		}
#endif

		if(symbols.empty())
		{
			return llvm::Error::success();
		}

		return dylib.define(llvm::orc::absoluteSymbols(std::move(symbols)));
	}
};

// As we must support different LLVM versions, add a generic Unwrap for functions that return Expected<T> or the actual T.
// TODO(b/165000222): Remove after LLVM 11 upgrade
template<typename T>
auto &Unwrap(llvm::Expected<T> &&v)
{
	return v.get();
}
template<typename T>
auto &Unwrap(T &&v)
{
	return v;
}

// Sets *fatal to true if a diagnostic is received which makes a routine invalid or unusable.
struct FatalDiagnosticsHandler : public llvm::DiagnosticHandler
{
	FatalDiagnosticsHandler(bool *fatal)
	    : fatal(fatal)
	{}

	bool handleDiagnostics(const llvm::DiagnosticInfo &info) override
	{
		switch(info.getSeverity())
		{
		case llvm::DS_Error:
			ASSERT_MSG(false, "LLVM JIT compilation failure");
			*fatal = true;
			break;
		case llvm::DS_Warning:
			if(info.getKind() == llvm::DK_StackSize)
			{
				// Stack size limit exceeded
				*fatal = true;
			}
			break;
		case llvm::DS_Remark:
			break;
		case llvm::DS_Note:
			break;
		}

		return true;  // Diagnostic handled; don't let LLVM print it.
	}

	bool *fatal;
};


// JITRoutine is an rr::Routine that holds an LLVM JIT session, compiler, and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    std::unique_ptr<llvm::LLVMContext> context,
	    const char *name,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : name(name)
#if LLVM_VERSION_MAJOR >= 13
	    , session(std::move(*llvm::orc::SelfExecutorProcessControl::Create()))
#endif
	    , objectLayer(session, []() {
		    static MemoryMapper memoryMapper;
		    return std::make_unique<llvm::SectionMemoryManager>(&memoryMapper);
	    })
	    , addresses(count)
	{
		bool fatalCompileIssue = false;
		context->setDiagnosticHandler(std::make_unique<FatalDiagnosticsHandler>(&fatalCompileIssue), true);

#ifdef ENABLE_RR_DEBUG_INFO
		// TODO(b/165000222): Update this on the next LLVM roll.
		// https://github.com/llvm/llvm-project/commit/98f2bb4461072347dcca7d2b1b9571b3a6525801
		// introduces RTDyldObjectLinkingLayer::registerJITEventListener().
		// The current API does not appear to have any way to bind the
		// rr::DebugInfo::NotifyFreeingObject event.
		objectLayer.setNotifyLoaded([](llvm::orc::VModuleKey,
		                               const llvm::object::ObjectFile &obj,
		                               const llvm::RuntimeDyld::LoadedObjectInfo &l) {
			static std::atomic<uint64_t> unique_key{ 0 };
			rr::DebugInfo::NotifyObjectEmitted(unique_key++, obj, l);
		});
#endif  // ENABLE_RR_DEBUG_INFO

		if(JITGlobals::get()->getTargetTriple().isOSBinFormatCOFF())
		{
			// Hack to support symbol visibility in COFF.
			// Matches the hack in llvm::orc::LLJIT::createObjectLinkingLayer().
			// See the documentation on these functions for more detail.
			objectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
			objectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
		}

		llvm::SmallVector<llvm::orc::SymbolStringPtr, 8> functionNames(count);
		llvm::orc::MangleAndInterner mangle(session, JITGlobals::get()->getDataLayout());

		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];

			if(!func->hasName())
			{
				func->setName("f" + llvm::Twine(i).str());
			}

			functionNames[i] = mangle(func->getName());
		}

#ifdef ENABLE_RR_EMIT_ASM_FILE
		const auto asmFilename = rr::AsmFile::generateFilename(name);
		rr::AsmFile::emitAsmFile(asmFilename, JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel()), *module);
#endif

		// Once the module is passed to the compileLayer, the llvm::Functions are freed.
		// Make sure funcs are not referenced after this point.
		funcs = nullptr;

		llvm::orc::IRCompileLayer compileLayer(session, objectLayer, std::make_unique<llvm::orc::ConcurrentIRCompiler>(JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel())));
		llvm::orc::JITDylib &dylib(Unwrap(session.createJITDylib("<routine>")));
		dylib.addGenerator(std::make_unique<ExternalSymbolGenerator>());

		llvm::cantFail(compileLayer.add(dylib, llvm::orc::ThreadSafeModule(std::move(module), std::move(context))));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			fatalCompileIssue = false;  // May be set to true by session.lookup()

			// This is where the actual compilation happens.
			auto symbol = session.lookup({ &dylib }, functionNames[i]);

			ASSERT_MSG(symbol, "Failed to look up address of routine function %d: %s",
			           (int)i, llvm::toString(symbol.takeError()).c_str());

			if(fatalCompileIssue)
			{
				addresses[i] = nullptr;
			}
			else  // Successful compilation
			{
				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(symbol->getAddress()));
			}
		}

#ifdef ENABLE_RR_EMIT_ASM_FILE
		rr::AsmFile::fixupAsmFile(asmFilename, addresses);
#endif
	}

	~JITRoutine()
	{
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		if(auto err = session.endSession())
		{
			session.reportError(std::move(err));
		}
#endif
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}

private:
	std::string name;
	llvm::orc::ExecutionSession session;
	llvm::orc::RTDyldObjectLinkingLayer objectLayer;
	std::vector<const void *> addresses;
};

}  // anonymous namespace

namespace rr {

JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , context(new llvm::LLVMContext())
    , module(new llvm::Module("", *context))
    , builder(new llvm::IRBuilder<>(*context))
{
	module->setTargetTriple(LLVM_DEFAULT_TARGET_TRIPLE);
	module->setDataLayout(JITGlobals::get()->getDataLayout());

	if(REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION ||
	   getPragmaState(MemorySanitizerInstrumentation))
	{
		msanInstrumentation = true;
	}
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	llvm::legacy::PassManager passManager;

	if(__has_feature(memory_sanitizer) && msanInstrumentation)
	{
		passManager.add(llvm::createMemorySanitizerLegacyPassPass());
	}

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
		case rr::Optimization::Pass::Disabled: break;
		case rr::Optimization::Pass::CFGSimplification: passManager.add(llvm::createCFGSimplificationPass()); break;
		case rr::Optimization::Pass::LICM: passManager.add(llvm::createLICMPass()); break;
		case rr::Optimization::Pass::AggressiveDCE: passManager.add(llvm::createAggressiveDCEPass()); break;
		case rr::Optimization::Pass::GVN: passManager.add(llvm::createGVNPass()); break;
		case rr::Optimization::Pass::InstructionCombining: passManager.add(llvm::createInstructionCombiningPass()); break;
		case rr::Optimization::Pass::Reassociate: passManager.add(llvm::createReassociatePass()); break;
		case rr::Optimization::Pass::DeadStoreElimination: passManager.add(llvm::createDeadStoreEliminationPass()); break;
		case rr::Optimization::Pass::SCCP: passManager.add(llvm::createSCCPPass()); break;
		case rr::Optimization::Pass::ScalarReplAggregates: passManager.add(llvm::createSROAPass()); break;
		case rr::Optimization::Pass::EarlyCSEPass: passManager.add(llvm::createEarlyCSEPass()); break;
		default:
			UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager.run(*module);
}
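// Transfers ownership of the module and context to a new JITRoutine, which
// compiles the given functions and exposes their entry-point addresses.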
std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(const char *name, llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), std::move(context), name, funcs, count, cfg);
}

}  // namespace rr