// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "Routine.hpp"

#if defined(__clang__)
// LLVM has occurrences of the extra-semi warning in its headers, which will be
// treated as an error in SwiftShader targets.
#	pragma clang diagnostic push
#	pragma clang diagnostic ignored "-Wextra-semi"
#endif  // defined(__clang__)

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
__pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/Analysis/LoopPass.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#if defined(__clang__)
#	pragma clang diagnostic pop
#endif  // defined(__clang__)

#ifdef _MSC_VER
__pragma(warning(pop))
#endif
#include <atomic>
#include <functional>
#include <mutex>
#include <unordered_map>

#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#if __has_feature(memory_sanitizer)
#	include <sanitizer/msan_interface.h>
#endif

#ifdef __ARM_EABI__
extern "C" signed __aeabi_idivmod();
#endif

namespace {

// Cache provides a simple, thread-safe key-value store.
template<typename KEY, typename VALUE>
class Cache
{
public:
	Cache() = default;
	Cache(const Cache &other);
	VALUE getOrCreate(KEY key, std::function<VALUE()> create);

private:
	mutable std::mutex mutex;  // mutable required for copy constructor.
	std::unordered_map<KEY, VALUE> map;
};

template<typename KEY, typename VALUE>
Cache<KEY, VALUE>::Cache(const Cache &other)
{
	std::unique_lock<std::mutex> lock(other.mutex);
	map = other.map;
}

template<typename KEY, typename VALUE>
VALUE Cache<KEY, VALUE>::getOrCreate(KEY key, std::function<VALUE()> create)
{
	std::unique_lock<std::mutex> lock(mutex);
	auto it = map.find(key);
	if(it != map.end())
	{
		return it->second;
	}
	auto value = create();
	map.emplace(key, value);
	return value;
}
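
// Illustrative usage of Cache (not part of the build): create() runs under the
// lock, so the value for a given key is built at most once even if several
// threads request it concurrently. 'Widget' here is a hypothetical type:
//
//   Cache<int, std::shared_ptr<Widget>> widgets;
//   auto w = widgets.getOrCreate(1, [] { return std::make_shared<Widget>(); });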

// JITGlobals is a singleton that holds all the immutable, machine-specific
// information for the host device.
class JITGlobals
{
public:
	using TargetMachineSPtr = std::shared_ptr<llvm::TargetMachine>;

	static JITGlobals *get();

	const std::string mcpu;
	const std::vector<std::string> mattrs;
	const char *const march;
	const llvm::TargetOptions targetOptions;
	const llvm::DataLayout dataLayout;

	TargetMachineSPtr getTargetMachine(rr::Optimization::Level optlevel);

private:
	static JITGlobals create();
	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);
	JITGlobals(const char *mcpu,
	           const std::vector<std::string> &mattrs,
	           const char *march,
	           const llvm::TargetOptions &targetOptions,
	           const llvm::DataLayout &dataLayout);
	JITGlobals(const JITGlobals &) = default;

	Cache<rr::Optimization::Level, TargetMachineSPtr> targetMachines;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = create();
	return &instance;
}

JITGlobals::TargetMachineSPtr JITGlobals::getTargetMachine(rr::Optimization::Level optlevel)
{
#ifdef ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(rr::Optimization::Level::None);
#else   // ENABLE_RR_DEBUG_INFO
	auto llvmOptLevel = toLLVM(optlevel);
#endif  // ENABLE_RR_DEBUG_INFO

	return targetMachines.getOrCreate(optlevel, [&]() {
		return TargetMachineSPtr(llvm::EngineBuilder()
		                             .setOptLevel(llvmOptLevel)
		                             .setMCPU(mcpu)
		                             .setMArch(march)
		                             .setMAttrs(mattrs)
		                             .setTargetOptions(targetOptions)
		                             .selectTarget());
	});
}

JITGlobals JITGlobals::create()
{
	struct LLVMInitializer
	{
		LLVMInitializer()
		{
			llvm::InitializeNativeTarget();
			llvm::InitializeNativeTargetAsmPrinter();
			llvm::InitializeNativeTargetAsmParser();
		}
	};
	static LLVMInitializer initializeLLVM;

	auto mcpu = llvm::sys::getHostCPUName();

	llvm::StringMap<bool> features;
	bool ok = llvm::sys::getHostCPUFeatures(features);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
	ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
	(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

	std::vector<std::string> mattrs;
	for(auto &feature : features)
	{
		if(feature.second) { mattrs.push_back(feature.first().str()); }
	}

	const char *march = nullptr;
#if defined(__x86_64__)
	march = "x86-64";
#elif defined(__i386__)
	march = "x86";
#elif defined(__aarch64__)
	march = "arm64";
#elif defined(__arm__)
	march = "arm";
#elif defined(__mips__)
#	if defined(__mips64)
	march = "mips64el";
#	else
	march = "mipsel";
#	endif
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	march = "ppc64le";
#else
#	error "unknown architecture"
#endif

	llvm::TargetOptions targetOptions;
	targetOptions.UnsafeFPMath = false;

	auto targetMachine = std::unique_ptr<llvm::TargetMachine>(
	    llvm::EngineBuilder()
	        .setOptLevel(llvm::CodeGenOpt::None)
	        .setMCPU(mcpu)
	        .setMArch(march)
	        .setMAttrs(mattrs)
	        .setTargetOptions(targetOptions)
	        .selectTarget());

	auto dataLayout = targetMachine->createDataLayout();

	return JITGlobals(mcpu.data(), mattrs, march, targetOptions, dataLayout);
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	switch(level)
	{
		case rr::Optimization::Level::None: return ::llvm::CodeGenOpt::None;
		case rr::Optimization::Level::Less: return ::llvm::CodeGenOpt::Less;
		case rr::Optimization::Level::Default: return ::llvm::CodeGenOpt::Default;
		case rr::Optimization::Level::Aggressive: return ::llvm::CodeGenOpt::Aggressive;
		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}
	return ::llvm::CodeGenOpt::Default;
}

JITGlobals::JITGlobals(const char *mcpu,
                       const std::vector<std::string> &mattrs,
                       const char *march,
                       const llvm::TargetOptions &targetOptions,
                       const llvm::DataLayout &dataLayout)
    : mcpu(mcpu)
    , mattrs(mattrs)
    , march(march)
    , targetOptions(targetOptions)
    , dataLayout(dataLayout)
{
}
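
// Illustrative usage of JITGlobals (not part of the build): each routine being
// compiled fetches a shared, lazily created llvm::TargetMachine for its
// optimization level:
//
//   auto tm = JITGlobals::get()->getTargetMachine(rr::Optimization::Level::Default);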

class MemoryMapper : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to page size.
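		// For example, with a 4096-byte page size, a request for 5000 bytes is
		// rounded up to 8192. This assumes the page size is a power of two.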
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags)
	{
		// Round down base address to align with a page boundary. This matches
		// DefaultMMapper behavior.
		void *addr = block.base();
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block)
	{
#if LLVM_VERSION_MAJOR >= 9
		size_t size = block.allocatedSize();
#else
		size_t size = block.size();
#endif

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

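// alignUp() rounds val up to the nearest multiple of alignment (assumed to be
// non-zero), e.g. alignUp(13, 8) == 16 and alignUp(16, 8) == 16.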
template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}

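// alignedAlloc() over-allocates by (alignment + 1) bytes, stores the distance
// back to the raw allocation in the byte just before the returned pointer, and
// alignedFree() reads that byte back to recover the pointer to delete[].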
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);                                                                       // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of looped CAS
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
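
// For example, sync_fetch_and_max_4() below builds an atomic max by passing a
// std::max lambda; the loop simply retries until the compare-and-swap succeeds.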
#endif

void *resolveExternalSymbol(const char *name)
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicStore<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicStore<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicStore<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicStore<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};

	struct F
	{
		static void nop() {}
		static void neverCalled() { UNREACHABLE("Should never be called"); }

		static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
		static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
		// Forwarders, since we can't take the address of builtins.
		static void sync_synchronize() { __sync_synchronize(); }
		static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
		static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
		static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
		static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
		static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
		static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
		static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

		static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
		}
		static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
		}
		static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
		{
			return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
		}
#endif
	};

	class Resolver
	{
	public:
		using FunctionMap = std::unordered_map<std::string, void *>;

		FunctionMap functions;

		Resolver()
		{
			functions.emplace("nop", reinterpret_cast<void *>(F::nop));
			functions.emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.emplace("printf", reinterpret_cast<void *>(printf));
			functions.emplace("puts", reinterpret_cast<void *>(puts));
			functions.emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.emplace("powf", reinterpret_cast<void *>(powf));
			functions.emplace("expf", reinterpret_cast<void *>(expf));
			functions.emplace("logf", reinterpret_cast<void *>(logf));
			functions.emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.emplace("log2f", reinterpret_cast<void *>(log2f));

			functions.emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.emplace("coroutine_alloc_frame", reinterpret_cast<void *>(F::coroutine_alloc_frame));
			functions.emplace("coroutine_free_frame", reinterpret_cast<void *>(F::coroutine_free_frame));

#ifdef __APPLE__
			functions.emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ARM_EABI__
			functions.emplace("aeabi_idivmod", reinterpret_cast<void *>(__aeabi_idivmod));
#endif
#ifdef __ANDROID__
			functions.emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(F::neverCalled));
			functions.emplace("sync_synchronize", reinterpret_cast<void *>(F::sync_synchronize));
			functions.emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(F::sync_fetch_and_add_4));
			functions.emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(F::sync_fetch_and_and_4));
			functions.emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(F::sync_fetch_and_or_4));
			functions.emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(F::sync_fetch_and_xor_4));
			functions.emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(F::sync_fetch_and_sub_4));
			functions.emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(F::sync_lock_test_and_set_4));
			functions.emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(F::sync_val_compare_and_swap_4));
			functions.emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(F::sync_fetch_and_max_4));
			functions.emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(F::sync_fetch_and_min_4));
			functions.emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(F::sync_fetch_and_umax_4));
			functions.emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(F::sync_fetch_and_umin_4));
#endif
#if __has_feature(memory_sanitizer)
			functions.emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));
#endif
		}
	};

	static Resolver resolver;

	// Trim off any underscores from the start of the symbol. LLVM likes
	// to prepend these on macOS.
	const char *trimmed = name;
	while(trimmed[0] == '_') { trimmed++; }

	auto it = resolver.functions.find(trimmed);
	// Missing functions will likely make the module fail in exciting non-obvious ways.
	ASSERT_MSG(it != resolver.functions.end(), "Missing external function: '%s'", name);
	return it->second;
}

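// Illustrative example (not part of the build): when the JIT'd object requests
// the symbol "_sinf", the leading underscore is trimmed and the address of the
// host's sinf() is returned from the table above.
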
// JITRoutine is an rr::Routine that holds an LLVM JIT session, compiler and
// object layer, as each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
#if LLVM_VERSION_MAJOR >= 8
	using ObjLayer = llvm::orc::LegacyRTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::LegacyIRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#else
	using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
	using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
#endif

public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : resolver(createLegacyLookupResolver(
	          session,
	          [&](const llvm::StringRef &name) {
		          void *func = resolveExternalSymbol(name.str().c_str());
		          if(func != nullptr)
		          {
			          return llvm::JITSymbol(
			              reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
		          }
		          return objLayer.findSymbol(name, true);
	          },
	          [](llvm::Error err) {
		          if(err)
		          {
			          // TODO: Log the symbol resolution errors.
			          return;
		          }
	          }))
	    , targetMachine(JITGlobals::get()->getTargetMachine(config.getOptimization().getLevel()))
	    , compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine))
	    , objLayer(
	          session,
	          [this](llvm::orc::VModuleKey) {
		          return ObjLayer::Resources{ std::make_shared<llvm::SectionMemoryManager>(&memoryMapper), resolver };
	          },
	          ObjLayer::NotifyLoadedFtor(),
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyObjectEmitted(Obj, L);
#endif  // ENABLE_RR_DEBUG_INFO
	          },
	          [](llvm::orc::VModuleKey, const llvm::object::ObjectFile &Obj) {
#ifdef ENABLE_RR_DEBUG_INFO
		          rr::DebugInfo::NotifyFreeingObject(Obj);
#endif  // ENABLE_RR_DEBUG_INFO
	          })
	    , addresses(count)
	{
		std::vector<std::string> mangledNames(count);
		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];
			static size_t numEmittedFunctions = 0;
			std::string name = "f" + llvm::Twine(numEmittedFunctions++).str();
			func->setName(name);
			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
			func->setDoesNotThrow();

			llvm::raw_string_ostream mangledNameStream(mangledNames[i]);
			llvm::Mangler::getNameWithPrefix(mangledNameStream, name, JITGlobals::get()->dataLayout);
		}

		auto moduleKey = session.allocateVModule();

		// Once the module is passed to the compileLayer, the
		// llvm::Functions are freed. Make sure funcs are not referenced
		// after this point.
		funcs = nullptr;

		llvm::cantFail(compileLayer.addModule(moduleKey, std::move(module)));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			auto symbol = compileLayer.findSymbolIn(moduleKey, mangledNames[i], false);
			if(auto address = symbol.getAddress())
			{
				addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(address.get()));
			}
		}
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}

private:
	std::shared_ptr<llvm::orc::SymbolResolver> resolver;
	std::shared_ptr<llvm::TargetMachine> targetMachine;
	llvm::orc::ExecutionSession session;
	CompileLayer compileLayer;
	MemoryMapper memoryMapper;
	ObjLayer objLayer;
	std::vector<const void *> addresses;
};

}  // anonymous namespace

namespace rr {

JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , module(new llvm::Module("", context))
    , builder(new llvm::IRBuilder<>(context))
{
	module->setDataLayout(JITGlobals::get()->dataLayout);
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	std::unique_ptr<llvm::legacy::PassManager> passManager(
	    new llvm::legacy::PassManager());

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
			case rr::Optimization::Pass::Disabled: break;
			case rr::Optimization::Pass::CFGSimplification: passManager->add(llvm::createCFGSimplificationPass()); break;
			case rr::Optimization::Pass::LICM: passManager->add(llvm::createLICMPass()); break;
			case rr::Optimization::Pass::AggressiveDCE: passManager->add(llvm::createAggressiveDCEPass()); break;
			case rr::Optimization::Pass::GVN: passManager->add(llvm::createGVNPass()); break;
			case rr::Optimization::Pass::InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
			case rr::Optimization::Pass::Reassociate: passManager->add(llvm::createReassociatePass()); break;
			case rr::Optimization::Pass::DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
			case rr::Optimization::Pass::SCCP: passManager->add(llvm::createSCCPPass()); break;
			case rr::Optimization::Pass::ScalarReplAggregates: passManager->add(llvm::createSROAPass()); break;
			case rr::Optimization::Pass::EarlyCSEPass: passManager->add(llvm::createEarlyCSEPass()); break;
			default:
				UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager->run(*module);
}
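
// Illustrative example (not part of the build): a config whose optimization
// pass list is { ScalarReplAggregates, InstructionCombining, CFGSimplification }
// populates the legacy pass manager with createSROAPass(),
// createInstructionCombiningPass() and createCFGSimplificationPass(), in that order.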

std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
}
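
// Note: acquireRoutine() moves the builder's module into the new JITRoutine,
// so the JITBuilder's module pointer is null afterwards and the builder cannot
// be used to emit further functions without constructing a new one.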

}  // namespace rr