• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
17 
18 #include <stdint.h>
19 
20 #include <algorithm>
21 #include <cstdio>
22 #include <list>
23 #include <utility>
24 
25 #include "absl/memory/memory.h"
26 #include "llvm/ExecutionEngine/ExecutionEngine.h"
27 #include "llvm/ExecutionEngine/JITSymbol.h"
28 #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
29 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/Support/CodeGen.h"
33 #include "llvm/Support/Host.h"
34 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
35 #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h"
36 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h"
37 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d_mkl.h"
38 #include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
39 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
40 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
41 #include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
42 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
43 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
44 #include "tensorflow/compiler/xla/service/cpu/runtime_pow.h"
45 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
46 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
47 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
48 #include "tensorflow/compiler/xla/service/cpu/runtime_topk.h"
49 #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
50 #include "tensorflow/compiler/xla/service/custom_call_target_registry.h"
51 #include "tensorflow/compiler/xla/types.h"
52 #include "tensorflow/core/platform/logging.h"
53 
54 namespace xla {
55 namespace cpu {
56 namespace {
57 
DetectMachineAttributes()58 llvm::SmallVector<std::string, 0> DetectMachineAttributes() {
59   llvm::SmallVector<std::string, 0> result;
60   llvm::StringMap<bool> host_features;
61   if (llvm::sys::getHostCPUFeatures(host_features)) {
62     for (auto& feature : host_features) {
63       result.push_back((feature.second ? '+' : '-') +
64                        std::string(feature.first()));
65     }
66   }
67   return result;
68 }
69 
70 }  // namespace
71 
72 /*static*/ std::unique_ptr<llvm::TargetMachine>
InferTargetMachineForJIT(const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level)73 SimpleOrcJIT::InferTargetMachineForJIT(
74     const llvm::TargetOptions& target_options,
75     llvm::CodeGenOpt::Level opt_level) {
76   std::unique_ptr<llvm::TargetMachine> target_machine(
77       llvm::EngineBuilder()
78           .setTargetOptions(target_options)
79           .setOptLevel(opt_level)
80           .selectTarget(
81               /*TargetTriple=*/llvm::Triple(), /*MArch=*/"",
82               /*MCPU=*/llvm::sys::getHostCPUName(),
83               /*MAttrs=*/DetectMachineAttributes()));
84   CHECK(target_machine != nullptr);
85   return target_machine;
86 }
87 
SimpleOrcJIT(std::unique_ptr<llvm::orc::ExecutorProcessControl> target_process_control,std::unique_ptr<llvm::orc::ExecutionSession> execution_session,const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level,bool optimize_for_size,bool disable_expensive_passes,llvm::FastMathFlags fast_math_flags,LLVMCompiler::ModuleHook pre_optimization_hook,LLVMCompiler::ModuleHook post_optimization_hook,std::function<void (const llvm::object::ObjectFile &)> post_codegen_hook)88 SimpleOrcJIT::SimpleOrcJIT(
89     std::unique_ptr<llvm::orc::ExecutorProcessControl> target_process_control,
90     std::unique_ptr<llvm::orc::ExecutionSession> execution_session,
91     const llvm::TargetOptions& target_options,
92     llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
93     bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
94     LLVMCompiler::ModuleHook pre_optimization_hook,
95     LLVMCompiler::ModuleHook post_optimization_hook,
96     std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook)
97     : target_machine_(InferTargetMachineForJIT(target_options, opt_level)),
98       target_triple_(target_machine_->getTargetTriple()),
99       data_layout_(target_machine_->createDataLayout()),
100       target_process_control_(std::move(target_process_control)),
101       execution_session_(std::move(execution_session)),
102       object_layer_(*execution_session_,
103                     []() {
104                       return std::make_unique<llvm::SectionMemoryManager>(
105                           orc_jit_memory_mapper::GetInstance());
106                     }),
107       compile_layer_(
108           *execution_session_, object_layer_,
109           std::make_unique<CompilerFunctor>(
110               target_machine_.get(), opt_level, optimize_for_size,
111               disable_expensive_passes, fast_math_flags,
112               std::move(pre_optimization_hook),
113               std::move(post_optimization_hook), std::move(post_codegen_hook))),
114       main_jit_dylib_(&execution_session_->createBareJITDylib("<main>")),
115       gdb_jit_event_listener_(
116           llvm::JITEventListener::createGDBRegistrationListener()) {
117   VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str()
118           << " features: " << target_machine_->getTargetFeatureString().str();
119 
120   // Materialize unknown symbols from the runtime symbol table.
121   class RuntimeSymbolGenerator : public llvm::orc::DefinitionGenerator {
122     SimpleOrcJIT& jit_;
123 
124    public:
RuntimeSymbolGenerator(SimpleOrcJIT & jit)125     explicit RuntimeSymbolGenerator(SimpleOrcJIT& jit) : jit_(jit) {}
tryToGenerate(llvm::orc::LookupState &,llvm::orc::LookupKind,llvm::orc::JITDylib & jit_dylib,llvm::orc::JITDylibLookupFlags,const llvm::orc::SymbolLookupSet & names)126     llvm::Error tryToGenerate(
127         llvm::orc::LookupState&, llvm::orc::LookupKind,
128         llvm::orc::JITDylib& jit_dylib, llvm::orc::JITDylibLookupFlags,
129         const llvm::orc::SymbolLookupSet& names) override {
130       llvm::orc::SymbolMap new_defs;
131 
132       for (const auto& kv : names) {
133         const auto& name = kv.first;
134         if (llvm::JITEvaluatedSymbol symbol =
135                 jit_.ResolveRuntimeSymbol(*name)) {
136           new_defs[name] = symbol;
137         }
138       }
139 
140       cantFail(jit_dylib.define(absoluteSymbols(std::move(new_defs))));
141       return llvm::Error::success();
142     }
143   };
144   main_jit_dylib_->addGenerator(
145       std::make_unique<RuntimeSymbolGenerator>(*this));
146   object_layer_.registerJITEventListener(*this);
147 
148   // Copied from LLJIT, required to find symbols on Windows.
149   if (target_triple_.isOSBinFormatCOFF()) {
150     object_layer_.setOverrideObjectFlagsWithResponsibilityFlags(true);
151     object_layer_.setAutoClaimResponsibilityForObjectSymbols(true);
152   }
153 }
154 
~SimpleOrcJIT()155 SimpleOrcJIT::~SimpleOrcJIT() {
156   if (auto err = execution_session_->endSession()) {
157     execution_session_->reportError(std::move(err));
158   }
159 }
160 
// Factory for SimpleOrcJIT.
//
// Creates a SelfExecutorProcessControl backed by a fresh SymbolStringPool and
// an ExecutionSession built on an UnsupportedExecutorProcessControl, then
// passes both, together with the compilation options and hooks, to the
// SimpleOrcJIT constructor. Returns the error from
// SelfExecutorProcessControl::Create if that step fails.
llvm::Expected<std::unique_ptr<SimpleOrcJIT>> SimpleOrcJIT::Create(
    const llvm::TargetOptions& target_options,
    llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
    bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
    LLVMCompiler::ModuleHook pre_optimization_hook,
    LLVMCompiler::ModuleHook post_optimization_hook,
    std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook) {
  auto process_control_or_error =
      llvm::orc::SelfExecutorProcessControl::Create(
          std::make_shared<llvm::orc::SymbolStringPool>());
  if (!process_control_or_error) {
    return process_control_or_error.takeError();
  }

  auto session = std::make_unique<llvm::orc::ExecutionSession>(
      std::make_unique<llvm::orc::UnsupportedExecutorProcessControl>());

  return std::make_unique<SimpleOrcJIT>(
      std::move(*process_control_or_error), std::move(session), target_options,
      opt_level, optimize_for_size, disable_expensive_passes, fast_math_flags,
      std::move(pre_optimization_hook), std::move(post_optimization_hook),
      std::move(post_codegen_hook));
}
183 
ResolveRuntimeSymbol(llvm::StringRef name)184 llvm::JITEvaluatedSymbol SimpleOrcJIT::ResolveRuntimeSymbol(
185     llvm::StringRef name) {
186   void* func_addr = nullptr;
187   if (name.size() > 1 && name.front() == data_layout_.getGlobalPrefix()) {
188     // On Mac OS X, 'name' may have a leading underscore prefix, even though the
189     // registered name may not.
190     std::string stripped_name(name.begin() + 1, name.end());
191     func_addr =
192         xla::CustomCallTargetRegistry::Global()->Lookup(stripped_name, "Host");
193   } else {
194     func_addr =
195         xla::CustomCallTargetRegistry::Global()->Lookup(name.str(), "Host");
196   }
197 
198   if (func_addr == nullptr) {
199     LOG(ERROR)
200         << "Unable to resolve runtime symbol: `" << name.str()
201         << "'.  Hint: if the symbol a custom call target, make sure you've "
202            "registered it with the JIT using "
203            "XLA_CPU_REGISTER_CUSTOM_CALL_TARGET.";
204     return nullptr;
205   }
206   llvm::JITEvaluatedSymbol symbol_info(reinterpret_cast<uint64_t>(func_addr),
207                                        llvm::JITSymbolFlags::None);
208   return symbol_info;
209 }
210 
notifyObjectLoaded(llvm::JITEventListener::ObjectKey key,const llvm::object::ObjectFile & object,const llvm::RuntimeDyld::LoadedObjectInfo & object_info)211 void SimpleOrcJIT::notifyObjectLoaded(
212     llvm::JITEventListener::ObjectKey key,
213     const llvm::object::ObjectFile& object,
214     const llvm::RuntimeDyld::LoadedObjectInfo& object_info) {
215   gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
216   size_of_generated_code_in_bytes_ += object.getData().size();
217 }
218 
notifyFreeingObject(llvm::JITEventListener::ObjectKey key)219 void SimpleOrcJIT::notifyFreeingObject(llvm::JITEventListener::ObjectKey key) {
220   gdb_jit_event_listener_->notifyFreeingObject(key);
221 }
222 
AddModule(llvm::orc::ThreadSafeModule module)223 llvm::Error SimpleOrcJIT::AddModule(llvm::orc::ThreadSafeModule module) {
224   return compile_layer_.add(*main_jit_dylib_, std::move(module));
225 }
226 
DoneCompiling()227 void SimpleOrcJIT::DoneCompiling() {
228   // The target machine takes a non-trivial amount of memory, so once we are
229   // done compiling throw it away.
230   target_machine_.reset();
231 }
232 
FindCompiledSymbol(const std::string & name)233 llvm::Expected<llvm::JITEvaluatedSymbol> SimpleOrcJIT::FindCompiledSymbol(
234     const std::string& name) {
235   return execution_session_->lookup({main_jit_dylib_}, name);
236 }
237 
#if defined(PLATFORM_WINDOWS)
// This function is used by compiler-generated code on windows, but it's not
// declared anywhere. The signature does not matter, we just need the address.
// The address is registered under the name "__chkstk" in
// RegisterKnownJITSymbols.
extern "C" void __chkstk(size_t);
#endif
243 
namespace {
// Register some known symbols with the CustomCallTargetRegistry.
//
// Every symbol is registered for the "Host" platform, which is the platform
// SimpleOrcJIT::ResolveRuntimeSymbol queries. Always returns true; the return
// value exists only so the function can be used as the initializer of the
// namespace-scope variable below.
bool RegisterKnownJITSymbols() {
  xla::CustomCallTargetRegistry* registry =
      xla::CustomCallTargetRegistry::Global();
  registry->Register("printf", reinterpret_cast<void*>(&printf), "Host");
  registry->Register("puts", reinterpret_cast<void*>(&puts), "Host");

// Registers the function __xla_cpu_runtime_<base_name> under the name held in
// xla::cpu::runtime::k<base_name>SymbolName, and CHECKs that this name is
// exactly "__xla_cpu_runtime_<base_name>".
#define REGISTER_CPU_RUNTIME_SYMBOL(base_name)                               \
  do {                                                                       \
    auto* function_address =                                                 \
        reinterpret_cast<void*>(__xla_cpu_runtime_##base_name);              \
    registry->Register(xla::cpu::runtime::k##base_name##SymbolName,          \
                       function_address, "Host");                            \
    CHECK_EQ(absl::string_view(xla::cpu::runtime::k##base_name##SymbolName), \
             "__xla_cpu_runtime_" #base_name);                               \
  } while (false)

  REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue);
  REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation);
  REGISTER_CPU_RUNTIME_SYMBOL(AllReduce);
  REGISTER_CPU_RUNTIME_SYMBOL(CollectivePermute);
  REGISTER_CPU_RUNTIME_SYMBOL(AllToAll);
  REGISTER_CPU_RUNTIME_SYMBOL(ReplicaId);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenFft);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC128);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulS32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC128);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulS32);
  REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
  REGISTER_CPU_RUNTIME_SYMBOL(PrintfToStderr);
  REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
  REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSort);
  REGISTER_CPU_RUNTIME_SYMBOL(TopKF32);
  REGISTER_CPU_RUNTIME_SYMBOL(TracingStart);
  REGISTER_CPU_RUNTIME_SYMBOL(TracingEnd);

  // Helper routines registered under their own names (not via the macro
  // above, since they do not follow the __xla_cpu_runtime_ naming scheme).
  registry->Register("__gnu_f2h_ieee", reinterpret_cast<void*>(__gnu_f2h_ieee),
                     "Host");
  registry->Register("__gnu_h2f_ieee", reinterpret_cast<void*>(__gnu_h2f_ieee),
                     "Host");
  registry->Register("__truncdfhf2", reinterpret_cast<void*>(__truncdfhf2),
                     "Host");
  registry->Register("__powisf2", reinterpret_cast<void*>(__powisf2), "Host");
  registry->Register("__powidf2", reinterpret_cast<void*>(__powidf2), "Host");

#undef REGISTER_CPU_RUNTIME_SYMBOL

// Register both the f32 (float) and f64 (double) versions of a libm symbol.
// Unfortunately the double versions are overloaded on some systems, e.g.
// Mac so we need an explicit cast. This requires passing the function signature
// for that case.
//
// The float version is registered as "<name>f" and the double version as
// "<name>"; `double_sig` is the function pointer type used to select the
// double overload.
#define REGISTER_LIBM_SYMBOL(name, double_sig)                                 \
  do {                                                                         \
    registry->Register(#name "f", reinterpret_cast<void*>(name##f), "Host");   \
    registry->Register(#name,                                                  \
                       reinterpret_cast<void*>(static_cast<double_sig>(name)), \
                       "Host");                                                \
  } while (false)

  REGISTER_LIBM_SYMBOL(acos, double (*)(double));
  REGISTER_LIBM_SYMBOL(acosh, double (*)(double));
  REGISTER_LIBM_SYMBOL(asin, double (*)(double));
  REGISTER_LIBM_SYMBOL(asinh, double (*)(double));
  REGISTER_LIBM_SYMBOL(atan, double (*)(double));
  REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(atanh, double (*)(double));
  REGISTER_LIBM_SYMBOL(cbrt, double (*)(double));
  REGISTER_LIBM_SYMBOL(ceil, double (*)(double));
  REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(cos, double (*)(double));
  REGISTER_LIBM_SYMBOL(cosh, double (*)(double));
  REGISTER_LIBM_SYMBOL(erf, double (*)(double));
  REGISTER_LIBM_SYMBOL(erfc, double (*)(double));
  REGISTER_LIBM_SYMBOL(exp, double (*)(double));
  REGISTER_LIBM_SYMBOL(exp2, double (*)(double));
  REGISTER_LIBM_SYMBOL(expm1, double (*)(double));
  REGISTER_LIBM_SYMBOL(fabs, double (*)(double));
  REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(floor, double (*)(double));
  REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double));
  REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*));
  REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(ilogb, int (*)(double));
  REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int));
  REGISTER_LIBM_SYMBOL(lgamma, double (*)(double));
  REGISTER_LIBM_SYMBOL(llrint, long long (*)(double));   // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(llround, long long (*)(double));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(log, double (*)(double));
  REGISTER_LIBM_SYMBOL(log10, double (*)(double));
  REGISTER_LIBM_SYMBOL(log1p, double (*)(double));
  REGISTER_LIBM_SYMBOL(log2, double (*)(double));
  REGISTER_LIBM_SYMBOL(logb, double (*)(double));
  REGISTER_LIBM_SYMBOL(lrint, long (*)(double));   // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(lround, long (*)(double));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*));
  REGISTER_LIBM_SYMBOL(nan, double (*)(const char*));
  REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double));
  REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double));
  REGISTER_LIBM_SYMBOL(pow, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*));
  REGISTER_LIBM_SYMBOL(rint, double (*)(double));
  REGISTER_LIBM_SYMBOL(round, double (*)(double));
  REGISTER_LIBM_SYMBOL(scalbln,
                       double (*)(double, long));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
  REGISTER_LIBM_SYMBOL(sin, double (*)(double));
  // The combined sine/cosine entry points are registered under different
  // names depending on whether __APPLE__ is defined.
#ifdef __APPLE__
  REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
  registry->Register("__sincosf_stret",
                     reinterpret_cast<void*>(__sincosf_stret), "Host");
  registry->Register("__sincos_stret", reinterpret_cast<void*>(__sincos_stret),
                     "Host");
#else
  REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
#endif
  REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
  REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
  REGISTER_LIBM_SYMBOL(tan, double (*)(double));
  REGISTER_LIBM_SYMBOL(tanh, double (*)(double));
  REGISTER_LIBM_SYMBOL(tgamma, double (*)(double));
  REGISTER_LIBM_SYMBOL(trunc, double (*)(double));

#undef REGISTER_LIBM_SYMBOL

  // Memory routines.
  registry->Register("memcpy", reinterpret_cast<void*>(memcpy), "Host");
  registry->Register("memmove", reinterpret_cast<void*>(memmove), "Host");
  registry->Register("memset", reinterpret_cast<void*>(memset), "Host");

  // Apple-only memory routines; note that "__bzero" and "bzero" both map to
  // the address of bzero.
#ifdef __APPLE__
  registry->Register("__bzero", reinterpret_cast<void*>(bzero), "Host");
  registry->Register("bzero", reinterpret_cast<void*>(bzero), "Host");
  registry->Register("memset_pattern16",
                     reinterpret_cast<void*>(memset_pattern16), "Host");
#endif

  // Only registered when MEMORY_SANITIZER is defined.
#ifdef MEMORY_SANITIZER
  registry->Register("__msan_unpoison",
                     reinterpret_cast<void*>(__msan_unpoison), "Host");
#endif

  // Only registered when PLATFORM_WINDOWS is defined.
#if defined(PLATFORM_WINDOWS)
  registry->Register("__chkstk", reinterpret_cast<void*>(__chkstk), "Host");
#endif

  return true;
}

// Namespace-scope variable whose initializer calls RegisterKnownJITSymbols, so
// the registration runs when this variable is initialized. The value itself
// is not read anywhere in this file.
bool unused = RegisterKnownJITSymbols();
}  // namespace
418 
419 }  // namespace cpu
420 }  // namespace xla
421