1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
17
18 #include <stdint.h>
19
20 #include <algorithm>
21 #include <cstdio>
22 #include <list>
23 #include <utility>
24
25 #include "absl/memory/memory.h"
26 #include "llvm/ExecutionEngine/ExecutionEngine.h"
27 #include "llvm/ExecutionEngine/JITSymbol.h"
28 #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
29 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/Support/CodeGen.h"
33 #include "llvm/Support/Host.h"
34 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
35 #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h"
36 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h"
37 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d_mkl.h"
38 #include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
39 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
40 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
41 #include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
42 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
43 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
44 #include "tensorflow/compiler/xla/service/cpu/runtime_pow.h"
45 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
46 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
47 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
48 #include "tensorflow/compiler/xla/service/cpu/runtime_topk.h"
49 #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
50 #include "tensorflow/compiler/xla/service/custom_call_target_registry.h"
51 #include "tensorflow/compiler/xla/types.h"
52 #include "tensorflow/core/platform/logging.h"
53
54 namespace xla {
55 namespace cpu {
56 namespace {
57
DetectMachineAttributes()58 llvm::SmallVector<std::string, 0> DetectMachineAttributes() {
59 llvm::SmallVector<std::string, 0> result;
60 llvm::StringMap<bool> host_features;
61 if (llvm::sys::getHostCPUFeatures(host_features)) {
62 for (auto& feature : host_features) {
63 result.push_back((feature.second ? '+' : '-') +
64 std::string(feature.first()));
65 }
66 }
67 return result;
68 }
69
70 } // namespace
71
72 /*static*/ std::unique_ptr<llvm::TargetMachine>
InferTargetMachineForJIT(const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level)73 SimpleOrcJIT::InferTargetMachineForJIT(
74 const llvm::TargetOptions& target_options,
75 llvm::CodeGenOpt::Level opt_level) {
76 std::unique_ptr<llvm::TargetMachine> target_machine(
77 llvm::EngineBuilder()
78 .setTargetOptions(target_options)
79 .setOptLevel(opt_level)
80 .selectTarget(
81 /*TargetTriple=*/llvm::Triple(), /*MArch=*/"",
82 /*MCPU=*/llvm::sys::getHostCPUName(),
83 /*MAttrs=*/DetectMachineAttributes()));
84 CHECK(target_machine != nullptr);
85 return target_machine;
86 }
87
SimpleOrcJIT(std::unique_ptr<llvm::orc::ExecutorProcessControl> target_process_control,std::unique_ptr<llvm::orc::ExecutionSession> execution_session,const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level,bool optimize_for_size,bool disable_expensive_passes,llvm::FastMathFlags fast_math_flags,LLVMCompiler::ModuleHook pre_optimization_hook,LLVMCompiler::ModuleHook post_optimization_hook,std::function<void (const llvm::object::ObjectFile &)> post_codegen_hook)88 SimpleOrcJIT::SimpleOrcJIT(
89 std::unique_ptr<llvm::orc::ExecutorProcessControl> target_process_control,
90 std::unique_ptr<llvm::orc::ExecutionSession> execution_session,
91 const llvm::TargetOptions& target_options,
92 llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
93 bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
94 LLVMCompiler::ModuleHook pre_optimization_hook,
95 LLVMCompiler::ModuleHook post_optimization_hook,
96 std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook)
97 : target_machine_(InferTargetMachineForJIT(target_options, opt_level)),
98 target_triple_(target_machine_->getTargetTriple()),
99 data_layout_(target_machine_->createDataLayout()),
100 target_process_control_(std::move(target_process_control)),
101 execution_session_(std::move(execution_session)),
102 object_layer_(*execution_session_,
103 []() {
104 return std::make_unique<llvm::SectionMemoryManager>(
105 orc_jit_memory_mapper::GetInstance());
106 }),
107 compile_layer_(
108 *execution_session_, object_layer_,
109 std::make_unique<CompilerFunctor>(
110 target_machine_.get(), opt_level, optimize_for_size,
111 disable_expensive_passes, fast_math_flags,
112 std::move(pre_optimization_hook),
113 std::move(post_optimization_hook), std::move(post_codegen_hook))),
114 main_jit_dylib_(&execution_session_->createBareJITDylib("<main>")),
115 gdb_jit_event_listener_(
116 llvm::JITEventListener::createGDBRegistrationListener()) {
117 VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str()
118 << " features: " << target_machine_->getTargetFeatureString().str();
119
120 // Materialize unknown symbols from the runtime symbol table.
121 class RuntimeSymbolGenerator : public llvm::orc::DefinitionGenerator {
122 SimpleOrcJIT& jit_;
123
124 public:
RuntimeSymbolGenerator(SimpleOrcJIT & jit)125 explicit RuntimeSymbolGenerator(SimpleOrcJIT& jit) : jit_(jit) {}
tryToGenerate(llvm::orc::LookupState &,llvm::orc::LookupKind,llvm::orc::JITDylib & jit_dylib,llvm::orc::JITDylibLookupFlags,const llvm::orc::SymbolLookupSet & names)126 llvm::Error tryToGenerate(
127 llvm::orc::LookupState&, llvm::orc::LookupKind,
128 llvm::orc::JITDylib& jit_dylib, llvm::orc::JITDylibLookupFlags,
129 const llvm::orc::SymbolLookupSet& names) override {
130 llvm::orc::SymbolMap new_defs;
131
132 for (const auto& kv : names) {
133 const auto& name = kv.first;
134 if (llvm::JITEvaluatedSymbol symbol =
135 jit_.ResolveRuntimeSymbol(*name)) {
136 new_defs[name] = symbol;
137 }
138 }
139
140 cantFail(jit_dylib.define(absoluteSymbols(std::move(new_defs))));
141 return llvm::Error::success();
142 }
143 };
144 main_jit_dylib_->addGenerator(
145 std::make_unique<RuntimeSymbolGenerator>(*this));
146 object_layer_.registerJITEventListener(*this);
147
148 // Copied from LLJIT, required to find symbols on Windows.
149 if (target_triple_.isOSBinFormatCOFF()) {
150 object_layer_.setOverrideObjectFlagsWithResponsibilityFlags(true);
151 object_layer_.setAutoClaimResponsibilityForObjectSymbols(true);
152 }
153 }
154
~SimpleOrcJIT()155 SimpleOrcJIT::~SimpleOrcJIT() {
156 if (auto err = execution_session_->endSession()) {
157 execution_session_->reportError(std::move(err));
158 }
159 }
160
// Factory for SimpleOrcJIT. Creates an in-process executor process control and
// a fresh execution session, then forwards everything to the constructor.
// Returns the LLVM error if the process control could not be created.
llvm::Expected<std::unique_ptr<SimpleOrcJIT>> SimpleOrcJIT::Create(
    const llvm::TargetOptions& target_options,
    llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
    bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
    LLVMCompiler::ModuleHook pre_optimization_hook,
    LLVMCompiler::ModuleHook post_optimization_hook,
    std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook) {
  auto process_control_or_error = llvm::orc::SelfExecutorProcessControl::Create(
      std::make_shared<llvm::orc::SymbolStringPool>());
  if (!process_control_or_error) {
    return process_control_or_error.takeError();
  }

  // The session itself is built on an UnsupportedExecutorProcessControl; the
  // self-executor control created above is passed to the JIT separately.
  auto session_process_control =
      std::make_unique<llvm::orc::UnsupportedExecutorProcessControl>();
  auto session = std::make_unique<llvm::orc::ExecutionSession>(
      std::move(session_process_control));

  return std::make_unique<SimpleOrcJIT>(
      std::move(*process_control_or_error), std::move(session), target_options,
      opt_level, optimize_for_size, disable_expensive_passes, fast_math_flags,
      std::move(pre_optimization_hook), std::move(post_optimization_hook),
      std::move(post_codegen_hook));
}
183
ResolveRuntimeSymbol(llvm::StringRef name)184 llvm::JITEvaluatedSymbol SimpleOrcJIT::ResolveRuntimeSymbol(
185 llvm::StringRef name) {
186 void* func_addr = nullptr;
187 if (name.size() > 1 && name.front() == data_layout_.getGlobalPrefix()) {
188 // On Mac OS X, 'name' may have a leading underscore prefix, even though the
189 // registered name may not.
190 std::string stripped_name(name.begin() + 1, name.end());
191 func_addr =
192 xla::CustomCallTargetRegistry::Global()->Lookup(stripped_name, "Host");
193 } else {
194 func_addr =
195 xla::CustomCallTargetRegistry::Global()->Lookup(name.str(), "Host");
196 }
197
198 if (func_addr == nullptr) {
199 LOG(ERROR)
200 << "Unable to resolve runtime symbol: `" << name.str()
201 << "'. Hint: if the symbol a custom call target, make sure you've "
202 "registered it with the JIT using "
203 "XLA_CPU_REGISTER_CUSTOM_CALL_TARGET.";
204 return nullptr;
205 }
206 llvm::JITEvaluatedSymbol symbol_info(reinterpret_cast<uint64_t>(func_addr),
207 llvm::JITSymbolFlags::None);
208 return symbol_info;
209 }
210
notifyObjectLoaded(llvm::JITEventListener::ObjectKey key,const llvm::object::ObjectFile & object,const llvm::RuntimeDyld::LoadedObjectInfo & object_info)211 void SimpleOrcJIT::notifyObjectLoaded(
212 llvm::JITEventListener::ObjectKey key,
213 const llvm::object::ObjectFile& object,
214 const llvm::RuntimeDyld::LoadedObjectInfo& object_info) {
215 gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
216 size_of_generated_code_in_bytes_ += object.getData().size();
217 }
218
notifyFreeingObject(llvm::JITEventListener::ObjectKey key)219 void SimpleOrcJIT::notifyFreeingObject(llvm::JITEventListener::ObjectKey key) {
220 gdb_jit_event_listener_->notifyFreeingObject(key);
221 }
222
AddModule(llvm::orc::ThreadSafeModule module)223 llvm::Error SimpleOrcJIT::AddModule(llvm::orc::ThreadSafeModule module) {
224 return compile_layer_.add(*main_jit_dylib_, std::move(module));
225 }
226
DoneCompiling()227 void SimpleOrcJIT::DoneCompiling() {
228 // The target machine takes a non-trivial amount of memory, so once we are
229 // done compiling throw it away.
230 target_machine_.reset();
231 }
232
FindCompiledSymbol(const std::string & name)233 llvm::Expected<llvm::JITEvaluatedSymbol> SimpleOrcJIT::FindCompiledSymbol(
234 const std::string& name) {
235 return execution_session_->lookup({main_jit_dylib_}, name);
236 }
237
238 #if defined(PLATFORM_WINDOWS)
// This function is used by compiler-generated code on Windows, but it's not
// declared anywhere. The signature does not matter; we just need the address.
241 extern "C" void __chkstk(size_t);
242 #endif
243
244 namespace {
245 // Register some known symbols with the CustomCallTargetRegistry.
RegisterKnownJITSymbols()246 bool RegisterKnownJITSymbols() {
247 xla::CustomCallTargetRegistry* registry =
248 xla::CustomCallTargetRegistry::Global();
249 registry->Register("printf", reinterpret_cast<void*>(&printf), "Host");
250 registry->Register("puts", reinterpret_cast<void*>(&puts), "Host");
251
252 #define REGISTER_CPU_RUNTIME_SYMBOL(base_name) \
253 do { \
254 auto* function_address = \
255 reinterpret_cast<void*>(__xla_cpu_runtime_##base_name); \
256 registry->Register(xla::cpu::runtime::k##base_name##SymbolName, \
257 function_address, "Host"); \
258 CHECK_EQ(absl::string_view(xla::cpu::runtime::k##base_name##SymbolName), \
259 "__xla_cpu_runtime_" #base_name); \
260 } while (false)
261
262 REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue);
263 REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation);
264 REGISTER_CPU_RUNTIME_SYMBOL(AllReduce);
265 REGISTER_CPU_RUNTIME_SYMBOL(CollectivePermute);
266 REGISTER_CPU_RUNTIME_SYMBOL(AllToAll);
267 REGISTER_CPU_RUNTIME_SYMBOL(ReplicaId);
268 REGISTER_CPU_RUNTIME_SYMBOL(MKLConvF32);
269 REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16);
270 REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32);
271 REGISTER_CPU_RUNTIME_SYMBOL(EigenFft);
272 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16);
273 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32);
274 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64);
275 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC64);
276 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC128);
277 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulS32);
278 REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF32);
279 REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF64);
280 REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF32);
281 REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64);
282 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16);
283 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
284 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft);
285 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16);
286 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
287 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
288 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC64);
289 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC128);
290 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulS32);
291 REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
292 REGISTER_CPU_RUNTIME_SYMBOL(PrintfToStderr);
293 REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
294 REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
295 REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSort);
296 REGISTER_CPU_RUNTIME_SYMBOL(TopKF32);
297 REGISTER_CPU_RUNTIME_SYMBOL(TracingStart);
298 REGISTER_CPU_RUNTIME_SYMBOL(TracingEnd);
299
300 registry->Register("__gnu_f2h_ieee", reinterpret_cast<void*>(__gnu_f2h_ieee),
301 "Host");
302 registry->Register("__gnu_h2f_ieee", reinterpret_cast<void*>(__gnu_h2f_ieee),
303 "Host");
304 registry->Register("__truncdfhf2", reinterpret_cast<void*>(__truncdfhf2),
305 "Host");
306 registry->Register("__powisf2", reinterpret_cast<void*>(__powisf2), "Host");
307 registry->Register("__powidf2", reinterpret_cast<void*>(__powidf2), "Host");
308
309 #undef REGISTER_CPU_RUNTIME_SYMBOL
310
311 // Register both the f32 (float) and f64 (double) versions of a libm symbol.
312 // Unfortunately the double versions are overloaded on some systems, e.g.
313 // Mac so we need an explicit cast. This requires passing the function signature
314 // for that case.
315 #define REGISTER_LIBM_SYMBOL(name, double_sig) \
316 do { \
317 registry->Register(#name "f", reinterpret_cast<void*>(name##f), "Host"); \
318 registry->Register(#name, \
319 reinterpret_cast<void*>(static_cast<double_sig>(name)), \
320 "Host"); \
321 } while (false)
322
323 REGISTER_LIBM_SYMBOL(acos, double (*)(double));
324 REGISTER_LIBM_SYMBOL(acosh, double (*)(double));
325 REGISTER_LIBM_SYMBOL(asin, double (*)(double));
326 REGISTER_LIBM_SYMBOL(asinh, double (*)(double));
327 REGISTER_LIBM_SYMBOL(atan, double (*)(double));
328 REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double));
329 REGISTER_LIBM_SYMBOL(atanh, double (*)(double));
330 REGISTER_LIBM_SYMBOL(cbrt, double (*)(double));
331 REGISTER_LIBM_SYMBOL(ceil, double (*)(double));
332 REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double));
333 REGISTER_LIBM_SYMBOL(cos, double (*)(double));
334 REGISTER_LIBM_SYMBOL(cosh, double (*)(double));
335 REGISTER_LIBM_SYMBOL(erf, double (*)(double));
336 REGISTER_LIBM_SYMBOL(erfc, double (*)(double));
337 REGISTER_LIBM_SYMBOL(exp, double (*)(double));
338 REGISTER_LIBM_SYMBOL(exp2, double (*)(double));
339 REGISTER_LIBM_SYMBOL(expm1, double (*)(double));
340 REGISTER_LIBM_SYMBOL(fabs, double (*)(double));
341 REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double));
342 REGISTER_LIBM_SYMBOL(floor, double (*)(double));
343 REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double));
344 REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double));
345 REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double));
346 REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double));
347 REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*));
348 REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double));
349 REGISTER_LIBM_SYMBOL(ilogb, int (*)(double));
350 REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int));
351 REGISTER_LIBM_SYMBOL(lgamma, double (*)(double));
352 REGISTER_LIBM_SYMBOL(llrint, long long (*)(double)); // NOLINT(runtime/int)
353 REGISTER_LIBM_SYMBOL(llround, long long (*)(double)); // NOLINT(runtime/int)
354 REGISTER_LIBM_SYMBOL(log, double (*)(double));
355 REGISTER_LIBM_SYMBOL(log10, double (*)(double));
356 REGISTER_LIBM_SYMBOL(log1p, double (*)(double));
357 REGISTER_LIBM_SYMBOL(log2, double (*)(double));
358 REGISTER_LIBM_SYMBOL(logb, double (*)(double));
359 REGISTER_LIBM_SYMBOL(lrint, long (*)(double)); // NOLINT(runtime/int)
360 REGISTER_LIBM_SYMBOL(lround, long (*)(double)); // NOLINT(runtime/int)
361 REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*));
362 REGISTER_LIBM_SYMBOL(nan, double (*)(const char*));
363 REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double));
364 REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double));
365 REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double));
366 REGISTER_LIBM_SYMBOL(pow, double (*)(double, double));
367 REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double));
368 REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*));
369 REGISTER_LIBM_SYMBOL(rint, double (*)(double));
370 REGISTER_LIBM_SYMBOL(round, double (*)(double));
371 REGISTER_LIBM_SYMBOL(scalbln,
372 double (*)(double, long)); // NOLINT(runtime/int)
373 REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
374 REGISTER_LIBM_SYMBOL(sin, double (*)(double));
375 #ifdef __APPLE__
376 REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
377 registry->Register("__sincosf_stret",
378 reinterpret_cast<void*>(__sincosf_stret), "Host");
379 registry->Register("__sincos_stret", reinterpret_cast<void*>(__sincos_stret),
380 "Host");
381 #else
382 REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
383 #endif
384 REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
385 REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
386 REGISTER_LIBM_SYMBOL(tan, double (*)(double));
387 REGISTER_LIBM_SYMBOL(tanh, double (*)(double));
388 REGISTER_LIBM_SYMBOL(tgamma, double (*)(double));
389 REGISTER_LIBM_SYMBOL(trunc, double (*)(double));
390
391 #undef REGISTER_LIBM_SYMBOL
392
393 registry->Register("memcpy", reinterpret_cast<void*>(memcpy), "Host");
394 registry->Register("memmove", reinterpret_cast<void*>(memmove), "Host");
395 registry->Register("memset", reinterpret_cast<void*>(memset), "Host");
396
397 #ifdef __APPLE__
398 registry->Register("__bzero", reinterpret_cast<void*>(bzero), "Host");
399 registry->Register("bzero", reinterpret_cast<void*>(bzero), "Host");
400 registry->Register("memset_pattern16",
401 reinterpret_cast<void*>(memset_pattern16), "Host");
402 #endif
403
404 #ifdef MEMORY_SANITIZER
405 registry->Register("__msan_unpoison",
406 reinterpret_cast<void*>(__msan_unpoison), "Host");
407 #endif
408
409 #if defined(PLATFORM_WINDOWS)
410 registry->Register("__chkstk", reinterpret_cast<void*>(__chkstk), "Host");
411 #endif
412
413 return true;
414 }
415
416 bool unused = RegisterKnownJITSymbols();
417 } // namespace
418
419 } // namespace cpu
420 } // namespace xla
421