1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
17
18 #include <stdint.h>
19
20 #include <algorithm>
21 #include <cstdio>
22 #include <list>
23 #include <utility>
24
25 #include "absl/memory/memory.h"
26 #include "llvm/ExecutionEngine/ExecutionEngine.h"
27 #include "llvm/ExecutionEngine/JITSymbol.h"
28 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
29 #include "llvm/IR/Mangler.h"
30 #include "llvm/IR/Operator.h"
31 #include "llvm/Support/CodeGen.h"
32 #include "llvm/Support/Host.h"
33 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
34 #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h"
35 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h"
36 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d_mkl.h"
37 #include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
38 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
39 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
40 #include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
41 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
42 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
43 #include "tensorflow/compiler/xla/service/cpu/runtime_pow.h"
44 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
45 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
46 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
47 #include "tensorflow/compiler/xla/service/cpu/runtime_topk.h"
48 #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
49 #include "tensorflow/compiler/xla/service/custom_call_target_registry.h"
50 #include "tensorflow/compiler/xla/types.h"
51 #include "tensorflow/core/platform/logging.h"
52
53 namespace xla {
54 namespace cpu {
55 namespace {
56
DetectMachineAttributes()57 llvm::SmallVector<std::string, 0> DetectMachineAttributes() {
58 llvm::SmallVector<std::string, 0> result;
59 llvm::StringMap<bool> host_features;
60 if (llvm::sys::getHostCPUFeatures(host_features)) {
61 for (auto& feature : host_features) {
62 result.push_back((feature.second ? '+' : '-') +
63 std::string(feature.first()));
64 }
65 }
66 return result;
67 }
68
69 } // namespace
70
71 /*static*/ std::unique_ptr<llvm::TargetMachine>
InferTargetMachineForJIT(const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level)72 SimpleOrcJIT::InferTargetMachineForJIT(
73 const llvm::TargetOptions& target_options,
74 llvm::CodeGenOpt::Level opt_level) {
75 std::unique_ptr<llvm::TargetMachine> target_machine(
76 llvm::EngineBuilder()
77 .setTargetOptions(target_options)
78 .setOptLevel(opt_level)
79 .selectTarget(
80 /*TargetTriple=*/llvm::Triple(), /*MArch=*/"",
81 /*MCPU=*/llvm::sys::getHostCPUName(),
82 /*MAttrs=*/DetectMachineAttributes()));
83 CHECK(target_machine != nullptr);
84 return target_machine;
85 }
86
SimpleOrcJIT(std::unique_ptr<llvm::orc::TargetProcessControl> target_process_control,std::unique_ptr<llvm::orc::ExecutionSession> execution_session,const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level,bool optimize_for_size,bool disable_expensive_passes,llvm::FastMathFlags fast_math_flags,LLVMCompiler::ModuleHook pre_optimization_hook,LLVMCompiler::ModuleHook post_optimization_hook,std::function<void (const llvm::object::ObjectFile &)> post_codegen_hook)87 SimpleOrcJIT::SimpleOrcJIT(
88 std::unique_ptr<llvm::orc::TargetProcessControl> target_process_control,
89 std::unique_ptr<llvm::orc::ExecutionSession> execution_session,
90 const llvm::TargetOptions& target_options,
91 llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
92 bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
93 LLVMCompiler::ModuleHook pre_optimization_hook,
94 LLVMCompiler::ModuleHook post_optimization_hook,
95 std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook)
96 : target_machine_(InferTargetMachineForJIT(target_options, opt_level)),
97 data_layout_(target_machine_->createDataLayout()),
98 target_process_control_(std::move(target_process_control)),
99 execution_session_(std::move(execution_session)),
100 object_layer_(*execution_session_,
101 []() {
102 return std::make_unique<llvm::SectionMemoryManager>(
103 orc_jit_memory_mapper::GetInstance());
104 }),
105 compile_layer_(
106 *execution_session_, object_layer_,
107 std::make_unique<CompilerFunctor>(
108 target_machine_.get(), opt_level, optimize_for_size,
109 disable_expensive_passes, fast_math_flags,
110 std::move(pre_optimization_hook),
111 std::move(post_optimization_hook), std::move(post_codegen_hook))),
112 main_jit_dylib_(&execution_session_->createBareJITDylib("<main>")),
113 gdb_jit_event_listener_(
114 llvm::JITEventListener::createGDBRegistrationListener()) {
115 VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str()
116 << " features: " << target_machine_->getTargetFeatureString().str();
117
118 // Materialize unknown symbols from the runtime symbol table.
119 class RuntimeSymbolGenerator : public llvm::orc::DefinitionGenerator {
120 SimpleOrcJIT& jit_;
121
122 public:
RuntimeSymbolGenerator(SimpleOrcJIT & jit)123 explicit RuntimeSymbolGenerator(SimpleOrcJIT& jit) : jit_(jit) {}
tryToGenerate(llvm::orc::LookupState &,llvm::orc::LookupKind,llvm::orc::JITDylib & jit_dylib,llvm::orc::JITDylibLookupFlags,const llvm::orc::SymbolLookupSet & names)124 llvm::Error tryToGenerate(
125 llvm::orc::LookupState&, llvm::orc::LookupKind,
126 llvm::orc::JITDylib& jit_dylib, llvm::orc::JITDylibLookupFlags,
127 const llvm::orc::SymbolLookupSet& names) override {
128 llvm::orc::SymbolMap new_defs;
129
130 for (const auto& kv : names) {
131 const auto& name = kv.first;
132 if (llvm::JITEvaluatedSymbol symbol =
133 jit_.ResolveRuntimeSymbol(*name)) {
134 new_defs[name] = symbol;
135 }
136 }
137
138 cantFail(jit_dylib.define(absoluteSymbols(std::move(new_defs))));
139 return llvm::Error::success();
140 }
141 };
142 main_jit_dylib_->addGenerator(
143 std::make_unique<RuntimeSymbolGenerator>(*this));
144 object_layer_.registerJITEventListener(*this);
145
146 // Copied from LLJIT, required to find symbols on Windows.
147 if (target_machine_->getTargetTriple().isOSBinFormatCOFF()) {
148 object_layer_.setOverrideObjectFlagsWithResponsibilityFlags(true);
149 object_layer_.setAutoClaimResponsibilityForObjectSymbols(true);
150 }
151 }
152
~SimpleOrcJIT()153 SimpleOrcJIT::~SimpleOrcJIT() {
154 if (auto err = execution_session_->endSession()) {
155 execution_session_->reportError(std::move(err));
156 }
157 }
158
// Factory for SimpleOrcJIT. Creates a SelfTargetProcessControl backed by a
// fresh symbol string pool and a new ExecutionSession, then forwards them
// together with all compilation options and hooks to the constructor.
// Returns the error from SelfTargetProcessControl::Create if that step fails.
llvm::Expected<std::unique_ptr<SimpleOrcJIT>> SimpleOrcJIT::Create(
    const llvm::TargetOptions& target_options,
    llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
    bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
    LLVMCompiler::ModuleHook pre_optimization_hook,
    LLVMCompiler::ModuleHook post_optimization_hook,
    std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook) {
  auto target_process_control = llvm::orc::SelfTargetProcessControl::Create(
      std::make_shared<llvm::orc::SymbolStringPool>());
  if (!target_process_control) {
    return target_process_control.takeError();
  }

  return std::make_unique<SimpleOrcJIT>(
      std::move(*target_process_control),
      std::make_unique<llvm::orc::ExecutionSession>(), target_options,
      opt_level, optimize_for_size, disable_expensive_passes, fast_math_flags,
      std::move(pre_optimization_hook), std::move(post_optimization_hook),
      std::move(post_codegen_hook));
}
180
ResolveRuntimeSymbol(llvm::StringRef name)181 llvm::JITEvaluatedSymbol SimpleOrcJIT::ResolveRuntimeSymbol(
182 llvm::StringRef name) {
183 void* func_addr = nullptr;
184 if (name.size() > 1 && name.front() == data_layout_.getGlobalPrefix()) {
185 // On Mac OS X, 'name' may have a leading underscore prefix, even though the
186 // registered name may not.
187 std::string stripped_name(name.begin() + 1, name.end());
188 func_addr =
189 xla::CustomCallTargetRegistry::Global()->Lookup(stripped_name, "Host");
190 } else {
191 func_addr =
192 xla::CustomCallTargetRegistry::Global()->Lookup(name.str(), "Host");
193 }
194
195 if (func_addr == nullptr) {
196 LOG(ERROR)
197 << "Unable to resolve runtime symbol: `" << name.str()
198 << "'. Hint: if the symbol a custom call target, make sure you've "
199 "registered it with the JIT using "
200 "XLA_CPU_REGISTER_CUSTOM_CALL_TARGET.";
201 return nullptr;
202 }
203 llvm::JITEvaluatedSymbol symbol_info(reinterpret_cast<uint64_t>(func_addr),
204 llvm::JITSymbolFlags::None);
205 return symbol_info;
206 }
207
notifyObjectLoaded(llvm::JITEventListener::ObjectKey key,const llvm::object::ObjectFile & object,const llvm::RuntimeDyld::LoadedObjectInfo & object_info)208 void SimpleOrcJIT::notifyObjectLoaded(
209 llvm::JITEventListener::ObjectKey key,
210 const llvm::object::ObjectFile& object,
211 const llvm::RuntimeDyld::LoadedObjectInfo& object_info) {
212 gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
213 size_of_generated_code_in_bytes_ += object.getData().size();
214 }
215
notifyFreeingObject(llvm::JITEventListener::ObjectKey key)216 void SimpleOrcJIT::notifyFreeingObject(llvm::JITEventListener::ObjectKey key) {
217 gdb_jit_event_listener_->notifyFreeingObject(key);
218 }
219
AddModule(llvm::orc::ThreadSafeModule module)220 llvm::Error SimpleOrcJIT::AddModule(llvm::orc::ThreadSafeModule module) {
221 return compile_layer_.add(*main_jit_dylib_, std::move(module));
222 }
223
FindCompiledSymbol(const std::string & name)224 llvm::Expected<llvm::JITEvaluatedSymbol> SimpleOrcJIT::FindCompiledSymbol(
225 const std::string& name) {
226 return execution_session_->lookup({main_jit_dylib_}, name);
227 }
228
229 #if defined(PLATFORM_WINDOWS)
// This function is used by compiler-generated code on Windows, but it's not
// declared anywhere. The signature does not matter; we just need the address.
232 extern "C" void __chkstk(size_t);
233 #endif
234
235 namespace {
236 // Register some known symbols with the CustomCallTargetRegistry.
RegisterKnownJITSymbols()237 bool RegisterKnownJITSymbols() {
238 xla::CustomCallTargetRegistry* registry =
239 xla::CustomCallTargetRegistry::Global();
240 registry->Register("printf", reinterpret_cast<void*>(&printf), "Host");
241
242 #define REGISTER_CPU_RUNTIME_SYMBOL(base_name) \
243 do { \
244 auto* function_address = \
245 reinterpret_cast<void*>(__xla_cpu_runtime_##base_name); \
246 registry->Register(xla::cpu::runtime::k##base_name##SymbolName, \
247 function_address, "Host"); \
248 CHECK_EQ(absl::string_view(xla::cpu::runtime::k##base_name##SymbolName), \
249 "__xla_cpu_runtime_" #base_name); \
250 } while (false)
251
252 REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue);
253 REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation);
254 REGISTER_CPU_RUNTIME_SYMBOL(AllReduce);
255 REGISTER_CPU_RUNTIME_SYMBOL(CollectivePermute);
256 REGISTER_CPU_RUNTIME_SYMBOL(AllToAll);
257 REGISTER_CPU_RUNTIME_SYMBOL(ReplicaId);
258 REGISTER_CPU_RUNTIME_SYMBOL(MKLConvF32);
259 REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16);
260 REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32);
261 REGISTER_CPU_RUNTIME_SYMBOL(EigenFft);
262 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16);
263 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32);
264 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64);
265 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC64);
266 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC128);
267 REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulS32);
268 REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF32);
269 REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF64);
270 REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF32);
271 REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64);
272 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16);
273 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
274 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft);
275 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16);
276 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
277 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
278 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC64);
279 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC128);
280 REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulS32);
281 REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
282 REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
283 REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
284 REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSort);
285 REGISTER_CPU_RUNTIME_SYMBOL(TopKF32);
286 REGISTER_CPU_RUNTIME_SYMBOL(TracingStart);
287 REGISTER_CPU_RUNTIME_SYMBOL(TracingEnd);
288
289 registry->Register("__gnu_f2h_ieee", reinterpret_cast<void*>(__gnu_f2h_ieee),
290 "Host");
291 registry->Register("__gnu_h2f_ieee", reinterpret_cast<void*>(__gnu_h2f_ieee),
292 "Host");
293 registry->Register("__truncdfhf2", reinterpret_cast<void*>(__truncdfhf2),
294 "Host");
295 registry->Register("__powisf2", reinterpret_cast<void*>(__powisf2), "Host");
296 registry->Register("__powidf2", reinterpret_cast<void*>(__powidf2), "Host");
297
298 #undef REGISTER_CPU_RUNTIME_SYMBOL
299
300 // Register both the f32 (float) and f64 (double) versions of a libm symbol.
301 // Unfortunately the double versions are overloaded on some systems, e.g.
302 // Mac so we need an explicit cast. This requires passing the function signature
303 // for that case.
304 #define REGISTER_LIBM_SYMBOL(name, double_sig) \
305 do { \
306 registry->Register(#name "f", reinterpret_cast<void*>(name##f), "Host"); \
307 registry->Register(#name, \
308 reinterpret_cast<void*>(static_cast<double_sig>(name)), \
309 "Host"); \
310 } while (false)
311
312 REGISTER_LIBM_SYMBOL(acos, double (*)(double));
313 REGISTER_LIBM_SYMBOL(acosh, double (*)(double));
314 REGISTER_LIBM_SYMBOL(asin, double (*)(double));
315 REGISTER_LIBM_SYMBOL(asinh, double (*)(double));
316 REGISTER_LIBM_SYMBOL(atan, double (*)(double));
317 REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double));
318 REGISTER_LIBM_SYMBOL(atanh, double (*)(double));
319 REGISTER_LIBM_SYMBOL(cbrt, double (*)(double));
320 REGISTER_LIBM_SYMBOL(ceil, double (*)(double));
321 REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double));
322 REGISTER_LIBM_SYMBOL(cos, double (*)(double));
323 REGISTER_LIBM_SYMBOL(cosh, double (*)(double));
324 REGISTER_LIBM_SYMBOL(erf, double (*)(double));
325 REGISTER_LIBM_SYMBOL(erfc, double (*)(double));
326 REGISTER_LIBM_SYMBOL(exp, double (*)(double));
327 REGISTER_LIBM_SYMBOL(exp2, double (*)(double));
328 REGISTER_LIBM_SYMBOL(expm1, double (*)(double));
329 REGISTER_LIBM_SYMBOL(fabs, double (*)(double));
330 REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double));
331 REGISTER_LIBM_SYMBOL(floor, double (*)(double));
332 REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double));
333 REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double));
334 REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double));
335 REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double));
336 REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*));
337 REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double));
338 REGISTER_LIBM_SYMBOL(ilogb, int (*)(double));
339 REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int));
340 REGISTER_LIBM_SYMBOL(lgamma, double (*)(double));
341 REGISTER_LIBM_SYMBOL(llrint, long long (*)(double)); // NOLINT(runtime/int)
342 REGISTER_LIBM_SYMBOL(llround, long long (*)(double)); // NOLINT(runtime/int)
343 REGISTER_LIBM_SYMBOL(log, double (*)(double));
344 REGISTER_LIBM_SYMBOL(log10, double (*)(double));
345 REGISTER_LIBM_SYMBOL(log1p, double (*)(double));
346 REGISTER_LIBM_SYMBOL(log2, double (*)(double));
347 REGISTER_LIBM_SYMBOL(logb, double (*)(double));
348 REGISTER_LIBM_SYMBOL(lrint, long (*)(double)); // NOLINT(runtime/int)
349 REGISTER_LIBM_SYMBOL(lround, long (*)(double)); // NOLINT(runtime/int)
350 REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*));
351 REGISTER_LIBM_SYMBOL(nan, double (*)(const char*));
352 REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double));
353 REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double));
354 REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double));
355 REGISTER_LIBM_SYMBOL(pow, double (*)(double, double));
356 REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double));
357 REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*));
358 REGISTER_LIBM_SYMBOL(rint, double (*)(double));
359 REGISTER_LIBM_SYMBOL(round, double (*)(double));
360 REGISTER_LIBM_SYMBOL(scalbln,
361 double (*)(double, long)); // NOLINT(runtime/int)
362 REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
363 REGISTER_LIBM_SYMBOL(sin, double (*)(double));
364 #ifdef __APPLE__
365 REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
366 registry->Register("__sincosf_stret",
367 reinterpret_cast<void*>(__sincosf_stret), "Host");
368 registry->Register("__sincos_stret", reinterpret_cast<void*>(__sincos_stret),
369 "Host");
370 #else
371 REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
372 #endif
373 REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
374 REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
375 REGISTER_LIBM_SYMBOL(tan, double (*)(double));
376 REGISTER_LIBM_SYMBOL(tanh, double (*)(double));
377 REGISTER_LIBM_SYMBOL(tgamma, double (*)(double));
378 REGISTER_LIBM_SYMBOL(trunc, double (*)(double));
379
380 #undef REGISTER_LIBM_SYMBOL
381
382 registry->Register("memcpy", reinterpret_cast<void*>(memcpy), "Host");
383 registry->Register("memmove", reinterpret_cast<void*>(memmove), "Host");
384 registry->Register("memset", reinterpret_cast<void*>(memset), "Host");
385
386 #ifdef __APPLE__
387 registry->Register("__bzero", reinterpret_cast<void*>(bzero), "Host");
388 registry->Register("memset_pattern16",
389 reinterpret_cast<void*>(memset_pattern16), "Host");
390 #endif
391
392 #ifdef MEMORY_SANITIZER
393 registry->Register("__msan_unpoison",
394 reinterpret_cast<void*>(__msan_unpoison), "Host");
395 #endif
396
397 #if defined(PLATFORM_WINDOWS)
398 registry->Register("__chkstk", reinterpret_cast<void*>(__chkstk), "Host");
399 #endif
400
401 return true;
402 }
403
404 bool unused = RegisterKnownJITSymbols();
405 } // namespace
406
407 } // namespace cpu
408 } // namespace xla
409