1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/lite/kernels/cpu_backend_context.h" 17 18 #include <memory> 19 20 #ifdef TFLITE_HAVE_CPUINFO 21 #include "include/cpuinfo.h" 22 #endif 23 24 #include "public/gemmlowp.h" 25 #include "ruy/context.h" // from @ruy 26 #include "tensorflow/lite/c/common.h" 27 #include "tensorflow/lite/core/macros.h" 28 #include "tensorflow/lite/external_cpu_backend_context.h" 29 #include "tensorflow/lite/kernels/internal/compatibility.h" 30 #include "tensorflow/lite/kernels/op_macros.h" 31 32 namespace { 33 const int kDefaultNumThreadpoolThreads = 1; 34 35 } // namespace 36 37 namespace tflite { 38 39 // Use weak symbols if possible to dispatch to deprecated paths. 40 #if TFLITE_HAS_ATTRIBUTE_WEAK && !defined(__APPLE__) 41 extern TFLITE_ATTRIBUTE_WEAK bool UseGemmlowpOnX86(); 42 #endif // defined(TFLITE_HAS_ATTRIBUTE_WEAK) && !(__APPLE__) 43 44 // TODO(b/138922878) Enable when Ruy builds on Apple. 45 #if defined(TFLITE_HAVE_CPUINFO) && !defined(__APPLE__) ~CpuInfo()46CpuBackendContext::CpuInfo::~CpuInfo() { 47 if (init_status_ == InitStatus::kInitialized) { 48 cpuinfo_deinitialize(); 49 } 50 } 51 EnsureInitialized()52bool CpuBackendContext::CpuInfo::EnsureInitialized() { 53 if (init_status_ == InitStatus::kNotYetAttempted) { 54 init_status_ = Initialize(); 55 } 56 return init_status_ == InitStatus::kInitialized; 57 } 58 59 CpuBackendContext::CpuInfo::InitStatus Initialize()60CpuBackendContext::CpuInfo::Initialize() { 61 TFLITE_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted); 62 if (!cpuinfo_initialize()) { 63 return InitStatus::kFailed; 64 } 65 return InitStatus::kInitialized; 66 } 67 Avx2Fma()68bool CpuBackendContext::CpuInfo::Avx2Fma() { 69 return EnsureInitialized() && cpuinfo_has_x86_avx2() && 70 cpuinfo_has_x86_fma3(); 71 } 72 Avx()73bool CpuBackendContext::CpuInfo::Avx() { 74 return EnsureInitialized() && cpuinfo_has_x86_avx(); 75 } 76 Avx512()77bool CpuBackendContext::CpuInfo::Avx512() { 78 return EnsureInitialized() && cpuinfo_has_x86_avx512f() && 79 cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() && 80 cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl(); 81 } 82 #else 83 ~CpuInfo()84CpuBackendContext::CpuInfo::~CpuInfo() {} 85 EnsureInitialized()86bool CpuBackendContext::CpuInfo::EnsureInitialized() { 87 if (init_status_ == InitStatus::kNotYetAttempted) { 88 init_status_ = InitStatus::kInitialized; 89 } 90 TFLITE_DCHECK_EQ(init_status_, InitStatus::kInitialized); 91 return true; 92 } 93 Avx2Fma()94bool CpuBackendContext::CpuInfo::Avx2Fma() { return false; } 95 Avx()96bool CpuBackendContext::CpuInfo::Avx() { return false; } 97 Avx512()98bool CpuBackendContext::CpuInfo::Avx512() { return false; } 99 #endif // TFLITE_HAVE_CPUINFO 100 GetFromContext(TfLiteContext * context)101CpuBackendContext* CpuBackendContext::GetFromContext(TfLiteContext* context) { 102 auto* external_context = static_cast<ExternalCpuBackendContext*>( 103 context->GetExternalContext(context, kTfLiteCpuBackendContext)); 104 105 if (external_context == nullptr) { 106 TF_LITE_FATAL( 107 "ExternalCpuBackendContext isn't properly initialized during TFLite " 108 "interpreter initialization."); 109 } 110 111 auto* cpu_backend_context = static_cast<CpuBackendContext*>( 112 external_context->internal_backend_context()); 113 if (cpu_backend_context == nullptr) { 114 // We do the lazy initialization here for the TfLiteInternalBackendContext 115 // that's wrapped inside ExternalCpuBackendContext. 116 cpu_backend_context = new CpuBackendContext(); 117 cpu_backend_context->SetMaxNumThreads(context->recommended_num_threads); 118 external_context->set_internal_backend_context( 119 std::unique_ptr<TfLiteInternalBackendContext>(cpu_backend_context)); 120 } 121 122 return cpu_backend_context; 123 } 124 CpuBackendContext()125CpuBackendContext::CpuBackendContext() 126 : TfLiteInternalBackendContext(), 127 ruy_context_(new ruy::Context), 128 gemmlowp_context_(new gemmlowp::GemmContext) { 129 SetMaxNumThreads(kDefaultNumThreadpoolThreads); 130 // TODO(b/148289189) Remove when clients have transitioned to runtime flag. 131 #ifdef TFLITE_WITH_RUY_GEMV 132 SetUseCaching(true); 133 #else 134 SetUseCaching(false); 135 #endif 136 } 137 ~CpuBackendContext()138CpuBackendContext::~CpuBackendContext() {} 139 SetMaxNumThreads(int max_num_threads)140void CpuBackendContext::SetMaxNumThreads(int max_num_threads) { 141 const int target_num_threads = 142 max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads; 143 max_num_threads_ = target_num_threads; 144 ruy_context_->set_max_num_threads(target_num_threads); 145 gemmlowp_context_->set_max_num_threads(target_num_threads); 146 } 147 SetUseCaching(bool flag)148void CpuBackendContext::SetUseCaching(bool flag) { use_caching_ = flag; } 149 HasAvxOrAbove()150bool CpuBackendContext::HasAvxOrAbove() { 151 return cpuinfo_.Avx() || cpuinfo_.Avx2Fma() || cpuinfo_.Avx512(); 152 } 153 PreferGemmlowpOnX86()154bool CpuBackendContext::PreferGemmlowpOnX86() { 155 bool use_gemmlowp_on_x86 = false; 156 #if defined(TFLITE_X86_PLATFORM) && TFLITE_HAS_ATTRIBUTE_WEAK && \ 157 !defined(__APPLE__) 158 if (::tflite::UseGemmlowpOnX86 != nullptr) { 159 use_gemmlowp_on_x86 = ::tflite::UseGemmlowpOnX86(); 160 } 161 #endif // TFLITE_X86_PLATFORM && TFLITE_HAS_ATTRIBUTE_WEAK && !(__APPLE__) 162 return use_gemmlowp_on_x86 || !HasAvxOrAbove(); 163 } 164 165 } // namespace tflite 166