• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/environment.h"
17 
18 #include <string>
19 #include <vector>
20 
21 #include "tensorflow/lite/delegates/gpu/cl/util.h"
22 #include "tensorflow/lite/delegates/gpu/common/shape.h"
23 
24 namespace tflite {
25 namespace gpu {
26 namespace cl {
27 namespace {
CreateEnvironment(Environment * result,bool shared,cl_context_properties egl_context,cl_context_properties egl_display)28 absl::Status CreateEnvironment(Environment* result, bool shared,
29                                cl_context_properties egl_context,
30                                cl_context_properties egl_display) {
31   CLDevice gpu;
32   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
33 
34   CLContext context;
35   if (shared) {
36     RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
37   } else {
38     RETURN_IF_ERROR(CreateCLContext(gpu, &context));
39   }
40   CLCommandQueue queue;
41   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
42   ProfilingCommandQueue profiling_queue;
43   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
44 
45   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
46                         std::move(profiling_queue));
47 
48   return result->Init();
49 }
50 
IsGpuSupportsStorageType(const GpuInfo & gpu_info,TensorStorageType storage_type)51 bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
52                               TensorStorageType storage_type) {
53   switch (storage_type) {
54     case TensorStorageType::TEXTURE_2D:
55       return !gpu_info.IsAMD();
56     case TensorStorageType::BUFFER:
57       return true;
58     case TensorStorageType::TEXTURE_ARRAY:
59       return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
60     case TensorStorageType::IMAGE_BUFFER:
61       return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
62              gpu_info.SupportsImageBuffer();
63     case TensorStorageType::TEXTURE_3D:
64       return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
65     case TensorStorageType::SINGLE_TEXTURE_2D:
66       return false;
67     case TensorStorageType::UNKNOWN:
68       return false;
69   }
70   return false;
71 }
72 
IsGpuSupportsPrecision(const GpuInfo & gpu_info,CalculationsPrecision precision)73 bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
74                             CalculationsPrecision precision) {
75   switch (precision) {
76     case CalculationsPrecision::F32_F16:
77     case CalculationsPrecision::F16:
78       return gpu_info.SupportsFP16();
79     case CalculationsPrecision::F32:
80       return true;
81   }
82 }
83 
84 }  // namespace
85 
// Assumes ownership of the already-created CL objects; members are
// move-initialized in declaration order.
Environment::Environment(CLDevice&& device, CLContext&& context,
                         CLCommandQueue&& queue,
                         ProfilingCommandQueue&& profiling_queue)
    : device_(std::move(device)),
      context_(std::move(context)),
      queue_(std::move(queue)),
      profiling_queue_(std::move(profiling_queue)) {}
93 
// Move constructor: transfers every member, including the program cache,
// leaving `environment` in a valid but unspecified state.
Environment::Environment(Environment&& environment)
    : device_(std::move(environment.device_)),
      context_(std::move(environment.context_)),
      queue_(std::move(environment.queue_)),
      profiling_queue_(std::move(environment.profiling_queue_)),
      program_cache_(std::move(environment.program_cache_)) {}
100 
// Move assignment: member-wise move with a self-assignment guard. Each
// member's own move-assignment takes care of releasing its previous value.
Environment& Environment::operator=(Environment&& environment) {
  if (this != &environment) {
    device_ = std::move(environment.device_);
    context_ = std::move(environment.context_);
    queue_ = std::move(environment.queue_);
    profiling_queue_ = std::move(environment.profiling_queue_);
    program_cache_ = std::move(environment.program_cache_);
  }
  return *this;
}
111 
Init()112 absl::Status Environment::Init() {
113   if (device().GetInfo().IsAdreno() &&
114       device().GetInfo().SupportsTextureArray()) {
115     const auto& adreno_info = device().info_.adreno_info;
116     // Some Adreno < 600 have bug with one layer texture array. b/131099086
117     // If we have one layer texture array and will write smt from kernel to this
118     // texture, we will get zeroes instead of actual values.
119     // The same kernel will work, if we use texture array with more than one
120     // layer.
121     if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
122         adreno_info.IsAdreno5xx()) {
123       GetDevicePtr()->DisableOneLayerTextureArray();
124     }
125   }
126   return absl::OkStatus();
127 }
128 
// Placeholder: performance hinting is not implemented yet.
void Environment::SetHighPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
132 
// Placeholder: performance hinting is not implemented yet.
void Environment::SetDefaultPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
136 
// Placeholder: performance hinting is not implemented yet.
void Environment::SetLowPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}
140 
GetSupportedPrecisions() const141 std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
142   std::vector<CalculationsPrecision> precisions;
143   for (CalculationsPrecision precision :
144        {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
145         CalculationsPrecision::F16}) {
146     if (IsSupported(precision)) {
147       precisions.push_back(precision);
148     }
149   }
150   return precisions;
151 }
152 
// Returns true if this environment's device can compute at `precision`.
bool Environment::IsSupported(CalculationsPrecision precision) const {
  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
}
156 
GetSupportedStorages() const157 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
158   std::vector<TensorStorageType> storage_types;
159   for (auto storage_type :
160        {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
161         TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
162         TensorStorageType::TEXTURE_3D}) {
163     if (IsSupported(storage_type)) {
164       storage_types.push_back(storage_type);
165     }
166   }
167   return storage_types;
168 }
169 
170 std::vector<TensorStorageType>
GetSupportedStoragesWithHWZeroClampSupport() const171 Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
172   std::vector<TensorStorageType> storage_types;
173   for (auto storage_type :
174        {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
175         TensorStorageType::TEXTURE_3D}) {
176     if (IsSupported(storage_type)) {
177       storage_types.push_back(storage_type);
178     }
179   }
180   return storage_types;
181 }
182 
// Returns true if this environment's device supports `storage_type`.
bool Environment::IsSupported(TensorStorageType storage_type) const {
  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
}
186 
GetFastestStorageType(const GpuInfo & gpu_info)187 TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
188   if (gpu_info.IsAdreno()) {
189     if (gpu_info.adreno_info.IsAdreno6xxOrHigher() &&
190         !gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
191       return TensorStorageType::TEXTURE_ARRAY;
192     } else {
193       return TensorStorageType::TEXTURE_2D;
194     }
195   } else if (gpu_info.IsPowerVR()) {
196     return TensorStorageType::TEXTURE_2D;
197   } else if (gpu_info.IsMali()) {
198     const MaliInfo mali_info = gpu_info.mali_info;
199     if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
200         mali_info.IsValhall()) {
201       return TensorStorageType::TEXTURE_2D;
202     } else {
203       return TensorStorageType::BUFFER;
204     }
205   } else if (gpu_info.IsNvidia()) {
206     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
207                                           : TensorStorageType::BUFFER;
208   } else if (gpu_info.IsAMD()) {
209     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
210                                           : TensorStorageType::BUFFER;
211   } else if (gpu_info.IsIntel()) {
212     return TensorStorageType::BUFFER;
213   }
214   return TensorStorageType::BUFFER;
215 }
216 
GetStorageTypeWithMinimalMemoryConsumption(const GpuInfo & gpu_info)217 TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
218     const GpuInfo& gpu_info) {
219   if (gpu_info.IsAdreno()) {
220     if (gpu_info.adreno_info.IsAdreno3xx() ||
221         gpu_info.adreno_info.IsAdreno4xx()) {
222       return TensorStorageType::BUFFER;
223     } else {
224       if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
225         return TensorStorageType::TEXTURE_2D;
226       } else {
227         return TensorStorageType::IMAGE_BUFFER;
228       }
229     }
230   } else if (gpu_info.IsPowerVR()) {
231     return TensorStorageType::BUFFER;
232   } else if (gpu_info.IsMali()) {
233     const MaliInfo mali_info = gpu_info.mali_info;
234     if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
235         mali_info.IsValhall()) {
236       if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
237         return TensorStorageType::TEXTURE_2D;
238       } else {
239         return TensorStorageType::BUFFER;
240       }
241     } else {
242       return TensorStorageType::BUFFER;
243     }
244   } else if (gpu_info.IsNvidia()) {
245     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
246                                           : TensorStorageType::BUFFER;
247   } else if (gpu_info.IsAMD()) {
248     return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
249                                           : TensorStorageType::BUFFER;
250   } else if (gpu_info.IsIntel()) {
251     return TensorStorageType::BUFFER;
252   }
253   return TensorStorageType::BUFFER;
254 }
255 
CreateEnvironment(Environment * result)256 absl::Status CreateEnvironment(Environment* result) {
257   CLDevice gpu;
258   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
259 
260   CLContext context;
261   RETURN_IF_ERROR(CreateCLContext(gpu, &context));
262   CLCommandQueue queue;
263   RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
264   ProfilingCommandQueue profiling_queue;
265   RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
266 
267   *result = Environment(std::move(gpu), std::move(context), std::move(queue),
268                         std::move(profiling_queue));
269   return result->Init();
270 }
271 
272 }  // namespace cl
273 }  // namespace gpu
274 }  // namespace tflite
275