/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/environment.h"

#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
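// Builds a complete Environment: the default GPU device, a CL context
// (optionally shared with an existing EGL context/display for CL-GL interop),
// a regular command queue, and a profiling queue, then runs
// Environment::Init() to apply device-specific workarounds.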
absl::Status CreateEnvironment(Environment* result, bool shared,
                               cl_context_properties egl_context,
                               cl_context_properties egl_display) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  if (shared) {
    RETURN_IF_ERROR(CreateCLGLContext(gpu, egl_context, egl_display, &context));
  } else {
    RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  }
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));

  return result->Init();
}

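// Returns true if the given tensor storage type is usable on this GPU.
// 2D/3D/array textures are disabled on AMD; IMAGE_BUFFER is limited to
// Adreno, AMD, and Nvidia; feature-gated types additionally require the
// matching device capability.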
bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
                              TensorStorageType storage_type) {
  switch (storage_type) {
    case TensorStorageType::TEXTURE_2D:
      return !gpu_info.IsAMD();
    case TensorStorageType::BUFFER:
      return true;
    case TensorStorageType::TEXTURE_ARRAY:
      return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
    case TensorStorageType::IMAGE_BUFFER:
      return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
             gpu_info.SupportsImageBuffer();
    case TensorStorageType::TEXTURE_3D:
      return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return false;
    case TensorStorageType::UNKNOWN:
      return false;
  }
  return false;
}

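// F32 is always supported; F16 and the mixed F32_F16 mode additionally
// require FP16 support on the device (typically the cl_khr_fp16 extension).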
bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
                            CalculationsPrecision precision) {
  switch (precision) {
    case CalculationsPrecision::F32_F16:
    case CalculationsPrecision::F16:
      return gpu_info.SupportsFP16();
    case CalculationsPrecision::F32:
      return true;
  }
  // Unreachable for valid enum values; avoids falling off the end of a
  // non-void function.
  return false;
}

}  // namespace

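// Environment transfers ownership of its CL objects through move operations.
// Note that the value constructor below leaves program_cache_
// default-constructed; only the move constructor and move assignment carry
// the cache over.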
Environment::Environment(CLDevice&& device, CLContext&& context,
                         CLCommandQueue&& queue,
                         ProfilingCommandQueue&& profiling_queue)
    : device_(std::move(device)),
      context_(std::move(context)),
      queue_(std::move(queue)),
      profiling_queue_(std::move(profiling_queue)) {}

Environment::Environment(Environment&& environment)
    : device_(std::move(environment.device_)),
      context_(std::move(environment.context_)),
      queue_(std::move(environment.queue_)),
      profiling_queue_(std::move(environment.profiling_queue_)),
      program_cache_(std::move(environment.program_cache_)) {}

Environment& Environment::operator=(Environment&& environment) {
  if (this != &environment) {
    device_ = std::move(environment.device_);
    context_ = std::move(environment.context_);
    queue_ = std::move(environment.queue_);
    profiling_queue_ = std::move(environment.profiling_queue_);
    program_cache_ = std::move(environment.program_cache_);
  }
  return *this;
}

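// Applies device-specific workarounds after construction. Currently the only
// one is disabling one-layer texture arrays on affected Adreno GPUs; see the
// comment inside.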
absl::Status Environment::Init() {
  if (device().GetInfo().IsAdreno() &&
      device().GetInfo().SupportsTextureArray()) {
    const auto& adreno_info = device().info_.adreno_info;
    // Some Adreno GPUs below the 600 series have a bug with one-layer texture
    // arrays (b/131099086): a kernel that writes to a one-layer texture array
    // produces zeroes instead of the actual values. The same kernel works
    // correctly if the texture array has more than one layer.
    if (adreno_info.IsAdreno3xx() || adreno_info.IsAdreno4xx() ||
        adreno_info.IsAdreno5xx()) {
      GetDevicePtr()->DisableOneLayerTextureArray();
    }
  }
  return absl::OkStatus();
}

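// Performance-hint setters. All three are currently no-ops; the TODOs below
// track wiring them to a cl_perf_hint-style mechanism when one is available.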
void Environment::SetHighPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetDefaultPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

void Environment::SetLowPerformance() const {
  // TODO(sorokin) use cl_perf_hint if available
}

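// Lists the precision modes usable on this device, ordered from highest to
// lowest numerical accuracy: F32, mixed F32_F16, then F16.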
std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
  std::vector<CalculationsPrecision> precisions;
  for (CalculationsPrecision precision :
       {CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
        CalculationsPrecision::F16}) {
    if (IsSupported(precision)) {
      precisions.push_back(precision);
    }
  }
  return precisions;
}

bool Environment::IsSupported(CalculationsPrecision precision) const {
  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
}

std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER,
        TensorStorageType::TEXTURE_ARRAY, TensorStorageType::IMAGE_BUFFER,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

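// Subset of the supported storages for which out-of-bounds reads are
// zero-clamped by the sampler hardware (image-based storages); buffer-based
// storages would need explicit boundary handling in kernel code instead.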
std::vector<TensorStorageType>
Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
  std::vector<TensorStorageType> storage_types;
  for (auto storage_type :
       {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
        TensorStorageType::TEXTURE_3D}) {
    if (IsSupported(storage_type)) {
      storage_types.push_back(storage_type);
    }
  }
  return storage_types;
}

bool Environment::IsSupported(TensorStorageType storage_type) const {
  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
}

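// Heuristic choice of the generally fastest storage type per vendor:
// texture-based storages on Adreno, PowerVR, and newer Mali (T8xx, Bifrost
// gen3, Valhall); IMAGE_BUFFER on Nvidia and AMD when supported; BUFFER
// otherwise.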
TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xxOrHigher() &&
        !gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
      return TensorStorageType::TEXTURE_ARRAY;
    } else {
      return TensorStorageType::TEXTURE_2D;
    }
  } else if (gpu_info.IsPowerVR()) {
    return TensorStorageType::TEXTURE_2D;
  } else if (gpu_info.IsMali()) {
    const MaliInfo mali_info = gpu_info.mali_info;
    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
        mali_info.IsValhall()) {
      return TensorStorageType::TEXTURE_2D;
    } else {
      return TensorStorageType::BUFFER;
    }
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}

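// Counterpart to GetFastestStorageType() that minimizes memory consumption:
// prefers buffer-backed storages (BUFFER / IMAGE_BUFFER) and uses TEXTURE_2D
// only where the zero-copy image2d-from-buffer path is available.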
TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
    const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno3xx() ||
        gpu_info.adreno_info.IsAdreno4xx()) {
      return TensorStorageType::BUFFER;
    } else {
      if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
        return TensorStorageType::TEXTURE_2D;
      } else {
        return TensorStorageType::IMAGE_BUFFER;
      }
    }
  } else if (gpu_info.IsPowerVR()) {
    return TensorStorageType::BUFFER;
  } else if (gpu_info.IsMali()) {
    const MaliInfo mali_info = gpu_info.mali_info;
    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
        mali_info.IsValhall()) {
      if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
        return TensorStorageType::TEXTURE_2D;
      } else {
        return TensorStorageType::BUFFER;
      }
    } else {
      return TensorStorageType::BUFFER;
    }
  } else if (gpu_info.IsNvidia()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsAMD()) {
    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
                                          : TensorStorageType::BUFFER;
  } else if (gpu_info.IsIntel()) {
    return TensorStorageType::BUFFER;
  }
  return TensorStorageType::BUFFER;
}

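// Creates the default, non-shared Environment. A minimal usage sketch
// (illustrative; assumes an OpenCL-capable device is present):
//
//   Environment env;
//   absl::Status status = CreateEnvironment(&env);
//   if (!status.ok()) {
//     // No usable OpenCL environment; fall back to another backend.
//   }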
absl::Status CreateEnvironment(Environment* result) {
  CLDevice gpu;
  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));

  CLContext context;
  RETURN_IF_ERROR(CreateCLContext(gpu, &context));
  CLCommandQueue queue;
  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
  ProfilingCommandQueue profiling_queue;
  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));

  *result = Environment(std::move(gpu), std::move(context), std::move(queue),
                        std::move(profiling_queue));
  return result->Init();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite