1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
17
18 #include <algorithm>
19 #include <string>
20 #include <vector>
21
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_cat.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/lite/delegates/gpu/cl/util.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h"
28
29 namespace tflite {
30 namespace gpu {
31 namespace cl {
32
33 template <>
GetDeviceInfo(cl_device_id id,cl_device_info info)34 std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) {
35 size_t size;
36 cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
37 if (error != CL_SUCCESS) {
38 return "";
39 }
40
41 std::string result(size - 1, 0);
42 error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
43 if (error != CL_SUCCESS) {
44 return "";
45 }
46 return result;
47 }
48
49 namespace {
50 template <typename T>
GetPlatformInfo(cl_platform_id id,cl_platform_info info)51 T GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
52 T result;
53 cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
54 if (error != CL_SUCCESS) {
55 return -1;
56 }
57 return result;
58 }
59
GetPlatformInfo(cl_platform_id id,cl_platform_info info)60 std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
61 size_t size;
62 cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
63 if (error != CL_SUCCESS) {
64 return "";
65 }
66
67 std::string result(size - 1, 0);
68 error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
69 if (error != CL_SUCCESS) {
70 return "";
71 }
72 return result;
73 }
74
GetDeviceWorkDimsSizes(cl_device_id id,int3 * result)75 void GetDeviceWorkDimsSizes(cl_device_id id, int3* result) {
76 int dims_count =
77 GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
78 if (dims_count < 3) {
79 return;
80 }
81 std::vector<size_t> limits(dims_count);
82 cl_int error =
83 clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES,
84 sizeof(size_t) * dims_count, limits.data(), nullptr);
85 if (error != CL_SUCCESS) {
86 return;
87 }
88 // dims_count must be at least 3 according to spec
89 result->x = limits[0];
90 result->y = limits[1];
91 result->z = limits[2];
92 }
93
ParseCLVersion(const std::string & version)94 OpenClVersion ParseCLVersion(const std::string& version) {
95 const auto first_dot_pos = version.find_first_of('.');
96 if (first_dot_pos == std::string::npos) {
97 return OpenClVersion::kCl1_0;
98 }
99 const int major = version[first_dot_pos - 1] - '0';
100 const int minor = version[first_dot_pos + 1] - '0';
101
102 if (major == 1) {
103 if (minor == 2) {
104 return OpenClVersion::kCl1_2;
105 } else if (minor == 1) {
106 return OpenClVersion::kCl1_1;
107 } else {
108 return OpenClVersion::kCl1_0;
109 }
110 } else if (major == 2) {
111 if (minor == 2) {
112 return OpenClVersion::kCl2_2;
113 } else if (minor == 1) {
114 return OpenClVersion::kCl2_1;
115 } else {
116 return OpenClVersion::kCl2_0;
117 }
118 } else if (major == 3) {
119 return OpenClVersion::kCl3_0;
120 } else {
121 return OpenClVersion::kCl1_0;
122 }
123 }
124
// Returns true when gpu_version lies in the half-open interval
// [min_version, max_version): min_version is included, max_version is
// excluded.
bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
  if (gpu_version < min_version) {
    return false;
  }
  return gpu_version < max_version;
}
130
GpuInfoFromDeviceID(cl_device_id id)131 GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
132 GpuInfo info;
133 const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
134 const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
135 const auto opencl_c_version =
136 GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
137 const std::string gpu_description =
138 absl::StrCat(device_name, " ", vendor_name, " ", opencl_c_version);
139 GetGpuInfoFromDeviceDescription(gpu_description, GpuApi::kOpenCl, &info);
140 info.opencl_info.cl_version = ParseCLVersion(opencl_c_version);
141 info.opencl_info.extensions =
142 absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
143 info.opencl_info.supports_fp16 = false;
144 info.opencl_info.supports_image3d_writes = false;
145 for (const auto& ext : info.opencl_info.extensions) {
146 if (ext == "cl_khr_fp16") {
147 info.opencl_info.supports_fp16 = true;
148 }
149 if (ext == "cl_khr_3d_image_writes") {
150 info.opencl_info.supports_image3d_writes = true;
151 }
152 }
153
154 info.opencl_info.supports_images =
155 GetDeviceInfo<cl_bool>(id, CL_DEVICE_IMAGE_SUPPORT);
156
157 cl_device_fp_config f32_config =
158 GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
159 info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
160
161 if (info.opencl_info.supports_fp16) {
162 cl_device_fp_config f16_config;
163 auto status = GetDeviceInfo<cl_device_fp_config>(
164 id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
165 // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
166 if (status.ok() && !info.IsAMD()) {
167 info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
168 } else { // happens on PowerVR
169 f16_config = f32_config;
170 info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
171 }
172 } else {
173 info.opencl_info.supports_fp16_rtn = false;
174 }
175
176 if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
177 // PowerVR doesn't have full support of fp16 and so doesn't list this
178 // extension. But it can support fp16 in MADs and as buffers/textures types,
179 // so we will use it.
180 info.opencl_info.supports_fp16 = true;
181 info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
182 }
183
184 if (!info.opencl_info.supports_image3d_writes &&
185 ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
186 info.IsNvidia())) {
187 // in local tests Adreno 430 can write in image 3d, at least on small sizes,
188 // but it doesn't have cl_khr_3d_image_writes in list of available
189 // extensions
190 // The same for NVidia
191 info.opencl_info.supports_image3d_writes = true;
192 }
193 info.opencl_info.compute_units_count =
194 GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
195 info.opencl_info.image2d_max_width =
196 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
197 info.opencl_info.image2d_max_height =
198 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
199 info.opencl_info.buffer_max_size =
200 GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
201 if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
202 info.opencl_info.image_buffer_max_size =
203 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
204 info.opencl_info.image_array_max_layers =
205 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
206 }
207 info.opencl_info.image3d_max_width =
208 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
209 info.opencl_info.image3d_max_height =
210 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
211 info.opencl_info.image3d_max_depth =
212 GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
213 int3 max_work_group_sizes;
214 GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
215 info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
216 info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
217 info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
218 info.opencl_info.max_work_group_total_size =
219 GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
220
221 if (info.IsIntel()) {
222 if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
223 size_t sub_groups_count;
224 cl_int status =
225 clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0,
226 nullptr, &sub_groups_count);
227 if (status == CL_SUCCESS) {
228 std::vector<size_t> sub_group_sizes(sub_groups_count);
229 status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
230 sizeof(size_t) * sub_groups_count,
231 sub_group_sizes.data(), nullptr);
232 if (status == CL_SUCCESS) {
233 for (int i = 0; i < sub_groups_count; ++i) {
234 info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
235 }
236 }
237 }
238 }
239 }
240 return info;
241 }
242
243 } // namespace
244
CLDevice(cl_device_id id,cl_platform_id platform_id)245 CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
246 : info_(GpuInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {
247 if (info_.IsAdreno() &&
248 info_.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
249 acceleration::AndroidInfo android_info;
250 if (acceleration::RequestAndroidInfo(&android_info).ok()) {
251 info_.adreno_info.compiler_bugs_in_a6xx =
252 android_info.android_sdk_version == "26";
253 }
254 }
255 }
256
// Copy constructor: copies the GpuInfo and both OpenCL handles (device id and
// platform id) from `device`.
CLDevice::CLDevice(const CLDevice& device)
    : info_(device.info_),
      id_(device.id_),
      platform_id_(device.platform_id_) {
}
259
// Copy assignment: copies the GpuInfo and both OpenCL handles from `device`.
// Self-assignment is a no-op.
CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this == &device) {
    return *this;
  }
  info_ = device.info_;
  id_ = device.id_;
  platform_id_ = device.platform_id_;
  return *this;
}
268
CLDevice(CLDevice && device)269 CLDevice::CLDevice(CLDevice&& device)
270 : info_(std::move(device.info_)),
271 id_(device.id_),
272 platform_id_(device.platform_id_) {
273 device.id_ = nullptr;
274 device.platform_id_ = nullptr;
275 }
276
operator =(CLDevice && device)277 CLDevice& CLDevice::operator=(CLDevice&& device) {
278 if (this != &device) {
279 id_ = nullptr;
280 platform_id_ = nullptr;
281 info_ = std::move(device.info_);
282 std::swap(id_, device.id_);
283 std::swap(platform_id_, device.platform_id_);
284 }
285 return *this;
286 }
287
GetPlatformVersion() const288 std::string CLDevice::GetPlatformVersion() const {
289 return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
290 }
291
DisableOneLayerTextureArray()292 void CLDevice::DisableOneLayerTextureArray() {
293 info_.adreno_info.support_one_layer_texture_array = false;
294 }
295
CreateDefaultGPUDevice(CLDevice * result)296 absl::Status CreateDefaultGPUDevice(CLDevice* result) {
297 cl_uint num_platforms;
298 clGetPlatformIDs(0, nullptr, &num_platforms);
299 if (num_platforms == 0) {
300 return absl::UnknownError("No supported OpenCL platform.");
301 }
302 std::vector<cl_platform_id> platforms(num_platforms);
303 clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
304
305 cl_platform_id platform_id = platforms[0];
306 cl_uint num_devices;
307 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
308 if (num_devices == 0) {
309 return absl::UnknownError("No GPU on current platform.");
310 }
311
312 std::vector<cl_device_id> devices(num_devices);
313 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(),
314 nullptr);
315
316 *result = CLDevice(devices[0], platform_id);
317 return absl::OkStatus();
318 }
319
320 } // namespace cl
321 } // namespace gpu
322 } // namespace tflite
323