1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
18
19 #include <string>
20 #include <vector>
21
22 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
23 #include "tensorflow/lite/delegates/gpu/cl/util.h"
24 #include "tensorflow/lite/delegates/gpu/common/status.h"
25 #include "tensorflow/lite/delegates/gpu/common/types.h"
26
27 namespace tflite {
28 namespace gpu {
29 namespace cl {
30
// GPU vendors distinguished by the OpenCL backend. UNKNOWN is the last
// enumerator.
enum class Vendor {
  QUALCOMM,
  MALI,
  POWERVR,
  NVIDIA,
  AMD,
  UNKNOWN,
};

// Returns a string for the given vendor `v`.
std::string VendorToString(Vendor v);
33
// OpenCL versions, listed in ascending order.
enum class OpenCLVersion {
  CL_1_0,
  CL_1_1,
  CL_1_2,
  CL_2_0,
};

// Returns a string for the given OpenCL `version`.
std::string OpenCLVersionToString(OpenCLVersion version);
36
// Intended for use only in cl_device.cc; declared in this header so that it
// can be exercised by tests.
// NOTE(review): presumably extracts the numeric Adreno model (e.g. 630) from
// `gpu_version` - confirm against the definition in cl_device.cc.
int GetAdrenoGPUVersion(const std::string& gpu_version);
39
// Adreno-specific device properties.
struct AdrenoInfo {
  AdrenoInfo() = default;
  explicit AdrenoInfo(const std::string& device_version);

  // Returns a sparsely documented physical parameter of Adreno6xx GPUs.
  // The value was obtained using Snapdragon Profiler.
  int GetMaximumWavesCount() const;

  // Returns the maximum possible number of waves based on register usage.
  int GetMaximumWavesCount(int register_footprint_per_tread,
                           bool full_wave = true) const;

  // Returns the amount of register memory per CU (Compute Unit) in bytes.
  int GetRegisterMemorySizePerComputeUnit() const;

  int GetWaveSize(bool full_wave) const;

  // GPU model number, for example 405/430/540/530/630 etc. Defaults to -1.
  int gpu_version = -1;

  // Not supported on some Adreno devices with specific driver version.
  // b/131099086
  bool support_one_layer_texture_array = true;
};
63
64 struct DeviceInfo {
65 DeviceInfo() = default;
66 explicit DeviceInfo(cl_device_id id);
67
68 bool SupportsTextureArray() const;
69 bool SupportsImageBuffer() const;
70 bool SupportsImage3D() const;
71
72 std::vector<std::string> extensions;
73 bool supports_fp16;
74 bool supports_image3d_writes;
75 Vendor vendor;
76 OpenCLVersion cl_version;
77 int compute_units_count;
78 uint64_t buffer_max_size;
79 uint64_t image2d_max_width;
80 uint64_t image2d_max_height;
81 uint64_t image_buffer_max_size;
82 uint64_t image_array_max_layers;
83 uint64_t image3d_max_width;
84 uint64_t image3d_max_height;
85 uint64_t image3d_max_depth;
86 int3 max_work_group_sizes;
87
88 cl_device_fp_config f32_config;
89 // valid only with cl_khr_fp16
90 cl_device_fp_config f16_config;
91
92 // rtn is ROUND_TO_NEAREST
93 // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
94 // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
95 // Mali from T6xx supports rtn
96 // PowerVR supports only rtz
97 bool supports_fp32_rtn;
98 bool supports_fp16_rtn;
99
100 AdrenoInfo adreno_info;
101 };
102
103 // A wrapper around opencl device id
104 class CLDevice {
105 public:
106 CLDevice() = default;
107 CLDevice(cl_device_id id, cl_platform_id platform_id);
108
109 CLDevice(CLDevice&& device);
110 CLDevice& operator=(CLDevice&& device);
111 CLDevice(const CLDevice&);
112 CLDevice& operator=(const CLDevice&);
113
~CLDevice()114 ~CLDevice() {}
115
id()116 cl_device_id id() const { return id_; }
platform()117 cl_platform_id platform() const { return platform_id_; }
118 std::string GetPlatformVersion() const;
119
GetInfo()120 const DeviceInfo& GetInfo() const { return info_; }
GetInfoPtr()121 const DeviceInfo* GetInfoPtr() const { return &info_; }
122
vendor()123 Vendor vendor() const { return info_.vendor; }
cl_version()124 OpenCLVersion cl_version() const { return info_.cl_version; }
125 bool SupportsFP16() const;
126 bool SupportsTextureArray() const;
127 bool SupportsImageBuffer() const;
128 bool SupportsImage3D() const;
129 bool SupportsExtension(const std::string& extension) const;
130 bool SupportsFP32RTN() const;
131 bool SupportsFP16RTN() const;
132 bool IsAdreno() const;
133 bool IsAdreno3xx() const;
134 bool IsAdreno4xx() const;
135 bool IsAdreno5xx() const;
136 bool IsAdreno6xx() const;
137 bool IsAdreno6xxOrHigher() const;
138 bool IsPowerVR() const;
139 bool IsNvidia() const;
140 bool IsMali() const;
141 bool IsAMD() const;
142
143 // To track bug on some Adreno. b/131099086
144 bool SupportsOneLayerTextureArray() const;
145 void DisableOneLayerTextureArray();
146
147 private:
148 cl_device_id id_ = nullptr;
149 cl_platform_id platform_id_ = nullptr;
150 DeviceInfo info_;
151 };
152
// Declared here; the definition is not in this header.
// NOTE(review): presumably selects a default OpenCL GPU device, stores it in
// `*result`, and reports failure through the returned Status - confirm
// against the definition.
Status CreateDefaultGPUDevice(CLDevice* result);
154
155 template <typename T>
GetDeviceInfo(cl_device_id id,cl_device_info info)156 T GetDeviceInfo(cl_device_id id, cl_device_info info) {
157 T result;
158 cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr);
159 if (error != CL_SUCCESS) {
160 return -1;
161 }
162 return result;
163 }
164
165 template <typename T>
GetDeviceInfo(cl_device_id id,cl_device_info info,T * result)166 Status GetDeviceInfo(cl_device_id id, cl_device_info info, T* result) {
167 cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr);
168 if (error != CL_SUCCESS) {
169 return InvalidArgumentError(CLErrorCodeToString(error));
170 }
171 return OkStatus();
172 }
173
174 } // namespace cl
175 } // namespace gpu
176 } // namespace tflite
177
178 #endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
179