• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
18 
19 #include <string>
20 #include <vector>
21 
22 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
23 #include "tensorflow/lite/delegates/gpu/cl/util.h"
24 #include "tensorflow/lite/delegates/gpu/common/status.h"
25 #include "tensorflow/lite/delegates/gpu/common/types.h"
26 
27 namespace tflite {
28 namespace gpu {
29 namespace cl {
30 
// GPU vendors distinguished by the OpenCL backend.
enum class Vendor {
  QUALCOMM,
  MALI,
  POWERVR,
  NVIDIA,
  AMD,
  UNKNOWN,
};

// Returns a string for the given vendor. Defined out of line.
std::string VendorToString(Vendor v);
33 
// OpenCL versions distinguished by the backend, in ascending order.
enum class OpenCLVersion {
  CL_1_0,
  CL_1_1,
  CL_1_2,
  CL_2_0,
};

// Returns a string for the given OpenCL version. Defined out of line.
std::string OpenCLVersionToString(OpenCLVersion version);
36 
// Intended for use only in cl_device.cc; declared here so that tests can
// call it.
int GetAdrenoGPUVersion(const std::string& gpu_version);
39 
// Adreno-specific device information. A default-constructed AdrenoInfo has
// gpu_version == -1 and support_one_layer_texture_array == true.
struct AdrenoInfo {
  AdrenoInfo() = default;
  // Builds the info from a device version string. Defined out of line.
  explicit AdrenoInfo(const std::string& device_version);

  // Can be, for example, 405/430/540/530/630 etc. -1 is the in-class default.
  int gpu_version = -1;

  // Returns a sparsely documented physical parameter of Adreno6xx GPUs.
  // The value was obtained using Snapdragon Profiler.
  int GetMaximumWavesCount() const;

  // Returns the amount of register memory per CU (Compute Unit) in bytes.
  int GetRegisterMemorySizePerComputeUnit() const;

  // Returns the maximum possible number of waves based on register usage.
  int GetMaximumWavesCount(int register_footprint_per_tread,
                           bool full_wave = true) const;

  int GetWaveSize(bool full_wave) const;

  // Not supported on some Adreno devices with specific driver versions.
  // b/131099086
  bool support_one_layer_texture_array = true;
};
63 
64 struct DeviceInfo {
65   DeviceInfo() = default;
66   explicit DeviceInfo(cl_device_id id);
67 
68   bool SupportsTextureArray() const;
69   bool SupportsImageBuffer() const;
70   bool SupportsImage3D() const;
71 
72   std::vector<std::string> extensions;
73   bool supports_fp16;
74   bool supports_image3d_writes;
75   Vendor vendor;
76   OpenCLVersion cl_version;
77   int compute_units_count;
78   uint64_t buffer_max_size;
79   uint64_t image2d_max_width;
80   uint64_t image2d_max_height;
81   uint64_t image_buffer_max_size;
82   uint64_t image_array_max_layers;
83   uint64_t image3d_max_width;
84   uint64_t image3d_max_height;
85   uint64_t image3d_max_depth;
86   int3 max_work_group_sizes;
87 
88   cl_device_fp_config f32_config;
89   // valid only with cl_khr_fp16
90   cl_device_fp_config f16_config;
91 
92   // rtn is ROUND_TO_NEAREST
93   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
94   // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
95   // Mali from T6xx supports rtn
96   // PowerVR supports only rtz
97   bool supports_fp32_rtn;
98   bool supports_fp16_rtn;
99 
100   AdrenoInfo adreno_info;
101 };
102 
103 // A wrapper around opencl device id
104 class CLDevice {
105  public:
106   CLDevice() = default;
107   CLDevice(cl_device_id id, cl_platform_id platform_id);
108 
109   CLDevice(CLDevice&& device);
110   CLDevice& operator=(CLDevice&& device);
111   CLDevice(const CLDevice&);
112   CLDevice& operator=(const CLDevice&);
113 
~CLDevice()114   ~CLDevice() {}
115 
id()116   cl_device_id id() const { return id_; }
platform()117   cl_platform_id platform() const { return platform_id_; }
118   std::string GetPlatformVersion() const;
119 
GetInfo()120   const DeviceInfo& GetInfo() const { return info_; }
GetInfoPtr()121   const DeviceInfo* GetInfoPtr() const { return &info_; }
122 
vendor()123   Vendor vendor() const { return info_.vendor; }
cl_version()124   OpenCLVersion cl_version() const { return info_.cl_version; }
125   bool SupportsFP16() const;
126   bool SupportsTextureArray() const;
127   bool SupportsImageBuffer() const;
128   bool SupportsImage3D() const;
129   bool SupportsExtension(const std::string& extension) const;
130   bool SupportsFP32RTN() const;
131   bool SupportsFP16RTN() const;
132   bool IsAdreno() const;
133   bool IsAdreno3xx() const;
134   bool IsAdreno4xx() const;
135   bool IsAdreno5xx() const;
136   bool IsAdreno6xx() const;
137   bool IsAdreno6xxOrHigher() const;
138   bool IsPowerVR() const;
139   bool IsNvidia() const;
140   bool IsMali() const;
141   bool IsAMD() const;
142 
143   // To track bug on some Adreno. b/131099086
144   bool SupportsOneLayerTextureArray() const;
145   void DisableOneLayerTextureArray();
146 
147  private:
148   cl_device_id id_ = nullptr;
149   cl_platform_id platform_id_ = nullptr;
150   DeviceInfo info_;
151 };
152 
153 Status CreateDefaultGPUDevice(CLDevice* result);
154 
155 template <typename T>
GetDeviceInfo(cl_device_id id,cl_device_info info)156 T GetDeviceInfo(cl_device_id id, cl_device_info info) {
157   T result;
158   cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr);
159   if (error != CL_SUCCESS) {
160     return -1;
161   }
162   return result;
163 }
164 
165 template <typename T>
GetDeviceInfo(cl_device_id id,cl_device_info info,T * result)166 Status GetDeviceInfo(cl_device_id id, cl_device_info info, T* result) {
167   cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr);
168   if (error != CL_SUCCESS) {
169     return InvalidArgumentError(CLErrorCodeToString(error));
170   }
171   return OkStatus();
172 }
173 
174 }  // namespace cl
175 }  // namespace gpu
176 }  // namespace tflite
177 
178 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_DEVICE_H_
179