/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
18 
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22 
23 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
24 
25 namespace tflite {
26 namespace gpu {
27 
// The VendorID returned by the GPU driver.
// kUnknown is the default value of GpuInfo::vendor.
enum class GpuVendor {
  kApple,
  kQualcomm,
  kMali,
  kPowerVR,
  kNvidia,
  kAMD,
  kIntel,
  kUnknown
};
39 
// The graphics/compute API through which the GPU is accessed.
// kUnknown is the default value of GpuInfo::gpu_api.
enum class GpuApi {
  kUnknown,
  kOpenCl,
  kMetal,
  kVulkan,
  kOpenGl,
};
47 
// Known Adreno GPU models, grouped by series and listed from the newest
// series (6xx) down to the oldest (1xx). kUnknown is the last enumerator.
enum class AdrenoGpu {
  // Adreno 6xx series
  kAdreno685,
  kAdreno680,
  kAdreno675,
  kAdreno660,
  kAdreno650,
  kAdreno640,
  kAdreno630,
  kAdreno620,
  kAdreno618,
  kAdreno616,
  kAdreno615,
  kAdreno612,
  kAdreno610,
  kAdreno605,
  // Adreno 5xx series
  kAdreno540,
  kAdreno530,
  kAdreno512,
  kAdreno510,
  kAdreno509,
  kAdreno508,
  kAdreno506,
  kAdreno505,
  kAdreno504,
  // Adreno 4xx series
  kAdreno430,
  kAdreno420,
  kAdreno418,
  kAdreno405,
  // Adreno 3xx series
  kAdreno330,
  kAdreno320,
  kAdreno308,
  kAdreno306,
  kAdreno305,
  kAdreno304,
  // Adreno 2xx series
  kAdreno225,
  kAdreno220,
  kAdreno205,
  kAdreno203,
  kAdreno200,
  // Adreno 1xx series
  kAdreno130,
  kAdreno120,
  kUnknown
};
97 
// AMD-specific GPU description.
struct AMDInfo {
  AMDInfo() = default;

  // Both counts are zero-initialized so that a default-constructed AMDInfo
  // reports 0 compute units instead of reading indeterminate values.
  int shader_engines = 0;
  int compute_units_per_shader_engine = 0;

  // Returns shader_engines * compute_units_per_shader_engine.
  int GetComputeUnitsCount() const {
    return shader_engines * compute_units_per_shader_engine;
  }
};
106 
107 struct AdrenoInfo {
108   AdrenoInfo() = default;
109   explicit AdrenoInfo(const std::string& device_version);
110 
111   AdrenoGpu adreno_gpu;
112 
113   bool IsAdreno1xx() const;
114   bool IsAdreno2xx() const;
115   bool IsAdreno3xx() const;
116   bool IsAdreno4xx() const;
117   bool IsAdreno5xx() const;
118   bool IsAdreno6xx() const;
119   bool IsAdreno6xxOrHigher() const;
120 
121   // This function returns some not very documented physical parameter of
122   // Adreno6xx GPU.
123   // We obtained it using Snapdragon Profiler.
124   int GetMaximumWavesCount() const;
125 
126   // returns amount of register memory per CU(Compute Unit) in bytes.
127   int GetRegisterMemorySizePerComputeUnit() const;
128 
129   // returns maximum possible amount of waves based on register usage.
130   int GetMaximumWavesCount(int register_footprint_per_tread,
131                            bool full_wave = true) const;
132 
133   int GetWaveSize(bool full_wave) const;
134 
135   // Not supported on some Adreno devices with specific driver version.
136   // b/131099086
137   bool support_one_layer_texture_array = true;
138 
139   bool compiler_bugs_in_a6xx = false;
140 };
141 
// Known Apple GPU generations, named after the corresponding A-series chip.
// kUnknown is the first enumerator (value 0).
enum class AppleGpu {
  kUnknown,
  kA7,
  kA8,
  kA8X,
  kA9,
  kA9X,
  kA10,
  kA10X,
  kA11,
  kA12,
  kA12X,
  kA12Z,
  kA13,
  kA14,
};
158 
159 struct AppleInfo {
160   AppleInfo() = default;
161   explicit AppleInfo(const std::string& gpu_description);
162   AppleGpu gpu_type;
163 
164   bool IsLocalMemoryPreferredOverGlobal() const;
165 
166   bool IsBionic() const;
167 
168   // floating point rounding mode
169   bool IsRoundToNearestSupported() const;
170 
171   int GetComputeUnitsCount() const;
172 };
173 
// Known Mali GPU models (T-series followed by G-series).
// kUnknown is the first enumerator (value 0).
enum class MaliGpu {
  kUnknown,
  kT604,
  kT622,
  kT624,
  kT628,
  kT658,
  kT678,
  kT720,
  kT760,
  kT820,
  kT830,
  kT860,
  kT880,
  kG31,
  kG51,
  kG71,
  kG52,
  kG72,
  kG76,
  kG57,
  kG77,
  kG68,
  kG78,
};
199 
200 struct MaliInfo {
201   MaliInfo() = default;
202   explicit MaliInfo(const std::string& gpu_description);
203   MaliGpu gpu_version;
204 
205   bool IsMaliT6xx() const;
206   bool IsMaliT7xx() const;
207   bool IsMaliT8xx() const;
208   bool IsMidgard() const;
209   bool IsBifrostGen1() const;
210   bool IsBifrostGen2() const;
211   bool IsBifrostGen3() const;
212   bool IsBifrost() const;
213   bool IsValhall() const;
214 };
215 
// OpenGL-specific device information and limits.
// Version numbers default to -1 and all numeric limits default to 0.
struct OpenGlInfo {
  std::string renderer_name;
  std::string vendor_name;
  std::string version;
  int major_version = -1;
  int minor_version = -1;

  int max_image_units = 0;
  int max_ssbo_bindings = 0;
  int max_image_bindings = 0;
  int max_work_group_invocations = 0;
  int max_texture_size = 0;
  int max_array_texture_layers = 0;
  int max_fragment_image_units = 0;
  int max_fragment_uniform_vec4_count = 0;
  int max_color_atttachments = 0;
  int max_viewport_width = 0;
  int max_viewport_height = 0;
  int max_renderbuffer_size = 0;

  std::vector<std::string> extensions;

  // Zero-initialized like the other limits above so a default-constructed
  // OpenGlInfo never exposes indeterminate values.
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;
};
241 
// Vulkan-specific device information and limits.
struct VulkanInfo {
  std::string vendor_name;

  // The version fields are unsigned; the -1 initializer converts to
  // UINT32_MAX.
  uint32_t api_version = -1;
  uint32_t api_version_major = -1;
  uint32_t api_version_minor = -1;
  uint32_t api_version_patch = -1;

  int max_per_stage_descriptor_sampled_images = 0;

  // Zero-initialized so a default-constructed VulkanInfo never exposes
  // indeterminate values.
  uint32_t max_compute_work_group_invocations = 0;
  uint32_t max_image_dimension_2d = 0;
  uint32_t max_image_array_layers = 0;

  uint32_t subgroup_size = 0;
  bool supports_subgroup_arithmetic = false;

  std::vector<std::string> extensions;

  // Zero-initialized for the same reason as the limits above.
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;
};
262 
// OpenCL versions, listed in ascending order. kUnknown is the last
// enumerator.
enum class OpenClVersion {
  kCl1_0,
  kCl1_1,
  kCl1_2,
  kCl2_0,
  kCl2_1,
  kCl2_2,
  kCl3_0,
  kUnknown,
};
273 std::string OpenClVersionToString(OpenClVersion version);
274 
275 struct OpenClInfo {
276   std::string device_name;
277   std::string vendor_name;
278   std::string opencl_c_version;
279   std::string platform_version;
280 
281   OpenClVersion cl_version;
282 
283   std::vector<std::string> extensions;
284   bool supports_fp16;
285   bool supports_image3d_writes;
286   bool supports_images;
287   int compute_units_count;
288   uint64_t buffer_max_size;
289   uint64_t max_allocation_size;
290   uint64_t image2d_max_width;
291   uint64_t image2d_max_height;
292   uint64_t image_buffer_max_size;
293   uint64_t image_array_max_layers;
294   uint64_t image3d_max_width;
295   uint64_t image3d_max_height;
296   uint64_t image3d_max_depth;
297   int max_work_group_size_x;
298   int max_work_group_size_y;
299   int max_work_group_size_z;
300   int max_work_group_total_size;
301   uint64_t image_pitch_alignment;
302   uint64_t base_addr_align_in_bits;
303 
304   // rtn is ROUND_TO_NEAREST
305   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
306   // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
307   // Mali from T6xx supports rtn
308   // PowerVR supports only rtz
309   bool supports_fp32_rtn;
310   bool supports_fp16_rtn;
311 
312   bool supports_r_f16_tex2d = false;
313   bool supports_rg_f16_tex2d = false;
314   bool supports_rgb_f16_tex2d = false;
315   bool supports_rgba_f16_tex2d = false;
316 
317   bool supports_r_f32_tex2d = false;
318   bool supports_rg_f32_tex2d = false;
319   bool supports_rgb_f32_tex2d = false;
320   bool supports_rgba_f32_tex2d = false;
321 
322   bool IsImage2dFromBufferSupported() const;
323 };
324 
// Metal shading language versions, listed in ascending order. kUnknown is
// the last enumerator.
enum class MetalLanguageVersion {
  kMetal1_0,
  kMetal1_1,
  kMetal1_2,
  kMetal2_0,
  kMetal2_1,
  kMetal2_2,
  kMetal2_3,
  kUnknown,
};
335 
336 struct MetalInfo {
337   MetalLanguageVersion language_version;
338 
339   int max_work_group_size_x;
340   int max_work_group_size_y;
341   int max_work_group_size_z;
342 
343   uint64_t buffer_max_size;
344 };
345 
346 struct GpuInfo {
347   bool IsAdreno() const;
348   bool IsApple() const;
349   bool IsMali() const;
350   bool IsPowerVR() const;
351   bool IsNvidia() const;
352   bool IsAMD() const;
353   bool IsIntel() const;
354 
355   bool IsGlsl() const;
356 
357   // floating point rounding mode
358   bool IsRoundToNearestSupported() const;
359 
360   bool SupportsFP16() const;
361 
362   bool SupportsImages() const;
363   bool SupportsTextureArray() const;
364   bool SupportsImageBuffer() const;
365   bool SupportsImage3D() const;
366 
367   bool SupportsPointersInKernels() const;
368 
369   // returns true if device have fixed wave size equal to 32
370   bool IsWaveSizeEqualTo32() const;
371   bool SupportsSubGroupWithSize(int sub_group_size) const;
372 
373   bool SupportsFloatImage2D(DataType data_type, int channels) const;
374   bool SupportsExtension(const std::string& extension) const;
375 
376   int GetComputeUnitsCount() const;
377 
378   int GetMaxImageArguments() const;
379 
380   int GetMaxWorkGroupSizeForX() const;
381   int GetMaxWorkGroupSizeForY() const;
382   int GetMaxWorkGroupSizeForZ() const;
383   int GetMaxWorkGroupTotalSize() const;
384 
385   uint64_t GetMaxImage2DWidth() const;
386   uint64_t GetMaxImage2DHeight() const;
387   uint64_t GetMaxImage2DArrayLayers() const;
388   uint64_t GetMaxImage3DWidth() const;
389   uint64_t GetMaxImage3DHeight() const;
390   uint64_t GetMaxImage3DDepth() const;
391   uint64_t GetMaxBufferSize() const;
392   uint64_t GetMaxMemoryAllocationSize() const;
393   uint64_t GetMaxImageBufferWidth() const;
394 
395   GpuVendor vendor = GpuVendor::kUnknown;
396   GpuApi gpu_api = GpuApi::kUnknown;
397 
398   std::vector<int> supported_subgroup_sizes;
399 
400   AdrenoInfo adreno_info;
401   AMDInfo amd_info;
402   AppleInfo apple_info;
403   MaliInfo mali_info;
404 
405   // OpenGL specific, gpu_api should be kOpenGl
406   OpenGlInfo opengl_info;
407   bool IsApiOpenGl() const;
408   bool IsApiOpenGl31OrAbove() const;
409 
410   // Vulkan specific, gpu_api should be kVulkan
411   VulkanInfo vulkan_info;
412   bool IsApiVulkan() const;
413 
414   MetalInfo metal_info;
415   bool IsApiMetal() const;
416 
417   OpenClInfo opencl_info;
418   bool IsApiOpenCl() const;
419   bool IsCL11OrHigher() const;
420   bool IsCL20OrHigher() const;
421   bool IsCL30OrHigher() const;
422 };
423 
424 // Currently it initializes:
425 // vendor
426 // AdrenoInfo if vendor is kQualcomm
427 // AppleInfo if vendor is kApple
428 // MaliInfo if vendor is kMali
429 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
430                                      GpuApi gpu_api, GpuInfo* gpu_info);
431 
432 }  // namespace gpu
433 }  // namespace tflite
434 
435 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
436