• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
18 
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22 
23 #include "absl/container/flat_hash_set.h"
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 
26 namespace tflite {
27 namespace gpu {
28 
// GPU vendor, derived from the VendorID returned by the GPU driver.
// Enumerator values are implicit and follow declaration order; kUnknown is
// the value GpuInfo::vendor starts with.
enum class GpuVendor {
  kApple,
  kQualcomm,
  kMali,
  kPowerVR,
  kNvidia,
  kAMD,
  kIntel,
  kUnknown,
};
40 
// Graphics/compute API through which the GPU is accessed.
// kUnknown comes first (value 0) and is the value GpuInfo::gpu_api starts
// with.
enum class GpuApi {
  kUnknown,
  kOpenCl,
  kMetal,
  kVulkan,
  kOpenGl
};
48 
// Known Adreno GPU models, listed from the newest series down to the oldest.
// Enumerator values are implicit, so they depend on this declaration order;
// kUnknown is last.
enum class AdrenoGpu {
  // ---- Adreno 7xx ----
  kAdreno730,

  // ---- Adreno 6xx ----
  kAdreno685,
  kAdreno680,
  kAdreno675,
  kAdreno660,
  kAdreno650,
  kAdreno640,
  kAdreno630,
  kAdreno620,
  kAdreno618,
  kAdreno616,
  kAdreno615,
  kAdreno612,
  kAdreno610,
  kAdreno605,

  // ---- Adreno 5xx ----
  kAdreno540,
  kAdreno530,
  kAdreno512,
  kAdreno510,
  kAdreno509,
  kAdreno508,
  kAdreno506,
  kAdreno505,
  kAdreno504,

  // ---- Adreno 4xx ----
  kAdreno430,
  kAdreno420,
  kAdreno418,
  kAdreno405,

  // ---- Adreno 3xx ----
  kAdreno330,
  kAdreno320,
  kAdreno308,
  kAdreno306,
  kAdreno305,
  kAdreno304,

  // ---- Adreno 2xx ----
  kAdreno225,
  kAdreno220,
  kAdreno205,
  kAdreno203,
  kAdreno200,

  // ---- Adreno 1xx ----
  kAdreno130,
  kAdreno120,

  // Model could not be identified.
  kUnknown,
};
100 
// Compute topology of an AMD GPU.
struct AMDInfo {
  AMDInfo() = default;

  // Number of shader engines; 0 until populated.
  int shader_engines = 0;
  // Number of compute units inside each shader engine; 0 until populated.
  int compute_units_per_shader_engine = 0;

  // Returns the total number of compute units, i.e. the shader engine count
  // multiplied by the compute units each engine holds.
  int GetComputeUnitsCount() const {
    return shader_engines * compute_units_per_shader_engine;
  }
};
109 
110 struct AdrenoInfo {
111   struct OpenClCompilerVersion {
112     int major = 0;
113     int minor = 0;
114     int patch = 0;
115   };
116   AdrenoInfo() = default;
117   explicit AdrenoInfo(const std::string& device_version);
118 
119   AdrenoGpu adreno_gpu;
120 
121   bool IsAdreno1xx() const;
122   bool IsAdreno2xx() const;
123   bool IsAdreno3xx() const;
124   bool IsAdreno4xx() const;
125   bool IsAdreno5xx() const;
126   bool IsAdreno6xx() const;
127   bool IsAdreno7xx() const;
128   bool IsAdreno6xxOrHigher() const;
129 
130   // This function returns some not very documented physical parameter of
131   // Adreno6xx GPU.
132   // We obtained it using Snapdragon Profiler.
133   int GetMaximumWavesCount() const;
134 
135   // returns amount of register memory per CU(Compute Unit) in bytes.
136   int GetRegisterMemorySizePerComputeUnit() const;
137 
138   // returns maximum possible amount of waves based on register usage.
139   int GetMaximumWavesCount(int register_footprint_per_tread,
140                            bool full_wave = true) const;
141 
142   int GetWaveSize(bool full_wave) const;
143 
144   int GetComputeUnitsCount() const;
145 
146   // Not supported on some Adreno devices with specific driver version.
147   // b/131099086
148   bool support_one_layer_texture_array = true;
149 
150   bool compiler_bugs_in_a6xx = false;
151 
152   OpenClCompilerVersion cl_compiler_version;
153 };
154 
// Known Apple GPU generations, named after the SoC that contains them.
// kUnknown comes first (value 0); the remaining values are implicit and
// follow declaration order.
enum class AppleGpu {
  kUnknown,
  // A-series
  kA7,
  kA8,
  kA8X,
  kA9,
  kA9X,
  kA10,
  kA10X,
  kA11,
  kA12,
  kA12X,
  kA12Z,
  kA13,
  kA14,
  kA15,
  // M-series
  kM1,
  kM1Pro,
  kM1Max,
  kM1Ultra,
  kM2
};
177 
178 struct AppleInfo {
179   AppleInfo() = default;
180   explicit AppleInfo(const std::string& gpu_description);
181   AppleGpu gpu_type;
182 
183   bool IsA7GenerationGpu() const;
184   bool IsA8GenerationGpu() const;
185   bool IsLocalMemoryPreferredOverGlobal() const;
186 
187   bool IsBionic() const;
188 
189   bool IsSIMDMatMulSupported() const;
190   // Often, fp32 alu performance is 1/2 of fp16 alu performance
191   // But, on some devices, fp32 alu performance equal to fp16 alu performance,
192   // at least in some scenarios.
193   // This method returns true if SIMDMatMul performance in fp32 equal to fp16
194   bool IsSIMDMatMulFp32Perf2x() const;
195 
196   // floating point rounding mode
197   bool IsRoundToNearestSupported() const;
198 
199   int GetComputeUnitsCount() const;
200 
201   // do not use, for internal usage
202   void SetComputeUnits(int compute_units_count);
203 
204  private:
205   int compute_units = -1;
206 };
207 
// Known Arm Mali GPU models. kUnknown comes first (value 0); the remaining
// values are implicit and follow declaration order.
enum class MaliGpu {
  kUnknown,
  // T-series models
  kT604,
  kT622,
  kT624,
  kT628,
  kT658,
  kT678,
  kT720,
  kT760,
  kT820,
  kT830,
  kT860,
  kT880,
  // G-series models
  kG31,
  kG51,
  kG71,
  kG52,
  kG72,
  kG76,
  kG57,
  kG77,
  kG68,
  kG78,
  kG310,
  kG510,
  kG610,
  kG710
};
237 
238 struct MaliInfo {
239   MaliInfo() = default;
240   explicit MaliInfo(const std::string& gpu_description);
241   MaliGpu gpu_version;
242 
243   bool IsMaliT6xx() const;
244   bool IsMaliT7xx() const;
245   bool IsMaliT8xx() const;
246   bool IsMidgard() const;
247   bool IsBifrostGen1() const;
248   bool IsBifrostGen2() const;
249   bool IsBifrostGen3() const;
250   bool IsBifrost() const;
251   bool IsValhallGen1() const;
252   bool IsValhallGen2() const;
253   bool IsValhallGen3() const;
254   bool IsValhall() const;
255 
256   // returns approximate compute units count using GPU name
257   int GetApproximateComputeUnitsCount() const;
258 };
259 
// OpenGL-specific device information and implementation limits.
struct OpenGlInfo {
  std::string renderer_name;
  std::string vendor_name;
  std::string version;
  // Both version components start at -1.
  int major_version = -1;
  int minor_version = -1;

  // Implementation limits; all start at 0.
  int max_image_units = 0;
  int max_ssbo_bindings = 0;
  int max_image_bindings = 0;
  int max_work_group_invocations = 0;
  int max_texture_size = 0;
  int max_array_texture_layers = 0;
  int max_fragment_image_units = 0;
  int max_fragment_uniform_vec4_count = 0;
  // The spelling ("atttachments") is kept as-is because the member name is
  // part of the public interface.
  int max_color_atttachments = 0;
  int max_viewport_width = 0;
  int max_viewport_height = 0;
  int max_renderbuffer_size = 0;

  std::vector<std::string> extensions;
  // Zero-initialized like the other limits so that a default-constructed
  // OpenGlInfo never exposes indeterminate values.
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;

  bool SupportsExplicitFp16() const;

  bool IsApiOpenGl31OrAbove() const;
  bool IsApiOpenGl32OrAbove() const;
};
290 
// Vulkan-specific device information and implementation limits.
struct VulkanInfo {
  std::string vendor_name;
  // Initializing an unsigned field with -1 wraps to the maximum uint32_t
  // value. NOTE(review): presumably a "not set" sentinel; confirm with the
  // code that fills these in.
  uint32_t api_version = -1;
  uint32_t api_version_major = -1;
  uint32_t api_version_minor = -1;
  uint32_t api_version_patch = -1;

  // Device limits. All are zero-initialized so that a default-constructed
  // VulkanInfo never exposes indeterminate values.
  int max_per_stage_descriptor_sampled_images = 0;
  uint32_t max_compute_work_group_invocations = 0;
  uint32_t max_image_dimension_1d = 0;
  uint32_t max_image_dimension_2d = 0;
  uint32_t max_image_dimension_3d = 0;
  uint32_t max_image_array_layers = 0;
  uint64_t max_texel_buffer_elements = 0;
  uint64_t max_uniform_buffer_range = 0;
  uint64_t max_storage_buffer_range = 0;
  uint64_t max_push_constants_size = 0;

  uint32_t subgroup_size = 0;
  bool supports_subgroup_arithmetic = false;

  std::vector<std::string> extensions;
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;

  bool SupportsExplicitFp16() const;
};
319 
// OpenCL versions in ascending order, followed by kUnknown. Enumerator values
// are implicit and follow declaration order.
enum class OpenClVersion {
  kCl1_0,
  kCl1_1,
  kCl1_2,
  kCl2_0,
  kCl2_1,
  kCl2_2,
  kCl3_0,
  kUnknown
};

// Returns a string representation of |version| (defined out of line).
std::string OpenClVersionToString(OpenClVersion version);
331 
332 struct OpenClInfo {
333   std::string device_name;
334   std::string vendor_name;
335   std::string opencl_c_version;
336   std::string platform_version;
337   std::string driver_version;
338 
339   OpenClVersion cl_version;
340 
341   std::vector<std::string> extensions;
342   bool supports_fp16;
343   bool supports_image3d_writes;
344   bool supports_images;
345   int compute_units_count;
346   uint64_t buffer_max_size;
347   uint64_t max_allocation_size;
348   uint64_t image2d_max_width;
349   uint64_t image2d_max_height;
350   uint64_t image_buffer_max_size;
351   uint64_t image_array_max_layers;
352   uint64_t image3d_max_width;
353   uint64_t image3d_max_height;
354   uint64_t image3d_max_depth;
355   int max_work_group_size_x;
356   int max_work_group_size_y;
357   int max_work_group_size_z;
358   int max_work_group_total_size;
359 
360   // The row pitch alignment size in pixels for 2D images created from a buffer.
361   // The value must be a power of 2.
362   uint64_t image_pitch_alignment = 0;
363   // The minimum alignment in pixels. The value must be a power of 2.
364   uint64_t image_base_address_alignment = 0;
365   uint64_t base_addr_align_in_bits;
366 
367   // rtn is ROUND_TO_NEAREST
368   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
369   // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
370   // Mali from T6xx supports rtn
371   // PowerVR supports only rtz
372   bool supports_fp32_rtn;
373   bool supports_fp16_rtn;
374 
375   struct SupportedImage2dTypes {
376     absl::flat_hash_set<DataType> r_layout;
377     absl::flat_hash_set<DataType> rg_layout;
378     absl::flat_hash_set<DataType> rgb_layout;
379     absl::flat_hash_set<DataType> rgba_layout;
380 
381     bool SupportsImage2D(DataType data_type, int channels) const;
382   };
383 
384   SupportedImage2dTypes supported_images_2d;
385 
386   bool IsImage2dFromBufferSupported() const;
387 };
388 
// Metal language versions in ascending order, followed by kUnknown.
// Enumerator values are implicit and follow declaration order.
enum class MetalLanguageVersion {
  kMetal1_0,
  kMetal1_1,
  kMetal1_2,
  kMetal2_0,
  kMetal2_1,
  kMetal2_2,
  kMetal2_3,
  kMetal2_4,
  kMetal3_0,
  kUnknown
};
401 
402 struct MetalInfo {
403   MetalLanguageVersion language_version;
404 
405   int max_work_group_size_x;
406   int max_work_group_size_y;
407   int max_work_group_size_z;
408 
409   uint64_t buffer_max_size;
410 
411   uint64_t image2d_max_width;
412   uint64_t image2d_max_height;
413   uint64_t image_array_max_layers;
414   uint64_t image3d_max_width;
415   uint64_t image3d_max_height;
416   uint64_t image3d_max_depth;
417 
418   bool IsSIMDMatMulSupported() const;
419   // MSL is Metal shading language
420   bool IsMslVersionEqualOrHigher(int major, int minor = 0) const;
421 };
422 
423 struct GpuInfo {
424   bool IsAdreno() const;
425   bool IsApple() const;
426   bool IsMali() const;
427   bool IsPowerVR() const;
428   bool IsNvidia() const;
429   bool IsAMD() const;
430   bool IsIntel() const;
431 
432   bool IsGlsl() const;
433   bool IsGlslSupportsExplicitFp16() const;
434 
435   // floating point rounding mode
436   bool IsRoundToNearestSupported() const;
437 
438   bool SupportsFP16() const;
439 
440   bool SupportsImages() const;
441   bool SupportsTextureArray() const;
442   bool SupportsImageBuffer() const;
443   bool SupportsImage3D() const;
444 
445   bool SupportsPointersInKernels() const;
446 
447   // returns true if device have fixed wave size equal to 32
448   bool IsWaveSizeEqualTo32() const;
449   bool SupportsSubGroupWithSize(int sub_group_size) const;
450 
451   bool SupportsFloatImage2D(DataType data_type, int channels) const;
452   bool SupportsExtension(const std::string& extension) const;
453 
454   bool SupportsZeroClampForImageBuffer() const;
455   bool SupportsZeroClampForImages() const;
456 
457   int GetComputeUnitsCount() const;
458 
459   int GetMaxImageArguments() const;
460 
461   int GetMaxWorkGroupSizeForX() const;
462   int GetMaxWorkGroupSizeForY() const;
463   int GetMaxWorkGroupSizeForZ() const;
464   int GetMaxWorkGroupTotalSize() const;
465 
466   uint64_t GetMaxImage2DWidth() const;
467   uint64_t GetMaxImage2DHeight() const;
468   uint64_t GetMaxImage2DArrayLayers() const;
469   uint64_t GetMaxImage3DWidth() const;
470   uint64_t GetMaxImage3DHeight() const;
471   uint64_t GetMaxImage3DDepth() const;
472   uint64_t GetMaxBufferSize() const;
473   uint64_t GetMaxMemoryAllocationSize() const;
474   uint64_t GetMaxImageBufferWidth() const;
475 
476   GpuVendor vendor = GpuVendor::kUnknown;
477   GpuApi gpu_api = GpuApi::kUnknown;
478 
479   std::vector<int> supported_subgroup_sizes;
480 
481   AdrenoInfo adreno_info;
482   AMDInfo amd_info;
483   AppleInfo apple_info;
484   MaliInfo mali_info;
485 
486   // OpenGL specific, gpu_api should be kOpenGl
487   OpenGlInfo opengl_info;
488   bool IsApiOpenGl() const;
489   bool IsApiOpenGl31OrAbove() const;
490 
491   // Vulkan specific, gpu_api should be kVulkan
492   VulkanInfo vulkan_info;
493   bool IsApiVulkan() const;
494 
495   MetalInfo metal_info;
496   bool IsApiMetal() const;
497 
498   OpenClInfo opencl_info;
499   bool IsApiOpenCl() const;
500   bool IsCL11OrHigher() const;
501   bool IsCL20OrHigher() const;
502   bool IsCL30OrHigher() const;
503 };
504 
505 // Currently it initializes:
506 // vendor
507 // AdrenoInfo if vendor is kQualcomm
508 // AppleInfo if vendor is kApple
509 // MaliInfo if vendor is kMali
510 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
511                                      GpuApi gpu_api, GpuInfo* gpu_info);
512 
513 }  // namespace gpu
514 }  // namespace tflite
515 
516 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
517