1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 18 19 #include <cstdint> 20 #include <string> 21 #include <vector> 22 23 #include "absl/container/flat_hash_set.h" 24 #include "tensorflow/lite/delegates/gpu/common/data_type.h" 25 26 namespace tflite { 27 namespace gpu { 28 29 // The VendorID returned by the GPU driver. 30 enum class GpuVendor { 31 kApple, 32 kQualcomm, 33 kMali, 34 kPowerVR, 35 kNvidia, 36 kAMD, 37 kIntel, 38 kUnknown 39 }; 40 41 enum class GpuApi { 42 kUnknown, 43 kOpenCl, 44 kMetal, 45 kVulkan, 46 kOpenGl, 47 }; 48 49 enum class AdrenoGpu { 50 // Adreno 7xx series 51 kAdreno730, 52 // Adreno 6xx series 53 kAdreno685, 54 kAdreno680, 55 kAdreno675, 56 kAdreno660, 57 kAdreno650, 58 kAdreno640, 59 kAdreno630, 60 kAdreno620, 61 kAdreno618, 62 kAdreno616, 63 kAdreno615, 64 kAdreno612, 65 kAdreno610, 66 kAdreno605, 67 // Adreno 5xx series 68 kAdreno540, 69 kAdreno530, 70 kAdreno512, 71 kAdreno510, 72 kAdreno509, 73 kAdreno508, 74 kAdreno506, 75 kAdreno505, 76 kAdreno504, 77 // Adreno 4xx series 78 kAdreno430, 79 kAdreno420, 80 kAdreno418, 81 kAdreno405, 82 // Adreno 3xx series 83 kAdreno330, 84 kAdreno320, 85 kAdreno308, 86 kAdreno306, 87 kAdreno305, 88 kAdreno304, 89 // Adreno 2xx series 90 kAdreno225, 91 kAdreno220, 92 kAdreno205, 93 kAdreno203, 94 kAdreno200, 95 // Adreno 1xx series 96 kAdreno130, 97 kAdreno120, 98 kUnknown 99 }; 100 101 struct AMDInfo { 102 AMDInfo() = default; 103 int shader_engines = 0; 104 int compute_units_per_shader_engine = 0; GetComputeUnitsCountAMDInfo105 int GetComputeUnitsCount() const { 106 return shader_engines * compute_units_per_shader_engine; 107 } 108 }; 109 110 struct AdrenoInfo { 111 struct OpenClCompilerVersion { 112 int major = 0; 113 int minor = 0; 114 int patch = 0; 115 }; 116 AdrenoInfo() = default; 117 explicit AdrenoInfo(const std::string& device_version); 118 119 AdrenoGpu adreno_gpu; 120 121 bool IsAdreno1xx() const; 122 bool IsAdreno2xx() const; 123 bool IsAdreno3xx() const; 124 bool IsAdreno4xx() const; 125 bool IsAdreno5xx() const; 126 bool IsAdreno6xx() const; 127 bool IsAdreno7xx() const; 128 bool IsAdreno6xxOrHigher() const; 129 130 // This function returns some not very documented physical parameter of 131 // Adreno6xx GPU. 132 // We obtained it using Snapdragon Profiler. 133 int GetMaximumWavesCount() const; 134 135 // returns amount of register memory per CU(Compute Unit) in bytes. 136 int GetRegisterMemorySizePerComputeUnit() const; 137 138 // returns maximum possible amount of waves based on register usage. 139 int GetMaximumWavesCount(int register_footprint_per_tread, 140 bool full_wave = true) const; 141 142 int GetWaveSize(bool full_wave) const; 143 144 int GetComputeUnitsCount() const; 145 146 // Not supported on some Adreno devices with specific driver version. 147 // b/131099086 148 bool support_one_layer_texture_array = true; 149 150 bool compiler_bugs_in_a6xx = false; 151 152 OpenClCompilerVersion cl_compiler_version; 153 }; 154 155 enum class AppleGpu { 156 kUnknown, 157 kA7, 158 kA8, 159 kA8X, 160 kA9, 161 kA9X, 162 kA10, 163 kA10X, 164 kA11, 165 kA12, 166 kA12X, 167 kA12Z, 168 kA13, 169 kA14, 170 kA15, 171 kM1, 172 kM1Pro, 173 kM1Max, 174 kM1Ultra, 175 kM2, 176 }; 177 178 struct AppleInfo { 179 AppleInfo() = default; 180 explicit AppleInfo(const std::string& gpu_description); 181 AppleGpu gpu_type; 182 183 bool IsA7GenerationGpu() const; 184 bool IsA8GenerationGpu() const; 185 bool IsLocalMemoryPreferredOverGlobal() const; 186 187 bool IsBionic() const; 188 189 bool IsSIMDMatMulSupported() const; 190 // Often, fp32 alu performance is 1/2 of fp16 alu performance 191 // But, on some devices, fp32 alu performance equal to fp16 alu performance, 192 // at least in some scenarios. 193 // This method returns true if SIMDMatMul performance in fp32 equal to fp16 194 bool IsSIMDMatMulFp32Perf2x() const; 195 196 // floating point rounding mode 197 bool IsRoundToNearestSupported() const; 198 199 int GetComputeUnitsCount() const; 200 201 // do not use, for internal usage 202 void SetComputeUnits(int compute_units_count); 203 204 private: 205 int compute_units = -1; 206 }; 207 208 enum class MaliGpu { 209 kUnknown, 210 kT604, 211 kT622, 212 kT624, 213 kT628, 214 kT658, 215 kT678, 216 kT720, 217 kT760, 218 kT820, 219 kT830, 220 kT860, 221 kT880, 222 kG31, 223 kG51, 224 kG71, 225 kG52, 226 kG72, 227 kG76, 228 kG57, 229 kG77, 230 kG68, 231 kG78, 232 kG310, 233 kG510, 234 kG610, 235 kG710, 236 }; 237 238 struct MaliInfo { 239 MaliInfo() = default; 240 explicit MaliInfo(const std::string& gpu_description); 241 MaliGpu gpu_version; 242 243 bool IsMaliT6xx() const; 244 bool IsMaliT7xx() const; 245 bool IsMaliT8xx() const; 246 bool IsMidgard() const; 247 bool IsBifrostGen1() const; 248 bool IsBifrostGen2() const; 249 bool IsBifrostGen3() const; 250 bool IsBifrost() const; 251 bool IsValhallGen1() const; 252 bool IsValhallGen2() const; 253 bool IsValhallGen3() const; 254 bool IsValhall() const; 255 256 // returns approximate compute units count using GPU name 257 int GetApproximateComputeUnitsCount() const; 258 }; 259 260 struct OpenGlInfo { 261 std::string renderer_name; 262 std::string vendor_name; 263 std::string version; 264 int major_version = -1; 265 int minor_version = -1; 266 267 int max_image_units = 0; 268 int max_ssbo_bindings = 0; 269 int max_image_bindings = 0; 270 int max_work_group_invocations = 0; 271 int max_texture_size = 0; 272 int max_array_texture_layers = 0; 273 int max_fragment_image_units = 0; 274 int max_fragment_uniform_vec4_count = 0; 275 int max_color_atttachments = 0; 276 int max_viewport_width = 0; 277 int max_viewport_height = 0; 278 int max_renderbuffer_size = 0; 279 280 std::vector<std::string> extensions; 281 int max_compute_work_group_size_x; 282 int max_compute_work_group_size_y; 283 int max_compute_work_group_size_z; 284 285 bool SupportsExplicitFp16() const; 286 287 bool IsApiOpenGl31OrAbove() const; 288 bool IsApiOpenGl32OrAbove() const; 289 }; 290 291 struct VulkanInfo { 292 std::string vendor_name; 293 uint32_t api_version = -1; 294 uint32_t api_version_major = -1; 295 uint32_t api_version_minor = -1; 296 uint32_t api_version_patch = -1; 297 298 int max_per_stage_descriptor_sampled_images = 0; 299 uint32_t max_compute_work_group_invocations; 300 uint32_t max_image_dimension_1d; 301 uint32_t max_image_dimension_2d; 302 uint32_t max_image_dimension_3d; 303 uint32_t max_image_array_layers; 304 uint64_t max_texel_buffer_elements; 305 uint64_t max_uniform_buffer_range; 306 uint64_t max_storage_buffer_range; 307 uint64_t max_push_constants_size; 308 309 uint32_t subgroup_size = 0; 310 bool supports_subgroup_arithmetic = false; 311 312 std::vector<std::string> extensions; 313 int max_compute_work_group_size_x; 314 int max_compute_work_group_size_y; 315 int max_compute_work_group_size_z; 316 317 bool SupportsExplicitFp16() const; 318 }; 319 320 enum class OpenClVersion { 321 kCl1_0, 322 kCl1_1, 323 kCl1_2, 324 kCl2_0, 325 kCl2_1, 326 kCl2_2, 327 kCl3_0, 328 kUnknown, 329 }; 330 std::string OpenClVersionToString(OpenClVersion version); 331 332 struct OpenClInfo { 333 std::string device_name; 334 std::string vendor_name; 335 std::string opencl_c_version; 336 std::string platform_version; 337 std::string driver_version; 338 339 OpenClVersion cl_version; 340 341 std::vector<std::string> extensions; 342 bool supports_fp16; 343 bool supports_image3d_writes; 344 bool supports_images; 345 int compute_units_count; 346 uint64_t buffer_max_size; 347 uint64_t max_allocation_size; 348 uint64_t image2d_max_width; 349 uint64_t image2d_max_height; 350 uint64_t image_buffer_max_size; 351 uint64_t image_array_max_layers; 352 uint64_t image3d_max_width; 353 uint64_t image3d_max_height; 354 uint64_t image3d_max_depth; 355 int max_work_group_size_x; 356 int max_work_group_size_y; 357 int max_work_group_size_z; 358 int max_work_group_total_size; 359 360 // The row pitch alignment size in pixels for 2D images created from a buffer. 361 // The value must be a power of 2. 362 uint64_t image_pitch_alignment = 0; 363 // The minimum alignment in pixels. The value must be a power of 2. 364 uint64_t image_base_address_alignment = 0; 365 uint64_t base_addr_align_in_bits; 366 367 // rtn is ROUND_TO_NEAREST 368 // with rtn precision is much better then with rtz (ROUND_TO_ZERO) 369 // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn 370 // Mali from T6xx supports rtn 371 // PowerVR supports only rtz 372 bool supports_fp32_rtn; 373 bool supports_fp16_rtn; 374 375 struct SupportedImage2dTypes { 376 absl::flat_hash_set<DataType> r_layout; 377 absl::flat_hash_set<DataType> rg_layout; 378 absl::flat_hash_set<DataType> rgb_layout; 379 absl::flat_hash_set<DataType> rgba_layout; 380 381 bool SupportsImage2D(DataType data_type, int channels) const; 382 }; 383 384 SupportedImage2dTypes supported_images_2d; 385 386 bool IsImage2dFromBufferSupported() const; 387 }; 388 389 enum class MetalLanguageVersion { 390 kMetal1_0, 391 kMetal1_1, 392 kMetal1_2, 393 kMetal2_0, 394 kMetal2_1, 395 kMetal2_2, 396 kMetal2_3, 397 kMetal2_4, 398 kMetal3_0, 399 kUnknown, 400 }; 401 402 struct MetalInfo { 403 MetalLanguageVersion language_version; 404 405 int max_work_group_size_x; 406 int max_work_group_size_y; 407 int max_work_group_size_z; 408 409 uint64_t buffer_max_size; 410 411 uint64_t image2d_max_width; 412 uint64_t image2d_max_height; 413 uint64_t image_array_max_layers; 414 uint64_t image3d_max_width; 415 uint64_t image3d_max_height; 416 uint64_t image3d_max_depth; 417 418 bool IsSIMDMatMulSupported() const; 419 // MSL is Metal shading language 420 bool IsMslVersionEqualOrHigher(int major, int minor = 0) const; 421 }; 422 423 struct GpuInfo { 424 bool IsAdreno() const; 425 bool IsApple() const; 426 bool IsMali() const; 427 bool IsPowerVR() const; 428 bool IsNvidia() const; 429 bool IsAMD() const; 430 bool IsIntel() const; 431 432 bool IsGlsl() const; 433 bool IsGlslSupportsExplicitFp16() const; 434 435 // floating point rounding mode 436 bool IsRoundToNearestSupported() const; 437 438 bool SupportsFP16() const; 439 440 bool SupportsImages() const; 441 bool SupportsTextureArray() const; 442 bool SupportsImageBuffer() const; 443 bool SupportsImage3D() const; 444 445 bool SupportsPointersInKernels() const; 446 447 // returns true if device have fixed wave size equal to 32 448 bool IsWaveSizeEqualTo32() const; 449 bool SupportsSubGroupWithSize(int sub_group_size) const; 450 451 bool SupportsFloatImage2D(DataType data_type, int channels) const; 452 bool SupportsExtension(const std::string& extension) const; 453 454 bool SupportsZeroClampForImageBuffer() const; 455 bool SupportsZeroClampForImages() const; 456 457 int GetComputeUnitsCount() const; 458 459 int GetMaxImageArguments() const; 460 461 int GetMaxWorkGroupSizeForX() const; 462 int GetMaxWorkGroupSizeForY() const; 463 int GetMaxWorkGroupSizeForZ() const; 464 int GetMaxWorkGroupTotalSize() const; 465 466 uint64_t GetMaxImage2DWidth() const; 467 uint64_t GetMaxImage2DHeight() const; 468 uint64_t GetMaxImage2DArrayLayers() const; 469 uint64_t GetMaxImage3DWidth() const; 470 uint64_t GetMaxImage3DHeight() const; 471 uint64_t GetMaxImage3DDepth() const; 472 uint64_t GetMaxBufferSize() const; 473 uint64_t GetMaxMemoryAllocationSize() const; 474 uint64_t GetMaxImageBufferWidth() const; 475 476 GpuVendor vendor = GpuVendor::kUnknown; 477 GpuApi gpu_api = GpuApi::kUnknown; 478 479 std::vector<int> supported_subgroup_sizes; 480 481 AdrenoInfo adreno_info; 482 AMDInfo amd_info; 483 AppleInfo apple_info; 484 MaliInfo mali_info; 485 486 // OpenGL specific, gpu_api should be kOpenGl 487 OpenGlInfo opengl_info; 488 bool IsApiOpenGl() const; 489 bool IsApiOpenGl31OrAbove() const; 490 491 // Vulkan specific, gpu_api should be kVulkan 492 VulkanInfo vulkan_info; 493 bool IsApiVulkan() const; 494 495 MetalInfo metal_info; 496 bool IsApiMetal() const; 497 498 OpenClInfo opencl_info; 499 bool IsApiOpenCl() const; 500 bool IsCL11OrHigher() const; 501 bool IsCL20OrHigher() const; 502 bool IsCL30OrHigher() const; 503 }; 504 505 // Currently it initializes: 506 // vendor 507 // AdrenoInfo if vendor is kQualcomm 508 // AppleInfo if vendor is kApple 509 // MaliInfo if vendor is kMali 510 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, 511 GpuApi gpu_api, GpuInfo* gpu_info); 512 513 } // namespace gpu 514 } // namespace tflite 515 516 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 517