1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 18 19 #include <cstdint> 20 #include <string> 21 #include <vector> 22 23 #include "tensorflow/lite/delegates/gpu/common/data_type.h" 24 25 namespace tflite { 26 namespace gpu { 27 28 // The VendorID returned by the GPU driver. 29 enum class GpuVendor { 30 kApple, 31 kQualcomm, 32 kMali, 33 kPowerVR, 34 kNvidia, 35 kAMD, 36 kIntel, 37 kUnknown 38 }; 39 40 enum class GpuApi { 41 kUnknown, 42 kOpenCl, 43 kMetal, 44 kVulkan, 45 kOpenGl, 46 }; 47 48 enum class AdrenoGpu { 49 // Adreno 6xx series 50 kAdreno685, 51 kAdreno680, 52 kAdreno675, 53 kAdreno660, 54 kAdreno650, 55 kAdreno640, 56 kAdreno630, 57 kAdreno620, 58 kAdreno618, 59 kAdreno616, 60 kAdreno615, 61 kAdreno612, 62 kAdreno610, 63 kAdreno605, 64 // Adreno 5xx series 65 kAdreno540, 66 kAdreno530, 67 kAdreno512, 68 kAdreno510, 69 kAdreno509, 70 kAdreno508, 71 kAdreno506, 72 kAdreno505, 73 kAdreno504, 74 // Adreno 4xx series 75 kAdreno430, 76 kAdreno420, 77 kAdreno418, 78 kAdreno405, 79 // Adreno 3xx series 80 kAdreno330, 81 kAdreno320, 82 kAdreno308, 83 kAdreno306, 84 kAdreno305, 85 kAdreno304, 86 // Adreno 2xx series 87 kAdreno225, 88 kAdreno220, 89 kAdreno205, 90 kAdreno203, 91 kAdreno200, 92 // Adreno 1xx series 93 kAdreno130, 94 kAdreno120, 95 kUnknown 96 }; 97 98 struct AMDInfo { 99 AMDInfo() = default; 100 int shader_engines; 101 int compute_units_per_shader_engine; GetComputeUnitsCountAMDInfo102 int GetComputeUnitsCount() const { 103 return shader_engines * compute_units_per_shader_engine; 104 } 105 }; 106 107 struct AdrenoInfo { 108 AdrenoInfo() = default; 109 explicit AdrenoInfo(const std::string& device_version); 110 111 AdrenoGpu adreno_gpu; 112 113 bool IsAdreno1xx() const; 114 bool IsAdreno2xx() const; 115 bool IsAdreno3xx() const; 116 bool IsAdreno4xx() const; 117 bool IsAdreno5xx() const; 118 bool IsAdreno6xx() const; 119 bool IsAdreno6xxOrHigher() const; 120 121 // This function returns some not very documented physical parameter of 122 // Adreno6xx GPU. 123 // We obtained it using Snapdragon Profiler. 124 int GetMaximumWavesCount() const; 125 126 // returns amount of register memory per CU(Compute Unit) in bytes. 127 int GetRegisterMemorySizePerComputeUnit() const; 128 129 // returns maximum possible amount of waves based on register usage. 130 int GetMaximumWavesCount(int register_footprint_per_tread, 131 bool full_wave = true) const; 132 133 int GetWaveSize(bool full_wave) const; 134 135 // Not supported on some Adreno devices with specific driver version. 136 // b/131099086 137 bool support_one_layer_texture_array = true; 138 139 bool compiler_bugs_in_a6xx = false; 140 }; 141 142 enum class AppleGpu { 143 kUnknown, 144 kA7, 145 kA8, 146 kA8X, 147 kA9, 148 kA9X, 149 kA10, 150 kA10X, 151 kA11, 152 kA12, 153 kA12X, 154 kA12Z, 155 kA13, 156 kA14, 157 }; 158 159 struct AppleInfo { 160 AppleInfo() = default; 161 explicit AppleInfo(const std::string& gpu_description); 162 AppleGpu gpu_type; 163 164 bool IsLocalMemoryPreferredOverGlobal() const; 165 166 bool IsBionic() const; 167 168 // floating point rounding mode 169 bool IsRoundToNearestSupported() const; 170 171 int GetComputeUnitsCount() const; 172 }; 173 174 enum class MaliGpu { 175 kUnknown, 176 kT604, 177 kT622, 178 kT624, 179 kT628, 180 kT658, 181 kT678, 182 kT720, 183 kT760, 184 kT820, 185 kT830, 186 kT860, 187 kT880, 188 kG31, 189 kG51, 190 kG71, 191 kG52, 192 kG72, 193 kG76, 194 kG57, 195 kG77, 196 kG68, 197 kG78, 198 }; 199 200 struct MaliInfo { 201 MaliInfo() = default; 202 explicit MaliInfo(const std::string& gpu_description); 203 MaliGpu gpu_version; 204 205 bool IsMaliT6xx() const; 206 bool IsMaliT7xx() const; 207 bool IsMaliT8xx() const; 208 bool IsMidgard() const; 209 bool IsBifrostGen1() const; 210 bool IsBifrostGen2() const; 211 bool IsBifrostGen3() const; 212 bool IsBifrost() const; 213 bool IsValhall() const; 214 }; 215 216 struct OpenGlInfo { 217 std::string renderer_name; 218 std::string vendor_name; 219 std::string version; 220 int major_version = -1; 221 int minor_version = -1; 222 223 int max_image_units = 0; 224 int max_ssbo_bindings = 0; 225 int max_image_bindings = 0; 226 int max_work_group_invocations = 0; 227 int max_texture_size = 0; 228 int max_array_texture_layers = 0; 229 int max_fragment_image_units = 0; 230 int max_fragment_uniform_vec4_count = 0; 231 int max_color_atttachments = 0; 232 int max_viewport_width = 0; 233 int max_viewport_height = 0; 234 int max_renderbuffer_size = 0; 235 236 std::vector<std::string> extensions; 237 int max_compute_work_group_size_x; 238 int max_compute_work_group_size_y; 239 int max_compute_work_group_size_z; 240 }; 241 242 struct VulkanInfo { 243 std::string vendor_name; 244 uint32_t api_version = -1; 245 uint32_t api_version_major = -1; 246 uint32_t api_version_minor = -1; 247 uint32_t api_version_patch = -1; 248 249 int max_per_stage_descriptor_sampled_images = 0; 250 uint32_t max_compute_work_group_invocations; 251 uint32_t max_image_dimension_2d; 252 uint32_t max_image_array_layers; 253 254 uint32_t subgroup_size = 0; 255 bool supports_subgroup_arithmetic = false; 256 257 std::vector<std::string> extensions; 258 int max_compute_work_group_size_x; 259 int max_compute_work_group_size_y; 260 int max_compute_work_group_size_z; 261 }; 262 263 enum class OpenClVersion { 264 kCl1_0, 265 kCl1_1, 266 kCl1_2, 267 kCl2_0, 268 kCl2_1, 269 kCl2_2, 270 kCl3_0, 271 kUnknown, 272 }; 273 std::string OpenClVersionToString(OpenClVersion version); 274 275 struct OpenClInfo { 276 std::string device_name; 277 std::string vendor_name; 278 std::string opencl_c_version; 279 std::string platform_version; 280 281 OpenClVersion cl_version; 282 283 std::vector<std::string> extensions; 284 bool supports_fp16; 285 bool supports_image3d_writes; 286 bool supports_images; 287 int compute_units_count; 288 uint64_t buffer_max_size; 289 uint64_t max_allocation_size; 290 uint64_t image2d_max_width; 291 uint64_t image2d_max_height; 292 uint64_t image_buffer_max_size; 293 uint64_t image_array_max_layers; 294 uint64_t image3d_max_width; 295 uint64_t image3d_max_height; 296 uint64_t image3d_max_depth; 297 int max_work_group_size_x; 298 int max_work_group_size_y; 299 int max_work_group_size_z; 300 int max_work_group_total_size; 301 uint64_t image_pitch_alignment; 302 uint64_t base_addr_align_in_bits; 303 304 // rtn is ROUND_TO_NEAREST 305 // with rtn precision is much better then with rtz (ROUND_TO_ZERO) 306 // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn 307 // Mali from T6xx supports rtn 308 // PowerVR supports only rtz 309 bool supports_fp32_rtn; 310 bool supports_fp16_rtn; 311 312 bool supports_r_f16_tex2d = false; 313 bool supports_rg_f16_tex2d = false; 314 bool supports_rgb_f16_tex2d = false; 315 bool supports_rgba_f16_tex2d = false; 316 317 bool supports_r_f32_tex2d = false; 318 bool supports_rg_f32_tex2d = false; 319 bool supports_rgb_f32_tex2d = false; 320 bool supports_rgba_f32_tex2d = false; 321 322 bool IsImage2dFromBufferSupported() const; 323 }; 324 325 enum class MetalLanguageVersion { 326 kMetal1_0, 327 kMetal1_1, 328 kMetal1_2, 329 kMetal2_0, 330 kMetal2_1, 331 kMetal2_2, 332 kMetal2_3, 333 kUnknown, 334 }; 335 336 struct MetalInfo { 337 MetalLanguageVersion language_version; 338 339 int max_work_group_size_x; 340 int max_work_group_size_y; 341 int max_work_group_size_z; 342 343 uint64_t buffer_max_size; 344 }; 345 346 struct GpuInfo { 347 bool IsAdreno() const; 348 bool IsApple() const; 349 bool IsMali() const; 350 bool IsPowerVR() const; 351 bool IsNvidia() const; 352 bool IsAMD() const; 353 bool IsIntel() const; 354 355 bool IsGlsl() const; 356 357 // floating point rounding mode 358 bool IsRoundToNearestSupported() const; 359 360 bool SupportsFP16() const; 361 362 bool SupportsImages() const; 363 bool SupportsTextureArray() const; 364 bool SupportsImageBuffer() const; 365 bool SupportsImage3D() const; 366 367 bool SupportsPointersInKernels() const; 368 369 // returns true if device have fixed wave size equal to 32 370 bool IsWaveSizeEqualTo32() const; 371 bool SupportsSubGroupWithSize(int sub_group_size) const; 372 373 bool SupportsFloatImage2D(DataType data_type, int channels) const; 374 bool SupportsExtension(const std::string& extension) const; 375 376 int GetComputeUnitsCount() const; 377 378 int GetMaxImageArguments() const; 379 380 int GetMaxWorkGroupSizeForX() const; 381 int GetMaxWorkGroupSizeForY() const; 382 int GetMaxWorkGroupSizeForZ() const; 383 int GetMaxWorkGroupTotalSize() const; 384 385 uint64_t GetMaxImage2DWidth() const; 386 uint64_t GetMaxImage2DHeight() const; 387 uint64_t GetMaxImage2DArrayLayers() const; 388 uint64_t GetMaxImage3DWidth() const; 389 uint64_t GetMaxImage3DHeight() const; 390 uint64_t GetMaxImage3DDepth() const; 391 uint64_t GetMaxBufferSize() const; 392 uint64_t GetMaxMemoryAllocationSize() const; 393 uint64_t GetMaxImageBufferWidth() const; 394 395 GpuVendor vendor = GpuVendor::kUnknown; 396 GpuApi gpu_api = GpuApi::kUnknown; 397 398 std::vector<int> supported_subgroup_sizes; 399 400 AdrenoInfo adreno_info; 401 AMDInfo amd_info; 402 AppleInfo apple_info; 403 MaliInfo mali_info; 404 405 // OpenGL specific, gpu_api should be kOpenGl 406 OpenGlInfo opengl_info; 407 bool IsApiOpenGl() const; 408 bool IsApiOpenGl31OrAbove() const; 409 410 // Vulkan specific, gpu_api should be kVulkan 411 VulkanInfo vulkan_info; 412 bool IsApiVulkan() const; 413 414 MetalInfo metal_info; 415 bool IsApiMetal() const; 416 417 OpenClInfo opencl_info; 418 bool IsApiOpenCl() const; 419 bool IsCL11OrHigher() const; 420 bool IsCL20OrHigher() const; 421 bool IsCL30OrHigher() const; 422 }; 423 424 // Currently it initializes: 425 // vendor 426 // AdrenoInfo if vendor is kQualcomm 427 // AppleInfo if vendor is kApple 428 // MaliInfo if vendor is kMali 429 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, 430 GpuApi gpu_api, GpuInfo* gpu_info); 431 432 } // namespace gpu 433 } // namespace tflite 434 435 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 436