1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 18 19 #include <cstdint> 20 #include <string> 21 #include <vector> 22 23 #include "tensorflow/lite/delegates/gpu/common/data_type.h" 24 25 namespace tflite { 26 namespace gpu { 27 28 // The VendorID returned by the GPU driver. 29 enum class GpuVendor { 30 kApple, 31 kQualcomm, 32 kMali, 33 kPowerVR, 34 kNvidia, 35 kAMD, 36 kIntel, 37 kUnknown 38 }; 39 40 enum class GpuApi { 41 kUnknown, 42 kOpenCl, 43 kMetal, 44 kVulkan, 45 kOpenGl, 46 }; 47 48 enum class AdrenoGpu { 49 // Adreno 6xx series 50 kAdreno685, 51 kAdreno680, 52 kAdreno675, 53 kAdreno650, 54 kAdreno640, 55 kAdreno630, 56 kAdreno620, 57 kAdreno618, 58 kAdreno616, 59 kAdreno615, 60 kAdreno612, 61 kAdreno610, 62 kAdreno605, 63 // Adreno 5xx series 64 kAdreno540, 65 kAdreno530, 66 kAdreno512, 67 kAdreno510, 68 kAdreno509, 69 kAdreno508, 70 kAdreno506, 71 kAdreno505, 72 kAdreno504, 73 // Adreno 4xx series 74 kAdreno430, 75 kAdreno420, 76 kAdreno418, 77 kAdreno405, 78 // Adreno 3xx series 79 kAdreno330, 80 kAdreno320, 81 kAdreno308, 82 kAdreno306, 83 kAdreno305, 84 kAdreno304, 85 // Adreno 2xx series 86 kAdreno225, 87 kAdreno220, 88 kAdreno205, 89 kAdreno203, 90 kAdreno200, 91 // Adreno 1xx series 92 kAdreno130, 93 kAdreno120, 94 kUnknown 95 }; 96 97 struct AdrenoInfo { 98 AdrenoInfo() = default; 99 explicit AdrenoInfo(const std::string& device_version); 100 101 AdrenoGpu adreno_gpu; 102 103 bool IsAdreno1xx() const; 104 bool IsAdreno2xx() const; 105 bool IsAdreno3xx() const; 106 bool IsAdreno4xx() const; 107 bool IsAdreno5xx() const; 108 bool IsAdreno6xx() const; 109 bool IsAdreno6xxOrHigher() const; 110 111 // This function returns some not very documented physical parameter of 112 // Adreno6xx GPU. 113 // We obtained it using Snapdragon Profiler. 114 int GetMaximumWavesCount() const; 115 116 // returns amount of register memory per CU(Compute Unit) in bytes. 117 int GetRegisterMemorySizePerComputeUnit() const; 118 119 // returns maximum possible amount of waves based on register usage. 120 int GetMaximumWavesCount(int register_footprint_per_tread, 121 bool full_wave = true) const; 122 123 int GetWaveSize(bool full_wave) const; 124 125 // Not supported on some Adreno devices with specific driver version. 126 // b/131099086 127 bool support_one_layer_texture_array = true; 128 129 bool compiler_bugs_in_a6xx = false; 130 }; 131 132 enum class AppleGpu { 133 kUnknown, 134 kA7, 135 kA8, 136 kA8X, 137 kA9, 138 kA9X, 139 kA10, 140 kA10X, 141 kA11, 142 kA12, 143 kA12X, 144 kA12Z, 145 kA13, 146 kA14, 147 }; 148 149 struct AppleInfo { 150 AppleInfo() = default; 151 explicit AppleInfo(const std::string& gpu_description); 152 AppleGpu gpu_type; 153 154 bool IsLocalMemoryPreferredOverGlobal() const; 155 156 bool IsBionic() const; 157 158 // floating point rounding mode 159 bool IsRoundToNearestSupported() const; 160 161 int GetComputeUnitsCount() const; 162 }; 163 164 enum class MaliGpu { 165 kUnknown, 166 kT604, 167 kT622, 168 kT624, 169 kT628, 170 kT658, 171 kT678, 172 kT720, 173 kT760, 174 kT820, 175 kT830, 176 kT860, 177 kT880, 178 kG31, 179 kG51, 180 kG71, 181 kG52, 182 kG72, 183 kG76, 184 kG57, 185 kG77, 186 kG68, 187 kG78, 188 }; 189 190 struct MaliInfo { 191 MaliInfo() = default; 192 explicit MaliInfo(const std::string& gpu_description); 193 MaliGpu gpu_version; 194 195 bool IsMaliT6xx() const; 196 bool IsMaliT7xx() const; 197 bool IsMaliT8xx() const; 198 bool IsMidgard() const; 199 bool IsBifrostGen1() const; 200 bool IsBifrostGen2() const; 201 bool IsBifrostGen3() const; 202 bool IsBifrost() const; 203 bool IsValhall() const; 204 }; 205 206 struct OpenGlInfo { 207 std::string renderer_name; 208 std::string vendor_name; 209 std::string version; 210 int major_version = -1; 211 int minor_version = -1; 212 213 int max_image_units = 0; 214 int max_ssbo_bindings = 0; 215 int max_image_bindings = 0; 216 int max_work_group_invocations = 0; 217 int max_texture_size = 0; 218 int max_array_texture_layers = 0; 219 220 std::vector<std::string> extensions; 221 int max_compute_work_group_size_x; 222 int max_compute_work_group_size_y; 223 int max_compute_work_group_size_z; 224 }; 225 226 struct VulkanInfo { 227 std::string vendor_name; 228 uint32_t api_version = -1; 229 uint32_t api_version_major = -1; 230 uint32_t api_version_minor = -1; 231 uint32_t api_version_patch = -1; 232 233 uint32_t max_per_stage_descriptor_sampled_images = 0; 234 uint32_t max_compute_work_group_invocations; 235 uint32_t max_image_dimension_2d; 236 uint32_t max_image_array_layers; 237 238 uint32_t subgroup_size = 0; 239 bool supports_subgroup_arithmetic = false; 240 241 std::vector<std::string> extensions; 242 int max_compute_work_group_size_x; 243 int max_compute_work_group_size_y; 244 int max_compute_work_group_size_z; 245 }; 246 247 enum class OpenClVersion { 248 kCl1_0, 249 kCl1_1, 250 kCl1_2, 251 kCl2_0, 252 kCl2_1, 253 kCl2_2, 254 kCl3_0, 255 kUnknown, 256 }; 257 std::string OpenClVersionToString(OpenClVersion version); 258 259 struct OpenClInfo { 260 OpenClVersion cl_version; 261 262 std::vector<std::string> extensions; 263 bool supports_fp16; 264 bool supports_image3d_writes; 265 bool supports_images; 266 int compute_units_count; 267 uint64_t buffer_max_size; 268 uint64_t image2d_max_width; 269 uint64_t image2d_max_height; 270 uint64_t image_buffer_max_size; 271 uint64_t image_array_max_layers; 272 uint64_t image3d_max_width; 273 uint64_t image3d_max_height; 274 uint64_t image3d_max_depth; 275 int max_work_group_size_x; 276 int max_work_group_size_y; 277 int max_work_group_size_z; 278 int max_work_group_total_size; 279 280 // rtn is ROUND_TO_NEAREST 281 // with rtn precision is much better then with rtz (ROUND_TO_ZERO) 282 // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn 283 // Mali from T6xx supports rtn 284 // PowerVR supports only rtz 285 bool supports_fp32_rtn; 286 bool supports_fp16_rtn; 287 288 bool supports_r_f16_tex2d = false; 289 bool supports_rg_f16_tex2d = false; 290 bool supports_rgb_f16_tex2d = false; 291 bool supports_rgba_f16_tex2d = false; 292 293 bool supports_r_f32_tex2d = false; 294 bool supports_rg_f32_tex2d = false; 295 bool supports_rgb_f32_tex2d = false; 296 bool supports_rgba_f32_tex2d = false; 297 }; 298 299 enum class MetalLanguageVersion { 300 kMetal1_0, 301 kMetal1_1, 302 kMetal1_2, 303 kMetal2_0, 304 kMetal2_1, 305 kMetal2_2, 306 kMetal2_3, 307 kUnknown, 308 }; 309 310 struct MetalInfo { 311 MetalLanguageVersion language_version; 312 313 int max_work_group_size_x; 314 int max_work_group_size_y; 315 int max_work_group_size_z; 316 317 uint64_t buffer_max_size; 318 }; 319 320 struct GpuInfo { 321 bool IsAdreno() const; 322 bool IsApple() const; 323 bool IsMali() const; 324 bool IsPowerVR() const; 325 bool IsNvidia() const; 326 bool IsAMD() const; 327 bool IsIntel() const; 328 329 // floating point rounding mode 330 bool IsRoundToNearestSupported() const; 331 332 bool SupportsFP16() const; 333 334 bool SupportsImages() const; 335 bool SupportsTextureArray() const; 336 bool SupportsImageBuffer() const; 337 bool SupportsImage3D() const; 338 339 // returns true if device have fixed wave size equal to 32 340 bool IsWaveSizeEqualTo32() const; 341 bool SupportsSubGroupWithSize(int sub_group_size) const; 342 343 bool SupportsFloatImage2D(DataType data_type, int channels) const; 344 bool SupportsExtension(const std::string& extension) const; 345 346 int GetComputeUnitsCount() const; 347 348 int GetMaxImageArguments() const; 349 350 int GetMaxWorkGroupSizeForX() const; 351 int GetMaxWorkGroupSizeForY() const; 352 int GetMaxWorkGroupSizeForZ() const; 353 int GetMaxWorkGroupTotalSize() const; 354 355 uint64_t GetMaxImage2DWidth() const; 356 uint64_t GetMaxImage2DHeight() const; 357 uint64_t GetMaxImage2DArrayLayers() const; 358 uint64_t GetMaxImage3DWidth() const; 359 uint64_t GetMaxImage3DHeight() const; 360 uint64_t GetMaxImage3DDepth() const; 361 uint64_t GetMaxBufferSize() const; 362 uint64_t GetMaxImageBufferWidth() const; 363 364 GpuVendor vendor = GpuVendor::kUnknown; 365 GpuApi gpu_api = GpuApi::kUnknown; 366 367 std::vector<int> supported_subgroup_sizes; 368 369 AdrenoInfo adreno_info; 370 AppleInfo apple_info; 371 MaliInfo mali_info; 372 373 // OpenGL specific, gpu_api should be kOpenGl 374 OpenGlInfo opengl_info; 375 bool IsApiOpenGl() const; 376 bool IsApiOpenGl31OrAbove() const; 377 378 // Vulkan specific, gpu_api should be kVulkan 379 VulkanInfo vulkan_info; 380 bool IsApiVulkan() const; 381 382 MetalInfo metal_info; 383 bool IsApiMetal() const; 384 385 OpenClInfo opencl_info; 386 bool IsApiOpenCl() const; 387 bool IsCL20OrHigher() const; 388 bool IsCL30OrHigher() const; 389 }; 390 391 // Currently it initializes: 392 // vendor 393 // AdrenoInfo if vendor is kQualcomm 394 // AppleInfo if vendor is kApple 395 // MaliInfo if vendor is kMali 396 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, 397 GpuApi gpu_api, GpuInfo* gpu_info); 398 399 } // namespace gpu 400 } // namespace tflite 401 402 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_ 403