1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 /* 3 * 4 * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. 5 * 6 * This program is free software and is provided to you under the terms of the 7 * GNU General Public License version 2 as published by the Free Software 8 * Foundation, and any use by you of this program is subject to the terms 9 * of such GNU license. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, you can access it online at 18 * http://www.gnu.org/licenses/gpl-2.0.html. 19 * 20 */ 21 22 /* 23 * Base structures shared with the kernel. 24 */ 25 26 #ifndef _UAPI_BASE_KERNEL_H_ 27 #define _UAPI_BASE_KERNEL_H_ 28 29 #include <linux/types.h> 30 31 struct base_mem_handle { 32 struct { 33 __u64 handle; 34 } basep; 35 }; 36 37 #include "mali_base_mem_priv.h" 38 #include "gpu/mali_kbase_gpu_id.h" 39 #include "gpu/mali_kbase_gpu_coherency.h" 40 41 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 42 43 #define BASE_MAX_COHERENT_GROUPS 16 44 45 #if defined(PAGE_MASK) && defined(PAGE_SHIFT) 46 #define LOCAL_PAGE_SHIFT PAGE_SHIFT 47 #define LOCAL_PAGE_LSB ~PAGE_MASK 48 #else 49 #ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2 50 #define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12 51 #endif 52 53 #if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2) 54 #define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 55 #define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) 56 #else 57 #error Failed to find page size 58 #endif 59 #endif 60 61 /* Physical memory group ID for normal usage. 62 */ 63 #define BASE_MEM_GROUP_DEFAULT (0) 64 65 /* Number of physical memory groups. 66 */ 67 #define BASE_MEM_GROUP_COUNT (16) 68 69 /** 70 * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. 71 * 72 * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator 73 * in order to determine the best cache policy. Some combinations are 74 * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), 75 * which defines a write-only region on the CPU side, which is 76 * heavily read by the CPU... 77 * Other flags are only meaningful to a particular allocator. 78 * More flags can be added to this list, as long as they don't clash 79 * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). 80 */ 81 typedef __u32 base_mem_alloc_flags; 82 83 /* A mask for all the flags which are modifiable via the base_mem_set_flags 84 * interface. 85 */ 86 #define BASE_MEM_FLAGS_MODIFIABLE \ 87 (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ 88 BASE_MEM_COHERENT_LOCAL) 89 90 /* A mask of all the flags that can be returned via the base_mem_get_flags() 91 * interface. 92 */ 93 #define BASE_MEM_FLAGS_QUERYABLE \ 94 (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ 95 BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ 96 BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ 97 BASEP_MEM_FLAGS_KERNEL_ONLY)) 98 99 /** 100 * enum base_mem_import_type - Memory types supported by @a base_mem_import 101 * 102 * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type 103 * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) 104 * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a 105 * base_mem_import_user_buffer 106 * 107 * Each type defines what the supported handle type is. 108 * 109 * If any new type is added here ARM must be contacted 110 * to allocate a numeric value for it. 111 * Do not just add a new type without synchronizing with ARM 112 * as future releases from ARM might include other new types 113 * which could clash with your custom types. 114 */ 115 enum base_mem_import_type { 116 BASE_MEM_IMPORT_TYPE_INVALID = 0, 117 /* 118 * Import type with value 1 is deprecated. 119 */ 120 BASE_MEM_IMPORT_TYPE_UMM = 2, 121 BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 122 }; 123 124 /** 125 * struct base_mem_import_user_buffer - Handle of an imported user buffer 126 * 127 * @ptr: address of imported user buffer 128 * @length: length of imported user buffer in bytes 129 * 130 * This structure is used to represent a handle of an imported user buffer. 131 */ 132 133 struct base_mem_import_user_buffer { 134 __u64 ptr; 135 __u64 length; 136 }; 137 138 /* Mask to detect 4GB boundary alignment */ 139 #define BASE_MEM_MASK_4GB 0xfffff000UL 140 /* Mask to detect 4GB boundary (in page units) alignment */ 141 #define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) 142 143 /* Limit on the 'extension' parameter for an allocation with the 144 * BASE_MEM_TILER_ALIGN_TOP flag set 145 * 146 * This is the same as the maximum limit for a Buffer Descriptor's chunk size 147 */ 148 #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ 149 (21u - (LOCAL_PAGE_SHIFT)) 150 #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ 151 (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) 152 153 /* Bit mask of cookies used for for memory allocation setup */ 154 #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ 155 156 /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ 157 #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ 158 159 /* 160 * struct base_fence - Cross-device synchronisation fence. 161 * 162 * A fence is used to signal when the GPU has finished accessing a resource that 163 * may be shared with other devices, and also to delay work done asynchronously 164 * by the GPU until other devices have finished accessing a shared resource. 165 */ 166 struct base_fence { 167 struct { 168 int fd; 169 int stream_fd; 170 } basep; 171 }; 172 173 /** 174 * struct base_mem_aliasing_info - Memory aliasing info 175 * 176 * Describes a memory handle to be aliased. 177 * A subset of the handle can be chosen for aliasing, given an offset and a 178 * length. 179 * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a 180 * region where a special page is mapped with a write-alloc cache setup, 181 * typically used when the write result of the GPU isn't needed, but the GPU 182 * must write anyway. 183 * 184 * Offset and length are specified in pages. 185 * Offset must be within the size of the handle. 186 * Offset+length must not overrun the size of the handle. 187 * 188 * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE 189 * @offset: Offset within the handle to start aliasing from, in pages. 190 * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. 191 * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE 192 * specifies the number of times the special page is needed. 193 */ 194 struct base_mem_aliasing_info { 195 struct base_mem_handle handle; 196 __u64 offset; 197 __u64 length; 198 }; 199 200 /* Maximum percentage of just-in-time memory allocation trimming to perform 201 * on free. 202 */ 203 #define BASE_JIT_MAX_TRIM_LEVEL (100) 204 205 /* Maximum number of concurrent just-in-time memory allocations. 206 */ 207 #define BASE_JIT_ALLOC_COUNT (255) 208 209 /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 210 * 211 * jit_version is 1 212 * 213 * Due to the lack of padding specified, user clients between 32 and 64-bit 214 * may have assumed a different size of the struct 215 * 216 * An array of structures was not supported 217 */ 218 struct base_jit_alloc_info_10_2 { 219 __u64 gpu_alloc_addr; 220 __u64 va_pages; 221 __u64 commit_pages; 222 __u64 extension; 223 __u8 id; 224 }; 225 226 /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up 227 * to 11.19 228 * 229 * This structure had a number of modifications during and after kernel driver 230 * version 11.5, but remains size-compatible throughout its version history, and 231 * with earlier variants compatible with future variants by requiring 232 * zero-initialization to the unused space in the structure. 233 * 234 * jit_version is 2 235 * 236 * Kernel driver version history: 237 * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes 238 * must be zero. Kbase minor version was not incremented, so some 239 * versions of 11.5 do not have this change. 240 * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase 241 * minor version not incremented) 242 * 11.6: Added 'flags', replacing 1 padding byte 243 * 11.10: Arrays of this structure are supported 244 */ 245 struct base_jit_alloc_info_11_5 { 246 __u64 gpu_alloc_addr; 247 __u64 va_pages; 248 __u64 commit_pages; 249 __u64 extension; 250 __u8 id; 251 __u8 bin_id; 252 __u8 max_allocations; 253 __u8 flags; 254 __u8 padding[2]; 255 __u16 usage_id; 256 }; 257 258 /** 259 * struct base_jit_alloc_info - Structure which describes a JIT allocation 260 * request. 261 * @gpu_alloc_addr: The GPU virtual address to write the JIT 262 * allocated GPU virtual address to. 263 * @va_pages: The minimum number of virtual pages required. 264 * @commit_pages: The minimum number of physical pages which 265 * should back the allocation. 266 * @extension: Granularity of physical pages to grow the 267 * allocation by during a fault. 268 * @id: Unique ID provided by the caller, this is used 269 * to pair allocation and free requests. 270 * Zero is not a valid value. 271 * @bin_id: The JIT allocation bin, used in conjunction with 272 * @max_allocations to limit the number of each 273 * type of JIT allocation. 274 * @max_allocations: The maximum number of allocations allowed within 275 * the bin specified by @bin_id. Should be the same 276 * for all allocations within the same bin. 277 * @flags: flags specifying the special requirements for 278 * the JIT allocation, see 279 * %BASE_JIT_ALLOC_VALID_FLAGS 280 * @padding: Expansion space - should be initialised to zero 281 * @usage_id: A hint about which allocation should be reused. 282 * The kernel should attempt to use a previous 283 * allocation with the same usage_id 284 * @heap_info_gpu_addr: Pointer to an object in GPU memory describing 285 * the actual usage of the region. 286 * 287 * jit_version is 3. 288 * 289 * When modifications are made to this structure, it is still compatible with 290 * jit_version 3 when: a) the size is unchanged, and b) new members only 291 * replace the padding bytes. 292 * 293 * Previous jit_version history: 294 * jit_version == 1, refer to &base_jit_alloc_info_10_2 295 * jit_version == 2, refer to &base_jit_alloc_info_11_5 296 * 297 * Kbase version history: 298 * 11.20: added @heap_info_gpu_addr 299 */ 300 struct base_jit_alloc_info { 301 __u64 gpu_alloc_addr; 302 __u64 va_pages; 303 __u64 commit_pages; 304 __u64 extension; 305 __u8 id; 306 __u8 bin_id; 307 __u8 max_allocations; 308 __u8 flags; 309 __u8 padding[2]; 310 __u16 usage_id; 311 __u64 heap_info_gpu_addr; 312 }; 313 314 enum base_external_resource_access { 315 BASE_EXT_RES_ACCESS_SHARED, 316 BASE_EXT_RES_ACCESS_EXCLUSIVE 317 }; 318 319 struct base_external_resource { 320 __u64 ext_resource; 321 }; 322 323 324 /** 325 * The maximum number of external resources which can be mapped/unmapped 326 * in a single request. 327 */ 328 #define BASE_EXT_RES_COUNT_MAX 10 329 330 /** 331 * struct base_external_resource_list - Structure which describes a list of 332 * external resources. 333 * @count: The number of resources. 334 * @ext_res: Array of external resources which is 335 * sized at allocation time. 336 */ 337 struct base_external_resource_list { 338 __u64 count; 339 struct base_external_resource ext_res[1]; 340 }; 341 342 struct base_jd_debug_copy_buffer { 343 __u64 address; 344 __u64 size; 345 struct base_external_resource extres; 346 }; 347 348 #define GPU_MAX_JOB_SLOTS 16 349 350 /** 351 * User-side Base GPU Property Queries 352 * 353 * The User-side Base GPU Property Query interface encapsulates two 354 * sub-modules: 355 * 356 * - "Dynamic GPU Properties" 357 * - "Base Platform Config GPU Properties" 358 * 359 * Base only deals with properties that vary between different GPU 360 * implementations - the Dynamic GPU properties and the Platform Config 361 * properties. 362 * 363 * For properties that are constant for the GPU Architecture, refer to the 364 * GPU module. However, we will discuss their relevance here just to 365 * provide background information. 366 * 367 * About the GPU Properties in Base and GPU modules 368 * 369 * The compile-time properties (Platform Config, GPU Compile-time 370 * properties) are exposed as pre-processor macros. 371 * 372 * Complementing the compile-time properties are the Dynamic GPU 373 * Properties, which act as a conduit for the GPU Configuration 374 * Discovery. 375 * 376 * In general, the dynamic properties are present to verify that the platform 377 * has been configured correctly with the right set of Platform Config 378 * Compile-time Properties. 379 * 380 * As a consistent guide across the entire DDK, the choice for dynamic or 381 * compile-time should consider the following, in order: 382 * 1. Can the code be written so that it doesn't need to know the 383 * implementation limits at all? 384 * 2. If you need the limits, get the information from the Dynamic Property 385 * lookup. This should be done once as you fetch the context, and then cached 386 * as part of the context data structure, so it's cheap to access. 387 * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, 388 * then use a Compile-Time Property (Platform Config, or GPU Compile-time 389 * property). Examples of where this might be sensible follow: 390 * - Part of a critical inner-loop 391 * - Frequent re-use throughout the driver, causing significant extra load 392 * instructions or control flow that would be worthwhile optimizing out. 393 * 394 * We cannot provide an exhaustive set of examples, neither can we provide a 395 * rule for every possible situation. Use common sense, and think about: what 396 * the rest of the driver will be doing; how the compiler might represent the 397 * value if it is a compile-time constant; whether an OEM shipping multiple 398 * devices would benefit much more from a single DDK binary, instead of 399 * insignificant micro-optimizations. 400 * 401 * Dynamic GPU Properties 402 * 403 * Dynamic GPU properties are presented in two sets: 404 * 1. the commonly used properties in @ref base_gpu_props, which have been 405 * unpacked from GPU register bitfields. 406 * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props 407 * (also a member of base_gpu_props). All of these are presented in 408 * the packed form, as presented by the GPU registers themselves. 409 * 410 * The raw properties in gpu_raw_gpu_props are necessary to 411 * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device 412 * behaving differently?". In this case, all information about the 413 * configuration is potentially useful, but it does not need to be processed 414 * by the driver. Instead, the raw registers can be processed by the Mali 415 * Tools software on the host PC. 416 * 417 * The properties returned extend the GPU Configuration Discovery 418 * registers. For example, GPU clock speed is not specified in the GPU 419 * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. 420 * 421 * The GPU properties are obtained by a call to 422 * base_get_gpu_props(). This simply returns a pointer to a const 423 * base_gpu_props structure. It is constant for the life of a base 424 * context. Multiple calls to base_get_gpu_props() to a base context 425 * return the same pointer to a constant structure. This avoids cache pollution 426 * of the common data. 427 * 428 * This pointer must not be freed, because it does not point to the start of a 429 * region allocated by the memory allocator; instead, just close the @ref 430 * base_context. 431 * 432 * 433 * Kernel Operation 434 * 435 * During Base Context Create time, user-side makes a single kernel call: 436 * - A call to fill user memory with GPU information structures 437 * 438 * The kernel-side will fill the provided the entire processed base_gpu_props 439 * structure, because this information is required in both 440 * user and kernel side; it does not make sense to decode it twice. 441 * 442 * Coherency groups must be derived from the bitmasks, but this can be done 443 * kernel side, and just once at kernel startup: Coherency groups must already 444 * be known kernel-side, to support chains that specify a 'Only Coherent Group' 445 * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. 446 * 447 * Coherency Group calculation 448 * 449 * Creation of the coherent group data is done at device-driver startup, and so 450 * is one-time. This will most likely involve a loop with CLZ, shifting, and 451 * bit clearing on the L2_PRESENT mask, depending on whether the 452 * system is L2 Coherent. The number of shader cores is done by a 453 * population count, since faulty cores may be disabled during production, 454 * producing a non-contiguous mask. 455 * 456 * The memory requirements for this algorithm can be determined either by a __u64 457 * population count on the L2_PRESENT mask (a LUT helper already is 458 * required for the above), or simple assumption that there can be no more than 459 * 16 coherent groups, since core groups are typically 4 cores. 460 */ 461 462 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 463 464 #define BASE_MAX_COHERENT_GROUPS 16 465 /** 466 * struct mali_base_gpu_core_props - GPU core props info 467 * @product_id: Pro specific value. 468 * @version_status: Status of the GPU release. No defined values, but starts at 469 * 0 and increases by one for each release status (alpha, beta, EAC, etc.). 470 * 4 bit values (0-15). 471 * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" 472 * release number. 473 * 8 bit values (0-255). 474 * @major_revision: Major release number of the GPU. "R" part of an "RnPn" 475 * release number. 476 * 4 bit values (0-15). 477 * @padding: padding to allign to 8-byte 478 * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by 479 * clGetDeviceInfo() 480 * @log2_program_counter_size: Size of the shader program counter, in bits. 481 * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This 482 * is a bitpattern where a set bit indicates that the format is supported. 483 * Before using a texture format, it is recommended that the corresponding 484 * bit be checked. 485 * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. 486 * It is unlikely that a client will be able to allocate all of this memory 487 * for their own purposes, but this at least provides an upper bound on the 488 * memory available to the GPU. 489 * This is required for OpenCL's clGetDeviceInfo() call when 490 * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The 491 * client will not be expecting to allocate anywhere near this value. 492 * @num_exec_engines: The number of execution engines. 493 */ 494 struct mali_base_gpu_core_props { 495 __u32 product_id; 496 __u16 version_status; 497 __u16 minor_revision; 498 __u16 major_revision; 499 __u16 padding; 500 __u32 gpu_freq_khz_max; 501 __u32 log2_program_counter_size; 502 __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; 503 __u64 gpu_available_memory_size; 504 __u8 num_exec_engines; 505 }; 506 507 /* 508 * More information is possible - but associativity and bus width are not 509 * required by upper-level apis. 510 */ 511 struct mali_base_gpu_l2_cache_props { 512 __u8 log2_line_size; 513 __u8 log2_cache_size; 514 __u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ 515 __u8 padding[5]; 516 }; 517 518 struct mali_base_gpu_tiler_props { 519 __u32 bin_size_bytes; /* Max is 4*2^15 */ 520 __u32 max_active_levels; /* Max is 2^15 */ 521 }; 522 523 /** 524 * struct mali_base_gpu_thread_props - GPU threading system details. 525 * @max_threads: Max. number of threads per core 526 * @max_workgroup_size: Max. number of threads per workgroup 527 * @max_barrier_size: Max. number of threads that can synchronize on a 528 * simple barrier 529 * @max_registers: Total size [1..65535] of the register file available 530 * per core. 531 * @max_task_queue: Max. tasks [1..255] which may be sent to a core 532 * before it becomes blocked. 533 * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split 534 * field. 535 * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, 536 * 3 = SW Model/Emulation 537 * @padding: padding to allign to 8-byte 538 * @tls_alloc: Number of threads per core that TLS must be 539 * allocated for 540 */ 541 struct mali_base_gpu_thread_props { 542 __u32 max_threads; 543 __u32 max_workgroup_size; 544 __u32 max_barrier_size; 545 __u16 max_registers; 546 __u8 max_task_queue; 547 __u8 max_thread_group_split; 548 __u8 impl_tech; 549 __u8 padding[3]; 550 __u32 tls_alloc; 551 }; 552 553 /** 554 * struct mali_base_gpu_coherent_group - descriptor for a coherent group 555 * @core_mask: Core restriction mask required for the group 556 * @num_cores: Number of cores in the group 557 * @padding: padding to allign to 8-byte 558 * 559 * \c core_mask exposes all cores in that coherent group, and \c num_cores 560 * provides a cached population-count for that mask. 561 * 562 * @note Whilst all cores are exposed in the mask, not all may be available to 563 * the application, depending on the Kernel Power policy. 564 * 565 * @note if u64s must be 8-byte aligned, then this structure has 32-bits of 566 * wastage. 567 */ 568 struct mali_base_gpu_coherent_group { 569 __u64 core_mask; 570 __u16 num_cores; 571 __u16 padding[3]; 572 }; 573 574 /** 575 * struct mali_base_gpu_coherent_group_info - Coherency group information 576 * @num_groups: Number of coherent groups in the GPU. 577 * @num_core_groups: Number of core groups (coherent or not) in the GPU. 578 * Equivalent to the number of L2 Caches. 579 * The GPU Counter dumping writes 2048 bytes per core group, regardless 580 * of whether the core groups are coherent or not. Hence this member is 581 * needed to calculate how much memory is required for dumping. 582 * @note Do not use it to work out how many valid elements are in the 583 * group[] member. Use num_groups instead. 584 * @coherency: Coherency features of the memory, accessed by gpu_mem_features 585 * methods 586 * @padding: padding to allign to 8-byte 587 * @group: Descriptors of coherent groups 588 * 589 * Note that the sizes of the members could be reduced. However, the \c group 590 * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte 591 * aligned, thus leading to wastage if the other members sizes were reduced. 592 * 593 * The groups are sorted by core mask. The core masks are non-repeating and do 594 * not intersect. 595 */ 596 struct mali_base_gpu_coherent_group_info { 597 __u32 num_groups; 598 __u32 num_core_groups; 599 __u32 coherency; 600 __u32 padding; 601 struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; 602 }; 603 604 /** 605 * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware 606 * Configuration Discovery registers. 607 * @shader_present: Shader core present bitmap 608 * @tiler_present: Tiler core present bitmap 609 * @l2_present: Level 2 cache present bitmap 610 * @stack_present: Core stack present bitmap 611 * @l2_features: L2 features 612 * @core_features: Core features 613 * @mem_features: Mem features 614 * @mmu_features: Mmu features 615 * @as_present: Bitmap of address spaces present 616 * @js_present: Job slots present 617 * @js_features: Array of job slot features. 618 * @tiler_features: Tiler features 619 * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU 620 * @gpu_id: GPU and revision identifier 621 * @thread_max_threads: Maximum number of threads per core 622 * @thread_max_workgroup_size: Maximum number of threads per workgroup 623 * @thread_max_barrier_size: Maximum number of threads per barrier 624 * @thread_features: Thread features 625 * @coherency_mode: Note: This is the _selected_ coherency mode rather than the 626 * available modes as exposed in the coherency_features register 627 * @thread_tls_alloc: Number of threads per core that TLS must be allocated for 628 * @gpu_features: GPU features 629 * 630 * The information is presented inefficiently for access. For frequent access, 631 * the values should be better expressed in an unpacked form in the 632 * base_gpu_props structure. 633 * 634 * The raw properties in gpu_raw_gpu_props are necessary to 635 * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device 636 * behaving differently?". In this case, all information about the 637 * configuration is potentially useful, but it does not need to be processed 638 * by the driver. Instead, the raw registers can be processed by the Mali 639 * Tools software on the host PC. 640 * 641 */ 642 struct gpu_raw_gpu_props { 643 __u64 shader_present; 644 __u64 tiler_present; 645 __u64 l2_present; 646 __u64 stack_present; 647 __u32 l2_features; 648 __u32 core_features; 649 __u32 mem_features; 650 __u32 mmu_features; 651 652 __u32 as_present; 653 654 __u32 js_present; 655 __u32 js_features[GPU_MAX_JOB_SLOTS]; 656 __u32 tiler_features; 657 __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; 658 659 __u32 gpu_id; 660 661 __u32 thread_max_threads; 662 __u32 thread_max_workgroup_size; 663 __u32 thread_max_barrier_size; 664 __u32 thread_features; 665 666 /* 667 * Note: This is the _selected_ coherency mode rather than the 668 * available modes as exposed in the coherency_features register. 669 */ 670 __u32 coherency_mode; 671 672 __u32 thread_tls_alloc; 673 __u64 gpu_features; 674 }; 675 676 /** 677 * struct base_gpu_props - Return structure for base_get_gpu_props(). 678 * @core_props: Core props. 679 * @l2_props: L2 props. 680 * @unused_1: Keep for backwards compatibility. 681 * @tiler_props: Tiler props. 682 * @thread_props: Thread props. 683 * @raw_props: This member is large, likely to be 128 bytes. 684 * @coherency_info: This must be last member of the structure. 685 * 686 * NOTE: the raw_props member in this data structure contains the register 687 * values from which the value of the other members are derived. The derived 688 * members exist to allow for efficient access and/or shielding the details 689 * of the layout of the registers. 690 */ 691 struct base_gpu_props { 692 struct mali_base_gpu_core_props core_props; 693 struct mali_base_gpu_l2_cache_props l2_props; 694 __u64 unused_1; 695 struct mali_base_gpu_tiler_props tiler_props; 696 struct mali_base_gpu_thread_props thread_props; 697 struct gpu_raw_gpu_props raw_props; 698 struct mali_base_gpu_coherent_group_info coherency_info; 699 }; 700 701 #if MALI_USE_CSF 702 #include "csf/mali_base_csf_kernel.h" 703 #else 704 #include "jm/mali_base_jm_kernel.h" 705 #endif 706 707 #define BASE_MEM_GROUP_ID_GET(flags) \ 708 ((flags & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT) 709 710 #define BASE_MEM_GROUP_ID_SET(id) \ 711 (((base_mem_alloc_flags)((id < 0 || id >= BASE_MEM_GROUP_COUNT) ? \ 712 BASE_MEM_GROUP_DEFAULT : \ 713 id) \ 714 << BASEP_MEM_GROUP_ID_SHIFT) & \ 715 BASE_MEM_GROUP_ID_MASK) 716 717 #define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id) \ 718 (BASEP_CONTEXT_MMU_GROUP_ID_MASK & \ 719 ((base_context_create_flags)(group_id) \ 720 << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)) 721 722 #define BASE_CONTEXT_MMU_GROUP_ID_GET(flags) \ 723 ((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> \ 724 BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) 725 726 /* 727 * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These 728 * flags are also used, where applicable, for specifying which fields 729 * are valid following the request operation. 730 */ 731 732 /* For monotonic (counter) timefield */ 733 #define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) 734 /* For system wide timestamp */ 735 #define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) 736 /* For GPU cycle counter */ 737 #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) 738 /* Specify kernel GPU register timestamp */ 739 #define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) 740 /* Specify userspace cntvct_el0 timestamp source */ 741 #define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) 742 743 #define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ 744 BASE_TIMEINFO_MONOTONIC_FLAG | \ 745 BASE_TIMEINFO_TIMESTAMP_FLAG | \ 746 BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ 747 BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ 748 BASE_TIMEINFO_USER_SOURCE_FLAG) 749 750 /* Maximum number of source allocations allowed to create an alias allocation. 751 * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array 752 * layers, since each cube map in the array will have 6 faces. 753 */ 754 #define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576) 755 756 #endif /* _UAPI_BASE_KERNEL_H_ */ 757