• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /*
3  *
4  * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /*
23  * Base structures shared with the kernel.
24  */
25 
26 #ifndef _UAPI_BASE_KERNEL_H_
27 #define _UAPI_BASE_KERNEL_H_
28 
29 #include <linux/types.h>
30 
/* Memory handle: a wrapper around one 64-bit value, reached as basep.handle. */
31 struct base_mem_handle {
32 	struct {
33 		__u64 handle;	/* the 64-bit handle value itself */
34 	} basep;
35 };
36 
37 #include "mali_base_mem_priv.h"
38 #include "gpu/mali_kbase_gpu_id.h"
39 #include "gpu/mali_kbase_gpu_coherency.h"
40 
/* Length of the texture_features[] arrays in the GPU property structures. */
41 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
42 
/* Length of the group[] array in struct mali_base_gpu_coherent_group_info. */
43 #define BASE_MAX_COHERENT_GROUPS 16
44 
/*
 * Page geometry used by the masks and limits in this header.
 *
 * When the including environment already provides both PAGE_SHIFT and
 * PAGE_MASK they are reused. Otherwise the shift is taken from
 * OSU_CONFIG_CPU_PAGE_SIZE_LOG2, which defaults to 12 (4 KiB pages) when the
 * build has not set it.
 *
 * LOCAL_PAGE_SHIFT: log2 of the page size in bytes.
 * LOCAL_PAGE_LSB:   mask covering the offset-within-page bits.
 */
#if defined(PAGE_MASK) && defined(PAGE_SHIFT)
#define LOCAL_PAGE_SHIFT PAGE_SHIFT
/* Parenthesised so the expansion is a single operand at every use site. */
#define LOCAL_PAGE_LSB (~PAGE_MASK)
#else
#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2
#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12
#endif

/* The default above guarantees the macro exists; the #error is a last guard. */
#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2)
#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2
#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
#else
#error Failed to find page size
#endif
#endif
60 
61 /* Physical memory group ID for normal usage.
 * BASE_MEM_GROUP_ID_SET() substitutes this value for out-of-range IDs.
62  */
63 #define BASE_MEM_GROUP_DEFAULT (0)
64 
65 /* Number of physical memory groups.
 * BASE_MEM_GROUP_ID_SET() treats IDs below 0 or at/above this count as
 * out of range, so valid IDs are 0 .. BASE_MEM_GROUP_COUNT - 1.
66  */
67 #define BASE_MEM_GROUP_COUNT (16)
68 
69 /**
70  * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
71  *
72  * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
73  * in order to determine the best cache policy. Some combinations are
74  * invalid: for example MEM_PROT_CPU_WR | MEM_HINT_CPU_RD describes a region
75  * that is write-only on the CPU side and yet heavily read by the CPU.
76  *
77  * Other flags are only meaningful to a particular allocator.
78  * More flags can be added to this list, as long as they don't clash
79  * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
 *
 * The flags are carried in a 32-bit unsigned value.
80  */
81 typedef __u32 base_mem_alloc_flags;
82 
83 /* A mask for all the flags which are modifiable via the base_mem_set_flags
84  * interface.
 *
 * It is the OR of BASE_MEM_DONT_NEED, BASE_MEM_COHERENT_SYSTEM and
 * BASE_MEM_COHERENT_LOCAL. Those flag values are not defined at this point in
 * the header, so they must be in scope wherever the macro is expanded
 * (presumably via the CSF/JM header included further down - confirm).
85  */
86 #define BASE_MEM_FLAGS_MODIFIABLE \
87 	(BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \
88 	 BASE_MEM_COHERENT_LOCAL)
89 
90 /* A mask of all the flags that can be returned via the base_mem_get_flags()
91  * interface.
 *
 * Computed as BASE_MEM_FLAGS_INPUT_MASK with the following removed:
 * BASE_MEM_SAME_VA, BASE_MEM_COHERENT_SYSTEM_REQUIRED, BASE_MEM_DONT_NEED,
 * BASE_MEM_IMPORT_SHARED, BASE_MEM_FLAGS_RESERVED and
 * BASEP_MEM_FLAGS_KERNEL_ONLY. None of these are defined at this point in
 * the header; they must be in scope where the macro is expanded.
92  */
93 #define BASE_MEM_FLAGS_QUERYABLE \
94 	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
95 		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
96 		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
97 		BASEP_MEM_FLAGS_KERNEL_ONLY))
98 
99 /**
100  * enum base_mem_import_type - Memory types supported by @a base_mem_import
101  *
102  * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type
103  * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int)
104  * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a
105  * base_mem_import_user_buffer
106  *
107  * Each type defines what the supported handle type is.
108  *
109  * If any new type is added here ARM must be contacted
110  * to allocate a numeric value for it.
111  * Do not just add a new type without synchronizing with ARM
112  * as future releases from ARM might include other new types
113  * which could clash with your custom types.
114  */
115 enum base_mem_import_type {
116 	BASE_MEM_IMPORT_TYPE_INVALID = 0,
117 	/*
118 	 * Import type with value 1 is deprecated.
	 * The value is left unassigned, so the numbering jumps from 0 to 2.
119 	 */
120 	BASE_MEM_IMPORT_TYPE_UMM = 2,	/* handle is a file descriptor (int) */
121 	BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3	/* handle is a base_mem_import_user_buffer */
122 };
123 
124 /**
125  * struct base_mem_import_user_buffer - Handle of an imported user buffer
126  *
127  * @ptr:	address of imported user buffer
128  * @length:	length of imported user buffer in bytes
129  *
130  * This structure is used to represent a handle of an imported user buffer.
131  */
132 
133 struct base_mem_import_user_buffer {
134 	__u64 ptr;	/* buffer address, carried as a fixed-width 64-bit integer */
135 	__u64 length;	/* buffer length in bytes */
136 };
137 
138 /* Mask to detect 4GB boundary alignment (selects address bits 12..31) */
139 #define BASE_MEM_MASK_4GB  0xfffff000UL
140 /* Mask to detect 4GB boundary (in page units) alignment: the byte mask
 * above shifted down by LOCAL_PAGE_SHIFT.
 */
141 #define BASE_MEM_PFN_MASK_4GB  (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
142 
143 /* Limit on the 'extension' parameter for an allocation with the
144  * BASE_MEM_TILER_ALIGN_TOP flag set
145  *
146  * This is the same as the maximum limit for a Buffer Descriptor's chunk size
 *
 * The limit corresponds to 2^21 bytes. The _LOG2 macro is log2 of that limit
 * expressed in pages (21 minus the page shift); the second macro is the page
 * count itself, as an unsigned long long.
147  */
148 #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2                      \
149 	(21u - (LOCAL_PAGE_SHIFT))
150 #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES                           \
151 	(1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
152 
/* Bit mask of cookies used for memory allocation setup.
 *
 * Bit 0 is reserved, so every bit except bit 0 is set. The expansion is
 * parenthesised so that it behaves as a single operand wherever it is used.
 */
#define KBASE_COOKIE_MASK  (~1UL)
155 
/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call: 8 GB,
 * expressed as a number of pages.
 *
 * LOCAL_PAGE_SHIFT is used instead of PAGE_SHIFT so the macro also expands
 * where the includer does not provide PAGE_SHIFT; LOCAL_PAGE_SHIFT is
 * PAGE_SHIFT itself whenever PAGE_SHIFT and PAGE_MASK are both defined.
 */
#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> LOCAL_PAGE_SHIFT)
158 
159 /*
160  * struct base_fence - Cross-device synchronisation fence.
161  *
162  * A fence is used to signal when the GPU has finished accessing a resource that
163  * may be shared with other devices, and also to delay work done asynchronously
164  * by the GPU until other devices have finished accessing a shared resource.
165  */
166 struct base_fence {
167 	struct {
		/* NOTE(review): both members are plain ints and, by name, look
		 * like file descriptors; their exact roles are not described
		 * in this header - confirm against the users of base_fence.
		 */
168 		int fd;
169 		int stream_fd;
170 	} basep;
171 };
172 
173 /**
174  * struct base_mem_aliasing_info - Memory aliasing info
175  *
176  * Describes a memory handle to be aliased.
177  * A subset of the handle can be chosen for aliasing, given an offset and a
178  * length.
179  * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
180  * region where a special page is mapped with a write-alloc cache setup,
181  * typically used when the write result of the GPU isn't needed, but the GPU
182  * must write anyway.
183  *
184  * Offset and length are specified in pages.
185  * Offset must be within the size of the handle.
186  * Offset+length must not overrun the size of the handle.
187  *
188  * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
189  * @offset: Offset within the handle to start aliasing from, in pages.
190  *          Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
191  * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
192  *          specifies the number of times the special page is needed.
193  */
194 struct base_mem_aliasing_info {
195 	struct base_mem_handle handle;	/* handle to alias */
196 	__u64 offset;	/* start offset within the handle, in pages */
197 	__u64 length;	/* length to alias, in pages */
198 };
199 
200 /* Maximum percentage of just-in-time memory allocation trimming to perform
201  * on free.
202  */
203 #define BASE_JIT_MAX_TRIM_LEVEL (100)
204 
205 /* Maximum number of concurrent just-in-time memory allocations.
 * NOTE(review): 255 is also the largest value a __u8 can hold, which matches
 * the __u8 'id' member of the base_jit_alloc_info structures - confirm that
 * this is the reason for the limit.
206  */
207 #define BASE_JIT_ALLOC_COUNT (255)
208 
209 /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
210  *
211  * jit_version is 1
212  *
213  * Due to the lack of padding specified, user clients between 32 and 64-bit
214  * may have assumed a different size of the struct
215  *
216  * An array of structures was not supported
217  */
218 struct base_jit_alloc_info_10_2 {
219 	__u64 gpu_alloc_addr;
220 	__u64 va_pages;
221 	__u64 commit_pages;
222 	__u64 extension;
	/* Single trailing byte with no explicit padding after it: this is the
	 * "lack of padding" that the comment above the structure refers to.
	 */
223 	__u8 id;
224 };
225 
226 /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up
227  * to 11.19
228  *
229  * This structure had a number of modifications during and after kernel driver
230  * version 11.5, but remains size-compatible throughout its version history, and
231  * with earlier variants compatible with future variants by requiring
232  * zero-initialization to the unused space in the structure.
233  *
234  * jit_version is 2
235  *
236  * Kernel driver version history:
237  * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes
238  *       must be zero. Kbase minor version was not incremented, so some
239  *       versions of 11.5 do not have this change.
240  * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase
241  *       minor version not incremented)
242  * 11.6: Added 'flags', replacing 1 padding byte
243  * 11.10: Arrays of this structure are supported
244  */
245 struct base_jit_alloc_info_11_5 {
246 	__u64 gpu_alloc_addr;
247 	__u64 va_pages;
248 	__u64 commit_pages;
249 	__u64 extension;
250 	__u8 id;
251 	__u8 bin_id;		/* took over a padding byte during 11.5 */
252 	__u8 max_allocations;	/* took over a padding byte during 11.5 */
253 	__u8 flags;		/* took over a padding byte in 11.6 */
254 	__u8 padding[2];	/* what is left of the original padding[5] */
255 	__u16 usage_id;
	/* The members add up to 40 bytes with every field naturally aligned. */
256 };
257 
258 /**
259  * struct base_jit_alloc_info - Structure which describes a JIT allocation
260  *                              request.
261  * @gpu_alloc_addr:             The GPU virtual address to write the JIT
262  *                              allocated GPU virtual address to.
263  * @va_pages:                   The minimum number of virtual pages required.
264  * @commit_pages:               The minimum number of physical pages which
265  *                              should back the allocation.
266  * @extension:                     Granularity of physical pages to grow the
267  *                              allocation by during a fault.
268  * @id:                         Unique ID provided by the caller, this is used
269  *                              to pair allocation and free requests.
270  *                              Zero is not a valid value.
271  * @bin_id:                     The JIT allocation bin, used in conjunction with
272  *                              @max_allocations to limit the number of each
273  *                              type of JIT allocation.
274  * @max_allocations:            The maximum number of allocations allowed within
275  *                              the bin specified by @bin_id. Should be the same
276  *                              for all allocations within the same bin.
277  * @flags:                      flags specifying the special requirements for
278  *                              the JIT allocation, see
279  *                              %BASE_JIT_ALLOC_VALID_FLAGS
280  * @padding:                    Expansion space - should be initialised to zero
281  * @usage_id:                   A hint about which allocation should be reused.
282  *                              The kernel should attempt to use a previous
283  *                              allocation with the same usage_id
284  * @heap_info_gpu_addr:         Pointer to an object in GPU memory describing
285  *                              the actual usage of the region.
286  *
287  * jit_version is 3.
288  *
289  * When modifications are made to this structure, it is still compatible with
290  * jit_version 3 when: a) the size is unchanged, and b) new members only
291  * replace the padding bytes.
292  *
293  * Previous jit_version history:
294  * jit_version == 1, refer to &base_jit_alloc_info_10_2
295  * jit_version == 2, refer to &base_jit_alloc_info_11_5
296  *
297  * Kbase version history:
298  * 11.20: added @heap_info_gpu_addr
299  */
300 struct base_jit_alloc_info {
301 	__u64 gpu_alloc_addr;
302 	__u64 va_pages;
303 	__u64 commit_pages;
304 	__u64 extension;
305 	__u8 id;		/* zero is not a valid value */
306 	__u8 bin_id;
307 	__u8 max_allocations;
308 	__u8 flags;
309 	__u8 padding[2];	/* should be initialised to zero */
310 	__u16 usage_id;
	/* Everything above has the same member sequence as
	 * struct base_jit_alloc_info_11_5; the member below is the 11.20
	 * addition.
	 */
311 	__u64 heap_info_gpu_addr;
312 };
313 
/* Access mode for an external resource. No explicit values are given, so the
 * enumerators take the default numbering: SHARED is 0 and EXCLUSIVE is 1.
 */
314 enum base_external_resource_access {
315 	BASE_EXT_RES_ACCESS_SHARED,
316 	BASE_EXT_RES_ACCESS_EXCLUSIVE
317 };
318 
/* One external resource, described by a single 64-bit value.
 * NOTE(review): the encoding of ext_resource is not described in this
 * header - confirm against the code that fills it in.
 */
319 struct base_external_resource {
320 	__u64 ext_resource;
321 };
322 
323 
324 /*
325  * The maximum number of external resources which can be mapped/unmapped
326  * in a single request.
327  */
328 #define BASE_EXT_RES_COUNT_MAX 10
329 
330 /**
331  * struct base_external_resource_list - Structure which describes a list of
332  *                                      external resources.
333  * @count:                              The number of resources.
334  * @ext_res:                            Array of external resources which is
335  *                                      sized at allocation time.
336  */
337 struct base_external_resource_list {
338 	__u64 count;	/* number of entries in ext_res[] */
	/* Declared with one element but sized at allocation time, so
	 * sizeof(struct base_external_resource_list) already includes room
	 * for the first entry.
	 */
339 	struct base_external_resource ext_res[1];
340 };
341 
/* Describes one buffer by address and size, together with an associated
 * external resource.
 * NOTE(review): the units of 'size' and how 'extres' relates to the buffer
 * are not stated in this header - confirm against the users of this struct.
 */
342 struct base_jd_debug_copy_buffer {
343 	__u64 address;
344 	__u64 size;
345 	struct base_external_resource extres;
346 };
347 
/* Length of the js_features[] array in struct gpu_raw_gpu_props. */
348 #define GPU_MAX_JOB_SLOTS 16
349 
350 /**
351  * User-side Base GPU Property Queries
352  *
353  * The User-side Base GPU Property Query interface encapsulates two
354  * sub-modules:
355  *
356  * - "Dynamic GPU Properties"
357  * - "Base Platform Config GPU Properties"
358  *
359  * Base only deals with properties that vary between different GPU
360  * implementations - the Dynamic GPU properties and the Platform Config
361  * properties.
362  *
363  * For properties that are constant for the GPU Architecture, refer to the
364  * GPU module. However, we will discuss their relevance here just to
365  * provide background information.
366  *
367  * About the GPU Properties in Base and GPU modules
368  *
369  * The compile-time properties (Platform Config, GPU Compile-time
370  * properties) are exposed as pre-processor macros.
371  *
372  * Complementing the compile-time properties are the Dynamic GPU
373  * Properties, which act as a conduit for the GPU Configuration
374  * Discovery.
375  *
376  * In general, the dynamic properties are present to verify that the platform
377  * has been configured correctly with the right set of Platform Config
378  * Compile-time Properties.
379  *
380  * As a consistent guide across the entire DDK, the choice for dynamic or
381  * compile-time should consider the following, in order:
382  * 1. Can the code be written so that it doesn't need to know the
383  * implementation limits at all?
384  * 2. If you need the limits, get the information from the Dynamic Property
385  * lookup. This should be done once as you fetch the context, and then cached
386  * as part of the context data structure, so it's cheap to access.
387  * 3. If there's a clear and arguable inefficiency in using Dynamic Properties,
388  * then use a Compile-Time Property (Platform Config, or GPU Compile-time
389  * property). Examples of where this might be sensible follow:
390  *  - Part of a critical inner-loop
391  *  - Frequent re-use throughout the driver, causing significant extra load
392  * instructions or control flow that would be worthwhile optimizing out.
393  *
394  * We cannot provide an exhaustive set of examples, neither can we provide a
395  * rule for every possible situation. Use common sense, and think about: what
396  * the rest of the driver will be doing; how the compiler might represent the
397  * value if it is a compile-time constant; whether an OEM shipping multiple
398  * devices would benefit much more from a single DDK binary, instead of
399  * insignificant micro-optimizations.
400  *
401  * Dynamic GPU Properties
402  *
403  * Dynamic GPU properties are presented in two sets:
404  * 1. the commonly used properties in @ref base_gpu_props, which have been
405  * unpacked from GPU register bitfields.
406  * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props
407  * (also a member of base_gpu_props). All of these are presented in
408  * the packed form, as presented by the GPU  registers themselves.
409  *
410  * The raw properties in gpu_raw_gpu_props are necessary to
411  * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
412  * behaving differently?". In this case, all information about the
413  * configuration is potentially useful, but it does not need to be processed
414  * by the driver. Instead, the raw registers can be processed by the Mali
415  * Tools software on the host PC.
416  *
417  * The properties returned extend the GPU Configuration Discovery
418  * registers. For example, GPU clock speed is not specified in the GPU
419  * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function.
420  *
421  * The GPU properties are obtained by a call to
422  * base_get_gpu_props(). This simply returns a pointer to a const
423  * base_gpu_props structure. It is constant for the life of a base
424  * context. Multiple calls to base_get_gpu_props() to a base context
425  * return the same pointer to a constant structure. This avoids cache pollution
426  * of the common data.
427  *
428  * This pointer must not be freed, because it does not point to the start of a
429  * region allocated by the memory allocator; instead, just close the @ref
430  * base_context.
431  *
432  *
433  * Kernel Operation
434  *
435  * During Base Context Create time, user-side makes a single kernel call:
436  * - A call to fill user memory with GPU information structures
437  *
438  * The kernel side fills in the entire processed base_gpu_props structure in
439  * the memory provided, because this information is required on both the
440  * user and kernel side; it does not make sense to decode it twice.
441  *
442  * Coherency groups must be derived from the bitmasks, but this can be done
443  * kernel side, and just once at kernel startup: Coherency groups must already
444  * be known kernel-side, to support chains that specify a 'Only Coherent Group'
445  * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
446  *
447  * Coherency Group calculation
448  *
449  * Creation of the coherent group data is done at device-driver startup, and so
450  * is one-time. This will most likely involve a loop with CLZ, shifting, and
451  * bit clearing on the L2_PRESENT mask, depending on whether the
452  * system is L2 Coherent. The number of shader cores is done by a
453  * population count, since faulty cores may be disabled during production,
454  * producing a non-contiguous mask.
455  *
456  * The memory requirements for this algorithm can be determined either by a __u64
457  * population count on the L2_PRESENT mask (a LUT helper already is
458  * required for the above), or simple assumption that there can be no more than
459  * 16 coherent groups, since core groups are typically 4 cores.
460  */
461 
/* NOTE: these two macros repeat, with identical values, the definitions made
 * near the top of this header. An identical redefinition is accepted by the
 * preprocessor, but the two copies must be kept in sync.
 */
462 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4
463 
464 #define BASE_MAX_COHERENT_GROUPS 16
465 /**
466  * struct mali_base_gpu_core_props - GPU core props info
467  * @product_id: Product specific value.
468  * @version_status: Status of the GPU release. No defined values, but starts at
469  * 	0 and increases by one for each release status (alpha, beta, EAC, etc.).
470  * 	4 bit values (0-15).
471  * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn"
472  * 	release number.
473  * 	8 bit values (0-255).
474  * @major_revision: Major release number of the GPU. "R" part of an "RnPn"
475  * 	release number.
476  * 	4 bit values (0-15).
477  * @padding: padding to align to 8-byte
478  * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by
479  * 	clGetDeviceInfo()
480  * @log2_program_counter_size: Size of the shader program counter, in bits.
481  * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This
482  * 	is a bitpattern where a set bit indicates that the format is supported.
483  * 	Before using a texture format, it is recommended that the corresponding
484  * 	bit be checked.
485  * @gpu_available_memory_size: Theoretical maximum memory available to the GPU.
486  * 	It is unlikely that a client will be able to allocate all of this memory
487  * 	for their own purposes, but this at least provides an upper bound on the
488  * 	memory available to the GPU.
489  * 	This is required for OpenCL's clGetDeviceInfo() call when
490  * 	CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
491  * 	client will not be expecting to allocate anywhere near this value.
492  * @num_exec_engines: The number of execution engines.
493  */
494 struct mali_base_gpu_core_props {
495 	__u32 product_id;
496 	__u16 version_status;
497 	__u16 minor_revision;
498 	__u16 major_revision;
499 	__u16 padding;
500 	__u32 gpu_freq_khz_max;
501 	__u32 log2_program_counter_size;
502 	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
	/* NOTE(review): the members above occupy 36 bytes, so where __u64 is
	 * 8-byte aligned the compiler inserts 4 bytes of undeclared padding
	 * here, and 7 more after the final single-byte member.
	 */
503 	__u64 gpu_available_memory_size;
504 	__u8 num_exec_engines;
505 };
506 
507 /*
 * L2 cache properties. The line and cache sizes are stored as log2 values.
 *
508  * More information is possible - but associativity and bus width are not
509  * required by upper-level apis.
510  */
511 struct mali_base_gpu_l2_cache_props {
512 	__u8 log2_line_size;
513 	__u8 log2_cache_size;
514 	__u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
515 	__u8 padding[5]; /* brings the three bytes above up to 8 bytes */
516 };
517 
/* Tiler properties: two 32-bit limits, 8 bytes in total. */
518 struct mali_base_gpu_tiler_props {
519 	__u32 bin_size_bytes;	/* Max is 4*2^15 */
520 	__u32 max_active_levels;	/* Max is 2^15 */
521 };
522 
523 /**
524  * struct mali_base_gpu_thread_props - GPU threading system details.
525  * @max_threads:            Max. number of threads per core
526  * @max_workgroup_size:     Max. number of threads per workgroup
527  * @max_barrier_size:       Max. number of threads that can synchronize on a
528  *                          simple barrier
529  * @max_registers:          Total size [1..65535] of the register file available
530  *                          per core.
531  * @max_task_queue:         Max. tasks [1..255] which may be sent to a core
532  *                          before it becomes blocked.
533  * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split
534  *                          field.
535  * @impl_tech:              0 = Not specified, 1 = Silicon, 2 = FPGA,
536  *                          3 = SW Model/Emulation
537  * @padding:                padding to align to 8-byte
538  * @tls_alloc:              Number of threads per core that TLS must be
539  *                          allocated for
540  */
541 struct mali_base_gpu_thread_props {
542 	__u32 max_threads;
543 	__u32 max_workgroup_size;
544 	__u32 max_barrier_size;
545 	__u16 max_registers;
546 	__u8 max_task_queue;
547 	__u8 max_thread_group_split;
548 	__u8 impl_tech;
549 	__u8  padding[3];
550 	__u32 tls_alloc;
551 };
552 
553 /**
554  * struct mali_base_gpu_coherent_group - descriptor for a coherent group
555  * @core_mask: Core restriction mask required for the group
556  * @num_cores: Number of cores in the group
557  * @padding:   padding to align to 8-byte
558  *
559  * @core_mask exposes all cores in that coherent group, and @num_cores
560  * 	provides a cached population-count for that mask.
561  *
562  * @note Whilst all cores are exposed in the mask, not all may be available to
563  * 	the application, depending on the Kernel Power policy.
564  *
565  * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
566  * 	wastage.
567  */
568 struct mali_base_gpu_coherent_group {
569 	__u64 core_mask;
570 	__u16 num_cores;
571 	__u16 padding[3];
572 };
573 
574 /**
575  * struct mali_base_gpu_coherent_group_info - Coherency group information
576  * @num_groups: Number of coherent groups in the GPU.
577  * @num_core_groups: Number of core groups (coherent or not) in the GPU.
578  * 	Equivalent to the number of L2 Caches.
579  * 	  The GPU Counter dumping writes 2048 bytes per core group, regardless
580  * 	of whether the core groups are coherent or not. Hence this member is
581  * 	needed to calculate how much memory is required for dumping.
582  * 	  @note Do not use it to work out how many valid elements are in the
583  * 	group[] member. Use num_groups instead.
584  * @coherency: Coherency features of the memory, accessed by gpu_mem_features
585  * 	methods
586  * @padding: padding to align to 8-byte
587  * @group: Descriptors of coherent groups
588  *
589  * Note that the sizes of the members could be reduced. However, the @group
590  * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte
591  * aligned, thus leading to wastage if the other members sizes were reduced.
592  *
593  * The groups are sorted by core mask. The core masks are non-repeating and do
594  * not intersect.
595  */
596 struct mali_base_gpu_coherent_group_info {
597 	__u32 num_groups;
598 	__u32 num_core_groups;
599 	__u32 coherency;
600 	__u32 padding;
601 	struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
602 };
603 
604 /**
605  * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware
606  *                            Configuration Discovery registers.
607  * @shader_present: Shader core present bitmap
608  * @tiler_present: Tiler core present bitmap
609  * @l2_present: Level 2 cache present bitmap
610  * @stack_present: Core stack present bitmap
611  * @l2_features: L2 features
612  * @core_features: Core features
613  * @mem_features: Mem features
614  * @mmu_features: Mmu features
615  * @as_present: Bitmap of address spaces present
616  * @js_present: Job slots present
617  * @js_features: Array of job slot features.
618  * @tiler_features: Tiler features
619  * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU
620  * @gpu_id: GPU and revision identifier
621  * @thread_max_threads: Maximum number of threads per core
622  * @thread_max_workgroup_size: Maximum number of threads per workgroup
623  * @thread_max_barrier_size: Maximum number of threads per barrier
624  * @thread_features: Thread features
625  * @coherency_mode: Note: This is the _selected_ coherency mode rather than the
626  *                  available modes as exposed in the coherency_features register
627  * @thread_tls_alloc: Number of threads per core that TLS must be allocated for
628  * @gpu_features: GPU features
629  *
630  * The information is presented inefficiently for access. For frequent access,
631  * the values should be better expressed in an unpacked form in the
632  * base_gpu_props structure.
633  *
634  * The raw properties in gpu_raw_gpu_props are necessary to
635  * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
636  * behaving differently?". In this case, all information about the
637  * configuration is potentially useful, but it does not need to be processed
638  * by the driver. Instead, the raw registers can be processed by the Mali
639  * Tools software on the host PC.
640  *
641  */
642 struct gpu_raw_gpu_props {
	/* 64-bit "present" bitmaps */
643 	__u64 shader_present;
644 	__u64 tiler_present;
645 	__u64 l2_present;
646 	__u64 stack_present;
	/* 32-bit feature words */
647 	__u32 l2_features;
648 	__u32 core_features;
649 	__u32 mem_features;
650 	__u32 mmu_features;
651 
652 	__u32 as_present;
653 
654 	__u32 js_present;
655 	__u32 js_features[GPU_MAX_JOB_SLOTS];
656 	__u32 tiler_features;
657 	__u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
658 
659 	__u32 gpu_id;
660 
	/* Thread limits and features */
661 	__u32 thread_max_threads;
662 	__u32 thread_max_workgroup_size;
663 	__u32 thread_max_barrier_size;
664 	__u32 thread_features;
665 
666 	/*
667 	 * Note: This is the _selected_ coherency mode rather than the
668 	 * available modes as exposed in the coherency_features register.
669 	 */
670 	__u32 coherency_mode;
671 
672 	__u32 thread_tls_alloc;
	/* The members above add up to 168 bytes, so this 64-bit member is
	 * already 8-byte aligned and the structure totals 176 bytes.
	 */
673 	__u64 gpu_features;
674 };
675 
676 /**
677  * struct base_gpu_props - Return structure for base_get_gpu_props().
678  * @core_props:     Core props.
679  * @l2_props:       L2 props.
680  * @unused_1:       Keep for backwards compatibility.
681  * @tiler_props:    Tiler props.
682  * @thread_props:   Thread props.
683  * @raw_props:      This member is large: the fields declared in
 *                  struct gpu_raw_gpu_props add up to 176 bytes.
684  * @coherency_info: This must be last member of the structure.
685  *
686  * NOTE: the raw_props member in this data structure contains the register
687  * values from which the value of the other members are derived. The derived
688  * members exist to allow for efficient access and/or shielding the details
689  * of the layout of the registers.
690  */
691 struct base_gpu_props {
692 	struct mali_base_gpu_core_props core_props;
693 	struct mali_base_gpu_l2_cache_props l2_props;
694 	__u64 unused_1;
695 	struct mali_base_gpu_tiler_props tiler_props;
696 	struct mali_base_gpu_thread_props thread_props;
697 	struct gpu_raw_gpu_props raw_props;
698 	struct mali_base_gpu_coherent_group_info coherency_info;
699 };
700 
701 #if MALI_USE_CSF
702 #include "csf/mali_base_csf_kernel.h"
703 #else
704 #include "jm/mali_base_jm_kernel.h"
705 #endif
706 
/*
 * BASE_MEM_GROUP_ID_GET - extract the physical memory group ID from a flags
 * value.
 * @flags: memory allocation flags; evaluated exactly once.
 *
 * Masks @flags with BASE_MEM_GROUP_ID_MASK and shifts the field down by
 * BASEP_MEM_GROUP_ID_SHIFT. The argument is parenthesised so that an
 * expression such as (a | b) is masked as a whole; without the parentheses
 * '&' would bind to the last operand only.
 */
#define BASE_MEM_GROUP_ID_GET(flags)                                           \
	(((flags) & BASE_MEM_GROUP_ID_MASK) >> BASEP_MEM_GROUP_ID_SHIFT)
709 
/*
 * BASE_MEM_GROUP_ID_SET - encode a physical memory group ID as flag bits.
 * @id: group ID. An ID below 0 or not below BASE_MEM_GROUP_COUNT is replaced
 *      by BASE_MEM_GROUP_DEFAULT.
 *
 * The selected ID is cast to base_mem_alloc_flags, shifted up by
 * BASEP_MEM_GROUP_ID_SHIFT and masked with BASE_MEM_GROUP_ID_MASK.
 *
 * Every use of @id is parenthesised so that an expression argument such as
 * (a | b) is range-checked and encoded as a whole. @id is still evaluated up
 * to three times, so it must not have side effects.
 */
#define BASE_MEM_GROUP_ID_SET(id)                                              \
	(((base_mem_alloc_flags)(((id) < 0 || (id) >= BASE_MEM_GROUP_COUNT) ?  \
					 BASE_MEM_GROUP_DEFAULT :              \
					 (id))                                 \
	  << BASEP_MEM_GROUP_ID_SHIFT) &                                       \
	 BASE_MEM_GROUP_ID_MASK)
716 
/*
 * Build the MMU group ID field of a context-create flags value: group_id is
 * cast to base_context_create_flags, shifted up by
 * BASEP_CONTEXT_MMU_GROUP_ID_SHIFT and then masked with
 * BASEP_CONTEXT_MMU_GROUP_ID_MASK. Bits that fall outside the mask are
 * dropped; there is no range check.
 */
717 #define BASE_CONTEXT_MMU_GROUP_ID_SET(group_id)                                \
718 	(BASEP_CONTEXT_MMU_GROUP_ID_MASK &                                     \
719 	 ((base_context_create_flags)(group_id)                                \
720 	  << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT))
721 
/*
 * BASE_CONTEXT_MMU_GROUP_ID_GET - extract the MMU group ID from a
 * context-create flags value.
 * @flags: context-create flags; evaluated exactly once.
 *
 * Masks @flags with BASEP_CONTEXT_MMU_GROUP_ID_MASK and shifts the field down
 * by BASEP_CONTEXT_MMU_GROUP_ID_SHIFT. The argument is parenthesised so that
 * an expression such as (a | b) is masked as a whole.
 */
#define BASE_CONTEXT_MMU_GROUP_ID_GET(flags)                                   \
	(((flags) & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>                        \
	 BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
725 
726 /*
727  * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
728  * flags are also used, where applicable, for specifying which fields
729  * are valid following the request operation.
730  */
731 
732 /* For monotonic (counter) timefield (bit 0) */
733 #define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
734 /* For system wide timestamp (bit 1) */
735 #define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
736 /* For GPU cycle counter (bit 2) */
737 #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
738 /* Specify kernel GPU register timestamp (bit 30) */
739 #define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30)
740 /* Specify userspace cntvct_el0 timestamp source (bit 31) */
741 #define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31)
742 
/* OR of the five BASE_TIMEINFO_* flags defined in this header: the three
 * field selectors (bits 0-2) and the two source selectors (bits 30 and 31).
 */
743 #define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
744 		BASE_TIMEINFO_MONOTONIC_FLAG | \
745 		BASE_TIMEINFO_TIMESTAMP_FLAG | \
746 		BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \
747 		BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \
748 		BASE_TIMEINFO_USER_SOURCE_FLAG)
749 
750 /* Maximum number of source allocations allowed to create an alias allocation.
751  * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array
752  * layers, since each cube map in the array will have 6 faces.
 *
 * NOTE(review): the value is cast to size_t, and the only header visibly
 * included before this point is <linux/types.h>; confirm that every includer
 * has size_t declared before this macro is expanded.
753  */
754 #define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
755 
756 #endif /* _UAPI_BASE_KERNEL_H_ */
757