/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */
21
22 #ifndef _KBASE_HWCNT_GPU_H_
23 #define _KBASE_HWCNT_GPU_H_
24
25 #include <linux/types.h>
26
27 struct kbase_device;
28 struct kbase_hwcnt_metadata;
29 struct kbase_hwcnt_enable_map;
30 struct kbase_hwcnt_dump_buffer;
31
32 /* Hardware counter version 5 definitions, V5 is the only supported version. */
33 #define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
34 #define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
35 #define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
36 #define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
37 (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \
38 KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
39
40 /* FrontEnd block count in V5 GPU hardware counter. */
41 #define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
42 /* Tiler block count in V5 GPU hardware counter. */
43 #define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1
44
45 /* Index of the PRFCNT_EN header into a V5 counter block */
46 #define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
47
48 /* Number of bytes for each counter value in hardware. */
49 #define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))
50
/**
 * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
 *                                   identify metadata groups.
 * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
 */
enum kbase_hwcnt_gpu_group_type {
	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
};
59
/**
 * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
 *                                      used to identify metadata blocks.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a
 *                                                counter set that a block
 *                                                doesn't support is used).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:        Front End block (Job manager
 *                                                or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:       Secondary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:       Tertiary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:     Tiler block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:        Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:       Secondary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:       Tertiary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:    Memsys block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:   Secondary Memsys block.
 */
enum kbase_hwcnt_gpu_v5_block_type {
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
};
91
/**
 * enum kbase_hwcnt_set - GPU hardware counter sets
 * @KBASE_HWCNT_SET_PRIMARY:   The Primary set of counters
 * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
 * @KBASE_HWCNT_SET_TERTIARY:  The Tertiary set of counters
 * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters
 */
enum kbase_hwcnt_set {
	KBASE_HWCNT_SET_PRIMARY,
	KBASE_HWCNT_SET_SECONDARY,
	KBASE_HWCNT_SET_TERTIARY,
	/* Distinct sentinel value well away from the valid sets. */
	KBASE_HWCNT_SET_UNDEFINED = 255,
};
105
/**
 * struct kbase_hwcnt_physical_enable_map - Representation of enable map
 *                                          directly used by GPU.
 * @fe_bm:     Front end (JM/CSHW) counters selection bitmask.
 * @shader_bm: Shader counters selection bitmask.
 * @tiler_bm:  Tiler counters selection bitmask.
 * @mmu_l2_bm: MMU_L2 counters selection bitmask.
 */
struct kbase_hwcnt_physical_enable_map {
	u32 fe_bm;
	u32 shader_bm;
	u32 tiler_bm;
	u32 mmu_l2_bm;
};
120
/**
 * enum kbase_hwcnt_physical_set - Values for the Hardware Counter SET_SELECT
 *                                 value. Directly passed to HW.
 * @KBASE_HWCNT_PHYSICAL_SET_PRIMARY:   Primary counter set.
 * @KBASE_HWCNT_PHYSICAL_SET_SECONDARY: Secondary counter set.
 * @KBASE_HWCNT_PHYSICAL_SET_TERTIARY:  Tertiary counter set.
 */
enum kbase_hwcnt_physical_set {
	/* Values are fixed by the hardware interface; keep them explicit. */
	KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
	KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
	KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
};
130
/**
 * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
 * @l2_count:                Sizes: L2 cache count.
 * @core_mask:               Shader core mask. May be sparse.
 * @clk_cnt:                 Number of clock domains available.
 * @prfcnt_values_per_block: Total entries (header + counters) of performance
 *                           counter per block.
 */
struct kbase_hwcnt_gpu_info {
	size_t l2_count;
	u64 core_mask;
	u8 clk_cnt;
	size_t prfcnt_values_per_block;
};
145
/**
 * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
 *                                  GPU.
 * @num_l2_slices:  Current number of L2 slices allocated to the GPU.
 * @shader_present: Current shader present bitmap that is allocated to the GPU.
 *
 * For architectures with the max_config interface available from the Arbiter,
 * the current resources allocated may change during runtime due to a
 * re-partitioning (possible with partition manager). Thus, the HWC needs to be
 * prepared to report any possible set of counters. For this reason the memory
 * layout in the userspace is based on the maximum possible allocation. On the
 * other hand, each partition has just the view of its currently allocated
 * resources. Therefore, it is necessary to correctly map the dumped HWC values
 * from the registers into this maximum memory layout so that it can be exposed
 * to the userspace side correctly.
 *
 * For L2 cache just the number is enough once the allocated ones will be
 * accumulated on the first L2 slots available in the destination buffer.
 *
 * For the correct mapping of the shader cores it is necessary to jump all the
 * L2 cache slots in the destination buffer that are not allocated. But, it is
 * not necessary to add any logic to map the shader cores bitmap into the memory
 * layout because the shader_present allocated will always be a subset of the
 * maximum shader_present. It is possible because:
 * 1 - Partitions are made of slices and they are always ordered from the ones
 *     with more shader cores to the ones with less.
 * 2 - The shader cores in a slice are always contiguous.
 * 3 - A partition can only have a contiguous set of slices allocated to it.
 * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with
 * 3 cores and 1 with 2 cores. The maximum possible shader_present would be:
 * 0x0011|0111|0111|1111 -> note the order and that the shader cores are
 * contiguous in any slice.
 * Supposing that a partition takes the two slices in the middle, the current
 * config shader_present for this partition would be:
 * 0x0111|0111 -> note that this is a subset of the maximum above and the slices
 * are contiguous.
 * Therefore, by directly copying any subset of the maximum possible
 * shader_present the mapping is already achieved.
 */
struct kbase_hwcnt_curr_config {
	size_t num_l2_slices;
	u64 shader_present;
};
189
190 /**
191 * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
192 * JM GPUs.
193 * @info: Non-NULL pointer to info struct.
194 * @counter_set: The performance counter set used.
195 * @out_metadata: Non-NULL pointer to where created metadata is stored on
196 * success.
197 * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
198 * buffer is stored on success.
199 *
200 * Return: 0 on success, else error code.
201 */
202 int kbase_hwcnt_jm_metadata_create(
203 const struct kbase_hwcnt_gpu_info *info,
204 enum kbase_hwcnt_set counter_set,
205 const struct kbase_hwcnt_metadata **out_metadata,
206 size_t *out_dump_bytes);
207
208 /**
209 * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
210 *
211 * @metadata: Pointer to metadata to destroy.
212 */
213 void kbase_hwcnt_jm_metadata_destroy(
214 const struct kbase_hwcnt_metadata *metadata);
215
/**
 * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
 *                                     CSF GPUs.
 * @info:         Non-NULL pointer to info struct.
 * @counter_set:  The performance counter set used.
 * @out_metadata: Non-NULL pointer to where created metadata is stored on
 *                success.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_csf_metadata_create(
	const struct kbase_hwcnt_gpu_info *info,
	enum kbase_hwcnt_set counter_set,
	const struct kbase_hwcnt_metadata **out_metadata);

/**
 * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
 *                                      metadata.
 * @metadata: Pointer to metadata to destroy.
 */
void kbase_hwcnt_csf_metadata_destroy(
	const struct kbase_hwcnt_metadata *metadata);
238
239 /**
240 * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
241 * dump buffer in src into the dump buffer
242 * abstraction in dst.
243 * @dst: Non-NULL pointer to destination dump buffer.
244 * @src: Non-NULL pointer to source raw dump buffer, of same length
245 * as dump_buf_bytes in the metadata of destination dump
246 * buffer.
247 * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
248 * @pm_core_mask: PM state synchronized shaders core mask with the dump.
249 * @curr_config: Current allocated hardware resources to correctly map the
250 * source raw dump buffer to the destination dump buffer.
251 * @accumulate: True if counters in source should be accumulated into
252 * destination, rather than copied.
253 *
254 * The dst and dst_enable_map MUST have been created from the same metadata as
255 * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
256 * the length of src.
257 *
258 * Return: 0 on success, else error code.
259 */
260 int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
261 const struct kbase_hwcnt_enable_map *dst_enable_map,
262 const u64 pm_core_mask,
263 const struct kbase_hwcnt_curr_config *curr_config,
264 bool accumulate);
265
266 /**
267 * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
268 * dump buffer in src into the dump buffer
269 * abstraction in dst.
270 * @dst: Non-NULL pointer to destination dump buffer.
271 * @src: Non-NULL pointer to source raw dump buffer, of same length
272 * as dump_buf_bytes in the metadata of dst dump buffer.
273 * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
274 * @accumulate: True if counters in src should be accumulated into
275 * destination, rather than copied.
276 *
277 * The dst and dst_enable_map MUST have been created from the same metadata as
278 * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
279 * the length of src.
280 *
281 * Return: 0 on success, else error code.
282 */
283 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
284 const struct kbase_hwcnt_enable_map *dst_enable_map,
285 bool accumulate);
286
/**
 * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
 *                                                   enable map abstraction to
 *                                                   a physical block enable
 *                                                   map.
 * @lo: Low 64 bits of block enable map abstraction.
 * @hi: High 64 bits of block enable map abstraction.
 *
 * The abstraction uses 128 bits to enable 128 block values, whereas the
 * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
 * Therefore, this conversion is lossy.
 *
 * Return: 32-bit physical block enable map.
 */
static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
{
	u32 phys = 0;
	u64 dwords[2] = { lo, hi };
	size_t dword_idx;

	/* Each 64-bit input half packs down to 16 physical bits. */
	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
		const u64 dword = dwords[dword_idx];
		u16 packed = 0;

		size_t hword_bit;

		/* Physical bit n is the OR of abstraction bits
		 * [n*4, n*4+3] (lossy reduction).
		 */
		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
			const size_t dword_bit = hword_bit * 4;
			const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) |
					 ((dword >> (dword_bit + 1)) & 0x1) |
					 ((dword >> (dword_bit + 2)) & 0x1) |
					 ((dword >> (dword_bit + 3)) & 0x1);
			packed |= (mask << hword_bit);
		}
		phys |= ((u32)packed) << (16 * dword_idx);
	}
	return phys;
}
325
/**
 * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
 *                                            into a physical enable map.
 * @dst: Non-NULL pointer to destination physical enable map.
 * @src: Non-NULL pointer to source enable map abstraction.
 *
 * The src must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the enable map abstraction has one bit per
 * individual counter block value, but the physical enable map uses 1 bit for
 * every 4 counters, shared over all instances of a block.
 */
void kbase_hwcnt_gpu_enable_map_to_physical(
	struct kbase_hwcnt_physical_enable_map *dst,
	const struct kbase_hwcnt_enable_map *src);

/**
 * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
 *                                     SET_SELECT value.
 *
 * @dst: Non-NULL pointer to destination physical SET_SELECT value.
 * @src: Non-NULL pointer to source counter set selection.
 */
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
				     enum kbase_hwcnt_set src);
352
/**
 * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
 *                                              an enable map abstraction.
 * @dst: Non-NULL pointer to destination enable map abstraction.
 * @src: Non-NULL pointer to source physical enable map.
 *
 * The dst must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the physical enable map can technically
 * support counter blocks with 128 counters each, but no hardware actually uses
 * more than 64, so the enable map abstraction has nowhere to store the enable
 * information for the 64 non-existent counters.
 */
void kbase_hwcnt_gpu_enable_map_from_physical(
	struct kbase_hwcnt_enable_map *dst,
	const struct kbase_hwcnt_physical_enable_map *src);

/**
 * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
 *                                        enable headers in a dump buffer to
 *                                        reflect the specified enable map.
 * @buf:        Non-NULL pointer to dump buffer to patch.
 * @enable_map: Non-NULL pointer to enable map.
 *
 * The buf and enable_map must have been created from a metadata returned from
 * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This function should be used before handing off a dump buffer over the
 * kernel-user boundary, to ensure the header is accurate for the enable map
 * used by the user.
 */
void kbase_hwcnt_gpu_patch_dump_headers(
	struct kbase_hwcnt_dump_buffer *buf,
	const struct kbase_hwcnt_enable_map *enable_map);
388
389 #endif /* _KBASE_HWCNT_GPU_H_ */
390