// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * CSF GPU HWC backend firmware interface APIs.
 */

#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>

#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"

#include <linux/log2.h>
#include "mali_kbase_ccswe.h"

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_BIFROST_NO_MALI */

/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */

/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
 *                                                 used to save the manual and
 *                                                 auto HWC samples from
 *                                                 firmware.
 * @gpu_dump_base: Starting GPU base address of the ring buffer.
 * @cpu_dump_base: Starting CPU address for the mapping.
 * @buf_count:     Buffer count in the ring buffer, MUST be power of 2.
 * @as_nr:         Address space number for the memory mapping.
 * @phys:          Physical memory allocation used by the mapping.
 * @num_pages:     Size of the mapping, in memory pages.
 */
struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
	u64 gpu_dump_base;
	void *cpu_dump_base;
	size_t buf_count;
	u32 as_nr;
	struct tagged_addr *phys;
	size_t num_pages;
};

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
 *                                            interface, used to communicate
 *                                            with firmware.
 * @kbdev:              KBase device.
 * @buf_bytes:          The size in bytes for each buffer in the ring buffer.
 * @clk_cnt:            The number of clock domains in the system.
 *                      The maximum is 64.
 * @clk_enable_map:     Bitmask of enabled clocks
 * @rate_listener:      Clock rate listener callback state.
 * @ccswe_shader_cores: Shader cores cycle count software estimator.
 */
struct kbase_hwcnt_backend_csf_if_fw_ctx {
	struct kbase_device *kbdev;
	size_t buf_bytes;
	u8 clk_cnt;
	u64 clk_enable_map;
	struct kbase_clk_rate_listener rate_listener;
	struct kbase_ccswe ccswe_shader_cores;
};
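
/*
 * Rough call flow for this interface (a sketch, not normative): the common
 * CSF HWC backend first queries get_prfcnt_info(), allocates a ring buffer
 * with ring_buf_alloc(), then (under the scheduler spinlock) calls
 * dump_enable(). Each dump_request() asks firmware for a sample;
 * get_indexes() and ring_buf_sync() are used to read samples out of the
 * ring, and set_extract_index() releases consumed buffers back to firmware.
 * dump_disable() and ring_buf_free() undo the setup. See
 * kbase_hwcnt_backend_csf_if_fw_create() below for where these callbacks
 * are wired up.
 */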

static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}

static void
kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
				    unsigned long *flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock(kbdev, flags);
}

static void kbasep_hwcnt_backend_csf_if_fw_unlock(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
 *
 * @rate_listener: Callback state
 * @clk_index:     Clock index
 * @clk_rate_hz:   Clock frequency (Hz)
 */
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
	struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
	u32 clk_rate_hz)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		container_of(rate_listener,
			     struct kbase_hwcnt_backend_csf_if_fw_ctx,
			     rate_listener);
	u64 timestamp_ns;

	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
		return;

	timestamp_ns = ktime_get_raw_ns();
	kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
				clk_rate_hz);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
 *
 * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
 * @clk_enable_map: Bitmask of clock domains for which cycle counting is
 *                  enabled.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;

	if (kbase_hwcnt_clk_enable_map_enabled(
		    clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
		/* Software estimation for non-top clock domains: seed the
		 * shader cores cycle count estimator with the current
		 * frequency and subscribe to future rate changes.
		 */
		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
		const struct kbase_clk_data *clk_data =
			rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
		u32 cur_freq;
		unsigned long flags;
		u64 timestamp_ns;

		timestamp_ns = ktime_get_raw_ns();

		spin_lock_irqsave(&rtm->lock, flags);

		cur_freq = (u32)clk_data->clock_val;
		kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
		kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
					timestamp_ns, cur_freq);

		kbase_clk_rate_trace_manager_subscribe_no_lock(
			rtm, &fw_ctx->rate_listener);

		spin_unlock_irqrestore(&rtm->lock, flags);
	}

	fw_ctx->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
 *
 * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;
	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
	u64 clk_enable_map = fw_ctx->clk_enable_map;

	if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
					       KBASE_CLOCK_DOMAIN_SHADER_CORES))
		kbase_clk_rate_trace_manager_unsubscribe(
			rtm, &fw_ctx->rate_listener);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	size_t dummy_model_blk_count;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
	prfcnt_info->core_mask =
		(1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
	/* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
	dummy_model_blk_count =
		2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
	prfcnt_info->dump_bytes =
		dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
	prfcnt_info->prfcnt_block_size =
		KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
		KBASE_HWCNT_VALUE_HW_BYTES;
	prfcnt_info->clk_cnt = 1;
	prfcnt_info->clearing_samples = true;
	fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
#else
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;
	u32 prfcnt_size;
	u32 prfcnt_hw_size = 0;
	u32 prfcnt_fw_size = 0;
	u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
				KBASE_HWCNT_VALUE_HW_BYTES;

	WARN_ON(!ctx);
	WARN_ON(!prfcnt_info);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;
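	/* The firmware-reported prfcnt_size packs the hardware and firmware
	 * sample sizes in units of 256 bytes: bits [7:0] give the hardware
	 * portion and bits [31:16] the firmware portion, as decoded below.
	 * As an illustrative (hypothetical) example, prfcnt_size ==
	 * 0x00100040 would give prfcnt_hw_size = 0x40 * 256 = 16 KiB and
	 * prfcnt_fw_size = 0x10 * 256 = 4 KiB, i.e. 20 KiB per sample buffer.
	 */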
	prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
	prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
	prfcnt_fw_size = (prfcnt_size >> 16) << 8;
	fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;

	/* Read the block size if the GPU has the register PRFCNT_FEATURES
	 * which was introduced in architecture version 11.x.7.
	 */
	if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
	    GPU_ID2_PRODUCT_TTUX) {
		prfcnt_block_size =
			PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
				kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
			<< 8;
	}

	prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
	prfcnt_info->prfcnt_block_size = prfcnt_block_size;
	prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
	prfcnt_info->core_mask =
		kbdev->gpu_props.props.coherency_info.group[0].core_mask;

	prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
	prfcnt_info->clearing_samples = true;

	/* Block size must be multiple of counter size. */
	WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
		0);
	/* Total size must be multiple of block size. */
	WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
		0);
#endif
}

static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
	void **cpu_dump_base,
	struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
	struct kbase_device *kbdev;
	struct tagged_addr *phys;
	struct page **page_list;
	void *cpu_addr;
	int ret;
	int i;
	size_t num_pages;
	u64 flags;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;

	pgprot_t cpu_map_prot = PAGE_KERNEL;
	u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	/* Calls to this function are inherently asynchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	WARN_ON(!ctx);
	WARN_ON(!cpu_dump_base);
	WARN_ON(!out_ring_buf);

	kbdev = fw_ctx->kbdev;

	/* The buffer count must be a power of 2, since buffer indices are
	 * wrapped by masking with (buf_count - 1).
	 */
	if (!is_power_of_2(buf_count))
		return -EINVAL;

	/* The ring buffer GPU VA must be 2KB aligned. */
	if (gpu_va_base & (2048 - 1))
		return -EINVAL;

	fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
	if (!fw_ring_buf)
		return -ENOMEM;

	num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
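	/* For illustration only (hypothetical values): with buf_bytes = 20480
	 * and buf_count = 128 this asks for PFN_UP(2621440) = 640 pages,
	 * assuming 4 KiB pages.
	 */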
	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
	if (!phys)
		goto phys_alloc_error;

	page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
		goto page_list_alloc_error;

	/* Get physical pages for the buffer */
	ret = kbase_mem_pool_alloc_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false);
	if (ret != num_pages)
		goto phys_mem_pool_alloc_error;

	/* Get the CPU virtual address */
	for (i = 0; i < num_pages; i++)
		page_list[i] = as_page(phys[i]);

	cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
	if (!cpu_addr)
		goto vmap_error;

	flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);

	/* Update MMU table */
	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
				     gpu_va_base >> PAGE_SHIFT, phys, num_pages,
				     flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
				     mmu_sync_info);
	if (ret)
		goto mmu_insert_failed;

	kfree(page_list);

	fw_ring_buf->gpu_dump_base = gpu_va_base;
	fw_ring_buf->cpu_dump_base = cpu_addr;
	fw_ring_buf->phys = phys;
	fw_ring_buf->num_pages = num_pages;
	fw_ring_buf->buf_count = buf_count;
	fw_ring_buf->as_nr = MCU_AS_NR;

	*cpu_dump_base = fw_ring_buf->cpu_dump_base;
	*out_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	/* The dummy model needs the CPU mapping. */
	gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev,
					    phys, num_pages);
#endif /* CONFIG_MALI_BIFROST_NO_MALI */

	return 0;

mmu_insert_failed:
	vunmap(cpu_addr);
vmap_error:
	kbase_mem_pool_free_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false, false);
phys_mem_pool_alloc_error:
	kfree(page_list);
page_list_alloc_error:
	kfree(phys);
phys_alloc_error:
	kfree(fw_ring_buf);
	return -ENOMEM;
}

static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	size_t i;
	size_t pg_first;
	size_t pg_last;
	u64 start_address;
	u64 stop_address;
	u32 ring_buf_index_first;
	u32 ring_buf_index_last;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);

	/* The index arguments form a half-open range: buf_index_first is
	 * inclusive, buf_index_last is exclusive. However, when masking back
	 * to the available buffers we make the range inclusive at both ends,
	 * so that a full flush is not collapsed to 0 -> 0.
	 */
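	/* Worked example (illustrative values only): with buf_count = 4,
	 * buf_index_first = 2 and buf_index_last = 6, the masked indices
	 * become 2 and (6 - 1) & 3 = 1. The start address then exceeds the
	 * stop address, so the wrap case below syncs from the first page of
	 * buffer 2 to the end of the ring, and then from page 0 up to the
	 * last page of buffer 1.
	 */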
	ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
	ring_buf_index_last =
		(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);

	/* The start address is the offset of the first buffer. */
	start_address = fw_ctx->buf_bytes * ring_buf_index_first;
	pg_first = start_address >> PAGE_SHIFT;

	/* The stop address is the last byte in the final buffer. */
	stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
	pg_last = stop_address >> PAGE_SHIFT;

	/* Check whether the buffer range wraps. */
	if (start_address > stop_address) {
		/* Sync the first part, up to the end of the ring buffer. */
		for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
			struct page *pg = as_page(fw_ring_buf->phys[i]);

			if (for_cpu) {
				kbase_sync_single_for_cpu(fw_ctx->kbdev,
							  kbase_dma_addr(pg),
							  PAGE_SIZE,
							  DMA_BIDIRECTIONAL);
			} else {
				kbase_sync_single_for_device(fw_ctx->kbdev,
							     kbase_dma_addr(pg),
							     PAGE_SIZE,
							     DMA_BIDIRECTIONAL);
			}
		}

		/* The second part starts from page 0. */
		pg_first = 0;
	}

	for (i = pg_first; i <= pg_last; i++) {
		struct page *pg = as_page(fw_ring_buf->phys[i]);

		if (for_cpu) {
			kbase_sync_single_for_cpu(fw_ctx->kbdev,
						  kbase_dma_addr(pg), PAGE_SIZE,
						  DMA_BIDIRECTIONAL);
		} else {
			kbase_sync_single_for_device(fw_ctx->kbdev,
						     kbase_dma_addr(pg),
						     PAGE_SIZE,
						     DMA_BIDIRECTIONAL);
		}
	}
}

static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	CSTD_UNUSED(ctx);
	return ktime_get_raw_ns();
}

static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	if (!fw_ring_buf)
		return;

	if (fw_ring_buf->phys) {
		u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

		WARN_ON(kbase_mmu_teardown_pages(
			fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
			gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
			MCU_AS_NR));

		vunmap(fw_ring_buf->cpu_dump_base);

		kbase_mem_pool_free_pages(
			&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
			fw_ring_buf->num_pages, fw_ring_buf->phys, false,
			false);

		kfree(fw_ring_buf->phys);

		kfree(fw_ring_buf);
	}
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	struct kbase_hwcnt_backend_csf_if_enable *enable)
{
	u32 prfcnt_config;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);
	WARN_ON(!enable);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Build the PRFCNT_CONFIG value: ring buffer size and counter set. */
	prfcnt_config = fw_ring_buf->buf_count;
	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
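	/* For example (values hypothetical): a 128-buffer ring with
	 * counter_set == 0 gives prfcnt_config == 128; a non-zero set value
	 * is placed in the SETSELECT field at PRFCNT_CONFIG_SETSELECT_SHIFT.
	 */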

	/* Configure the ring buffer base address */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
					fw_ring_buf->as_nr);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
					fw_ring_buf->gpu_dump_base & U32_MAX);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
					fw_ring_buf->gpu_dump_base >> 32);

	/* Set extract position to 0 */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);

	/* Configure the enable bitmap */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
					enable->fe_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
					enable->shader_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
					enable->mmu_l2_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
					enable->tiler_bm);

	/* Configure the HWC set and buffer size */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
					prfcnt_config);

	kbdev->csf.hwcnt.enable_pending = true;

	/* Unmask the interrupts */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);

	/* Enable the HWC */
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
					     (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
					     GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
							     GLB_PRFCNT_CONFIG);

	kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
						 enable->clk_enable_map);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Disable the HWC */
	kbdev->csf.hwcnt.enable_pending = true;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
					     GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	/* Mask the interrupts */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);

	/* In case we have a previous request in flight when the disable
	 * happens.
	 */
	kbdev->csf.hwcnt.request_pending = false;

	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	u32 glb_req;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Trigger dumping */
	kbdev->csf.hwcnt.request_pending = true;
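	/* Note (as understood here): the CSF global interface uses a toggle
	 * handshake for sample requests. The XOR below flips the
	 * PRFCNT_SAMPLE bit in GLB_REQ so that it no longer matches the
	 * corresponding acknowledge bit, which the firmware interprets as a
	 * new sample request.
	 */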
	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
	glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
					     GLB_REQ_PRFCNT_SAMPLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
	u32 *insert_index)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	WARN_ON(!extract_index);
	WARN_ON(!insert_index);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	*extract_index = kbase_csf_firmware_global_input_read(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
	*insert_index = kbase_csf_firmware_global_output(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
}

static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	/* Set the raw extract index to release the buffer back to the ring
	 * buffer.
	 */
	kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
					GLB_PRFCNT_EXTRACT, extract_idx);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
	u64 clk_enable_map)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	u8 clk;
	u64 timestamp_ns = ktime_get_raw_ns();

	WARN_ON(!ctx);
	WARN_ON(!cycle_counts);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
		if (!(clk_enable_map & (1ull << clk)))
			continue;

		if (clk == KBASE_CLOCK_DOMAIN_TOP) {
			/* Read cycle count for top clock domain. */
			kbase_backend_get_gpu_time_norequest(
				fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
		} else {
			/* Estimate cycle count for non-top clock domain. */
			cycle_counts[clk] = kbase_ccswe_cycle_at(
				&fw_ctx->ccswe_shader_cores, timestamp_ns);
		}
	}
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
 *
 * @fw_ctx: Pointer to context to destroy.
 */
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	if (!fw_ctx)
		return;

	kfree(fw_ctx);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
 *
 * @kbdev:   Non-NULL pointer to kbase device.
 * @out_ctx: Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
	struct kbase_device *kbdev,
	struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
	u8 clk;
	int errcode = -ENOMEM;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	WARN_ON(!kbdev);
	WARN_ON(!out_ctx);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		goto error;

	ctx->kbdev = kbdev;

	/* Determine the number of available clock domains. */
	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
			break;
	}
	ctx->clk_cnt = clk;

	ctx->clk_enable_map = 0;
	kbase_ccswe_init(&ctx->ccswe_shader_cores);
	ctx->rate_listener.notify =
		kbasep_hwcnt_backend_csf_if_fw_on_freq_change;

	*out_ctx = ctx;

	return 0;
error:
	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
	return errcode;
}

void kbase_hwcnt_backend_csf_if_fw_destroy(
	struct kbase_hwcnt_backend_csf_if *if_fw)
{
	if (!if_fw)
		return;

	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
	memset(if_fw, 0, sizeof(*if_fw));
}

int kbase_hwcnt_backend_csf_if_fw_create(
	struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
{
	int errcode;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	if (!kbdev || !if_fw)
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
	if (errcode)
		return errcode;

	if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
	if_fw->assert_lock_held =
		kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
	if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
	if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
	if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
	if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
	if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
	if_fw->get_gpu_cycle_count =
		kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
	if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
	if_fw->set_extract_index =
		kbasep_hwcnt_backend_csf_if_fw_set_extract_index;

	return 0;
}