// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * CSF GPU HWC backend firmware interface APIs.
 */

#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>

#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"

#include <linux/log2.h>
#include "mali_kbase_ccswe.h"

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_BIFROST_NO_MALI */

/** The number of nanoseconds in a second. */
#define NSECS_IN_SEC 1000000000ull /* ns */

/* The ring buffer's GPU virtual address starts at 4GB. */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
 *                                                 used to save the manual and
 *                                                 auto HWC samples from
 *                                                 firmware.
 * @gpu_dump_base: Starting GPU base address of the ring buffer.
 * @cpu_dump_base: Starting CPU address for the mapping.
 * @buf_count:     Buffer count in the ring buffer, MUST be a power of 2.
 * @as_nr:         Address space number for the memory mapping.
 * @phys:          Physical memory allocation used by the mapping.
 * @num_pages:     Size of the mapping, in memory pages.
 */
struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
	u64 gpu_dump_base;
	void *cpu_dump_base;
	size_t buf_count;
	u32 as_nr;
	struct tagged_addr *phys;
	size_t num_pages;
};
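
/*
 * Within the ring buffer mapping, sample buffer i occupies bytes
 * [i * buf_bytes, (i + 1) * buf_bytes). buf_count is a power of two, so the
 * free-running sample indices used elsewhere in this file are reduced into
 * the ring with "index & (buf_count - 1)".
 */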

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
 *                                            interface, used to communicate
 *                                            with firmware.
 * @kbdev:              KBase device.
 * @buf_bytes:          The size in bytes of each buffer in the ring buffer.
 * @clk_cnt:            The number of clock domains in the system.
 *                      The maximum is 64.
 * @clk_enable_map:     Bitmask of enabled clock domains.
 * @rate_listener:      Clock rate listener callback state.
 * @ccswe_shader_cores: Shader cores cycle count software estimator.
 */
struct kbase_hwcnt_backend_csf_if_fw_ctx {
	struct kbase_device *kbdev;
	size_t buf_bytes;
	u8 clk_cnt;
	u64 clk_enable_map;
	struct kbase_clk_rate_listener rate_listener;
	struct kbase_ccswe ccswe_shader_cores;
};

static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}

static void
kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
				    unsigned long *flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock(kbdev, flags);
}

static void kbasep_hwcnt_backend_csf_if_fw_unlock(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;

	WARN_ON(!ctx);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;

	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On frequency change callback.
 *
 * @rate_listener: Callback state.
 * @clk_index:     Clock index.
 * @clk_rate_hz:   Clock frequency (Hz).
 */
static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change(
	struct kbase_clk_rate_listener *rate_listener, u32 clk_index,
	u32 clk_rate_hz)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		container_of(rate_listener,
			     struct kbase_hwcnt_backend_csf_if_fw_ctx,
			     rate_listener);
	u64 timestamp_ns;

	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
		return;

	timestamp_ns = ktime_get_raw_ns();
	kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns,
				clk_rate_hz);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking.
 *
 * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
 * @clk_enable_map: Enable map specifying the clock domains whose cycle
 *                  counters should be tracked.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_enable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;

	if (kbase_hwcnt_clk_enable_map_enabled(
		    clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
		/* Software estimation for non-top clock domains. */
		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
		const struct kbase_clk_data *clk_data =
			rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
		u32 cur_freq;
		unsigned long flags;
		u64 timestamp_ns;

		timestamp_ns = ktime_get_raw_ns();

		spin_lock_irqsave(&rtm->lock, flags);

		cur_freq = (u32)clk_data->clock_val;
		kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
		kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores,
					timestamp_ns, cur_freq);

		kbase_clk_rate_trace_manager_subscribe_no_lock(
			rtm, &fw_ctx->rate_listener);

		spin_unlock_irqrestore(&rtm->lock, flags);
	}

	fw_ctx->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking.
 *
 * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
 */
static void kbasep_hwcnt_backend_csf_if_fw_cc_disable(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	struct kbase_device *kbdev = fw_ctx->kbdev;
	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
	u64 clk_enable_map = fw_ctx->clk_enable_map;

	if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map,
					       KBASE_CLOCK_DOMAIN_SHADER_CORES))
		kbase_clk_rate_trace_manager_unsubscribe(
			rtm, &fw_ctx->rate_listener);
}

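/**
 * kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info() - Get performance counter
 *                                                    information.
 * @ctx:         Non-NULL pointer to a CSF firmware interface context.
 * @prfcnt_info: Non-NULL pointer to where the performance counter information
 *               is stored on success.
 */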
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	size_t dummy_model_blk_count;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	prfcnt_info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
	prfcnt_info->core_mask =
		(1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
	/* 1 FE block + 1 Tiler block + l2_count blocks + shader_core blocks */
	dummy_model_blk_count =
		2 + prfcnt_info->l2_count + fls64(prfcnt_info->core_mask);
	prfcnt_info->dump_bytes =
		dummy_model_blk_count * KBASE_DUMMY_MODEL_BLOCK_SIZE;
	prfcnt_info->prfcnt_block_size =
		KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
		KBASE_HWCNT_VALUE_HW_BYTES;
	prfcnt_info->clk_cnt = 1;
	prfcnt_info->clearing_samples = true;
	fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
#else
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
	struct kbase_device *kbdev;
	u32 prfcnt_size;
	u32 prfcnt_hw_size = 0;
	u32 prfcnt_fw_size = 0;
	u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
				KBASE_HWCNT_VALUE_HW_BYTES;

	WARN_ON(!ctx);
	WARN_ON(!prfcnt_info);

	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	kbdev = fw_ctx->kbdev;
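	/*
	 * prfcnt_size packs both sample sizes reported by firmware: the low
	 * byte holds the hardware counter size and bits 31:16 the firmware
	 * counter size, each expressed in 256-byte units (hence the << 8
	 * conversions below).
	 */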
	prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
	prfcnt_hw_size = (prfcnt_size & 0xFF) << 8;
	prfcnt_fw_size = (prfcnt_size >> 16) << 8;
	fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;

	/* Read the block size if the GPU has the register PRFCNT_FEATURES
	 * which was introduced in architecture version 11.x.7.
	 */
	if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
	    GPU_ID2_PRODUCT_TTUX) {
		prfcnt_block_size =
			PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(kbase_reg_read(
				kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
			<< 8;
	}

	prfcnt_info->dump_bytes = fw_ctx->buf_bytes;
	prfcnt_info->prfcnt_block_size = prfcnt_block_size;
	prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices;
	prfcnt_info->core_mask =
		kbdev->gpu_props.props.coherency_info.group[0].core_mask;

	prfcnt_info->clk_cnt = fw_ctx->clk_cnt;
	prfcnt_info->clearing_samples = true;

	/* The block size must be a multiple of the counter size. */
	WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) !=
		0);
	/* The total size must be a multiple of the block size. */
	WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) !=
		0);
#endif
}

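/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc() - Allocate a ring buffer for
 *                                                   the CSF firmware interface.
 * @ctx:           Non-NULL pointer to a CSF firmware interface context.
 * @buf_count:     Number of buffers in the ring buffer, MUST be a power of 2.
 * @cpu_dump_base: Non-NULL pointer to where the CPU mapping of the ring buffer
 *                 is stored on success.
 * @out_ring_buf:  Non-NULL pointer to where the created ring buffer is stored
 *                 on success.
 *
 * Return: 0 on success, else error code.
 */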
static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count,
	void **cpu_dump_base,
	struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
	struct kbase_device *kbdev;
	struct tagged_addr *phys;
	struct page **page_list;
	void *cpu_addr;
	int ret;
	int i;
	size_t num_pages;
	u64 flags;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;

	pgprot_t cpu_map_prot = PAGE_KERNEL;
	u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	/* Calls to this function are inherently asynchronous with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	WARN_ON(!ctx);
	WARN_ON(!cpu_dump_base);
	WARN_ON(!out_ring_buf);

	kbdev = fw_ctx->kbdev;

	/* The buffer count must be a power of 2. */
	if (!is_power_of_2(buf_count))
		return -EINVAL;

	/* The base GPU virtual address must be 2KB aligned. */
	if (gpu_va_base & (2048 - 1))
		return -EINVAL;

	fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
	if (!fw_ring_buf)
		return -ENOMEM;

	num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
	if (!phys)
		goto phys_alloc_error;

	page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
		goto page_list_alloc_error;

	/* Get physical pages for the buffer. */
	ret = kbase_mem_pool_alloc_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false);
	if (ret != num_pages)
		goto phys_mem_pool_alloc_error;

	/* Get the CPU virtual address. */
	for (i = 0; i < num_pages; i++)
		page_list[i] = as_page(phys[i]);

	cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
	if (!cpu_addr)
		goto vmap_error;

	flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);

	/* Update the MMU table. */
	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
				     gpu_va_base >> PAGE_SHIFT, phys, num_pages,
				     flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
				     mmu_sync_info);
	if (ret)
		goto mmu_insert_failed;

	kfree(page_list);

	fw_ring_buf->gpu_dump_base = gpu_va_base;
	fw_ring_buf->cpu_dump_base = cpu_addr;
	fw_ring_buf->phys = phys;
	fw_ring_buf->num_pages = num_pages;
	fw_ring_buf->buf_count = buf_count;
	fw_ring_buf->as_nr = MCU_AS_NR;

	*cpu_dump_base = fw_ring_buf->cpu_dump_base;
	*out_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	/* The dummy model needs the CPU mapping. */
	gpu_model_set_dummy_prfcnt_base_cpu(fw_ring_buf->cpu_dump_base, kbdev,
					    phys, num_pages);
#endif /* CONFIG_MALI_BIFROST_NO_MALI */

	return 0;

mmu_insert_failed:
	vunmap(cpu_addr);
vmap_error:
	kbase_mem_pool_free_pages(
		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
		phys, false, false);
phys_mem_pool_alloc_error:
	kfree(page_list);
page_list_alloc_error:
	kfree(phys);
phys_alloc_error:
	kfree(fw_ring_buf);
	return -ENOMEM;
}

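/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync() - Sync a range of the ring
 *                                                  buffer between CPU and GPU.
 * @ctx:             Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf:        Non-NULL pointer to the ring buffer to sync.
 * @buf_index_first: Index of the first buffer to sync (inclusive).
 * @buf_index_last:  Index one past the last buffer to sync (exclusive).
 * @for_cpu:         True to sync for CPU access, false to sync for the device.
 */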
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	size_t i;
	size_t pg_first;
	size_t pg_last;
	u64 start_address;
	u64 stop_address;
	u32 ring_buf_index_first;
	u32 ring_buf_index_last;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);

	/* The index arguments form a half-open range: the first index is
	 * inclusive, the last is exclusive. However, when masking back to the
	 * available buffers the range is made inclusive at both ends, so that
	 * a full flush does not degenerate to 0 -> 0.
	 */
	ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
	ring_buf_index_last =
		(buf_index_last - 1) & (fw_ring_buf->buf_count - 1);

	/* The start address is the offset of the first buffer. */
	start_address = fw_ctx->buf_bytes * ring_buf_index_first;
	pg_first = start_address >> PAGE_SHIFT;

	/* The stop address is the last byte in the final buffer. */
	stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
	pg_last = stop_address >> PAGE_SHIFT;

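	/*
	 * Worked example (illustrative, not from the original source): with
	 * buf_count = 4, syncing indices [3, 5) gives ring indices 3 and 0,
	 * so start_address > stop_address and the pages are flushed in two
	 * runs: buffer 3 up to the end of the mapping, then from page 0.
	 */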
	/* Check whether the buffer range wraps. */
	if (start_address > stop_address) {
		/* Sync the first part, up to the end of the ring buffer. */
		for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
			struct page *pg = as_page(fw_ring_buf->phys[i]);

			if (for_cpu) {
				kbase_sync_single_for_cpu(fw_ctx->kbdev,
							  kbase_dma_addr(pg),
							  PAGE_SIZE,
							  DMA_BIDIRECTIONAL);
			} else {
				kbase_sync_single_for_device(fw_ctx->kbdev,
							     kbase_dma_addr(pg),
							     PAGE_SIZE,
							     DMA_BIDIRECTIONAL);
			}
		}

		/* The second part starts from page 0. */
		pg_first = 0;
	}

	for (i = pg_first; i <= pg_last; i++) {
		struct page *pg = as_page(fw_ring_buf->phys[i]);

		if (for_cpu) {
			kbase_sync_single_for_cpu(fw_ctx->kbdev,
						  kbase_dma_addr(pg), PAGE_SIZE,
						  DMA_BIDIRECTIONAL);
		} else {
			kbase_sync_single_for_device(fw_ctx->kbdev,
						     kbase_dma_addr(pg),
						     PAGE_SIZE,
						     DMA_BIDIRECTIONAL);
		}
	}
}

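/**
 * kbasep_hwcnt_backend_csf_if_fw_timestamp_ns() - Get the current backend
 *                                                 timestamp.
 * @ctx: Non-NULL pointer to a CSF firmware interface context (unused).
 *
 * Return: CPU timestamp in nanoseconds, from the raw monotonic clock.
 */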
static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	CSTD_UNUSED(ctx);
	return ktime_get_raw_ns();
}

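/**
 * kbasep_hwcnt_backend_csf_if_fw_ring_buf_free() - Free a ring buffer
 *                                                  allocated for the CSF
 *                                                  firmware interface.
 * @ctx:      Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf: Pointer to the ring buffer to free, may be NULL.
 */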
static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	if (!fw_ring_buf)
		return;

	if (fw_ring_buf->phys) {
		u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

		WARN_ON(kbase_mmu_teardown_pages(
			fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
			gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages,
			MCU_AS_NR));

		vunmap(fw_ring_buf->cpu_dump_base);

		kbase_mem_pool_free_pages(
			&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
			fw_ring_buf->num_pages, fw_ring_buf->phys, false,
			false);

		kfree(fw_ring_buf->phys);

		kfree(fw_ring_buf);
	}
}

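/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_enable() - Enable counter dumping with
 *                                                the firmware.
 * @ctx:      Non-NULL pointer to a CSF firmware interface context.
 * @ring_buf: Non-NULL pointer to the ring buffer that samples are written to.
 * @enable:   Non-NULL pointer to the counter enable configuration.
 *
 * The CSF scheduler interrupt spinlock must be held when calling this
 * function.
 */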
static void kbasep_hwcnt_backend_csf_if_fw_dump_enable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
	struct kbase_hwcnt_backend_csf_if_enable *enable)
{
	u32 prfcnt_config;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;

	WARN_ON(!ctx);
	WARN_ON(!ring_buf);
	WARN_ON(!enable);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Build the configuration word: ring buffer size and counter set. */
	prfcnt_config = fw_ring_buf->buf_count;
	prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;

	/* Configure the ring buffer base address. */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID,
					fw_ring_buf->as_nr);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
					fw_ring_buf->gpu_dump_base & U32_MAX);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
					fw_ring_buf->gpu_dump_base >> 32);

	/* Set the extract position to 0. */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);

	/* Configure the enable bitmaps. */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN,
					enable->fe_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN,
					enable->shader_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN,
					enable->mmu_l2_bm);
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN,
					enable->tiler_bm);

	/* Configure the HWC set and buffer size. */
	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG,
					prfcnt_config);

	kbdev->csf.hwcnt.enable_pending = true;

	/* Unmask the interrupts. */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
		GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);

	/* Enable the HWC. */
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
					     (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
					     GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	prfcnt_config = kbase_csf_firmware_global_input_read(global_iface,
							     GLB_PRFCNT_CONFIG);

	kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx,
						 enable->clk_enable_map);
}

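/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_disable() - Disable counter dumping with
 *                                                 the firmware.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 *
 * The CSF scheduler interrupt spinlock must be held when calling this
 * function.
 */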
static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Disable the HWC. */
	kbdev->csf.hwcnt.enable_pending = true;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0,
					     GLB_REQ_PRFCNT_ENABLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

	/* Mask the interrupts. */
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
	kbase_csf_firmware_global_input_mask(
		global_iface, GLB_ACK_IRQ_MASK, 0,
		GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);

	/* Clear any dump request that was still in flight when the disable
	 * happened.
	 */
	kbdev->csf.hwcnt.request_pending = false;

	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}

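/**
 * kbasep_hwcnt_backend_csf_if_fw_dump_request() - Request a manual counter
 *                                                 sample from the firmware.
 * @ctx: Non-NULL pointer to a CSF firmware interface context.
 *
 * The CSF scheduler interrupt spinlock must be held when calling this
 * function.
 */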
static void kbasep_hwcnt_backend_csf_if_fw_dump_request(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
	u32 glb_req;
	struct kbase_device *kbdev;
	struct kbase_csf_global_iface *global_iface;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	kbdev = fw_ctx->kbdev;
	global_iface = &kbdev->csf.global_iface;

	/* Trigger dumping */
	kbdev->csf.hwcnt.request_pending = true;
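	/*
	 * The global request/acknowledge handshake is toggle based: the
	 * PRFCNT_SAMPLE bit in GLB_REQ is flipped rather than set, and the
	 * firmware is expected to mirror the bit in GLB_ACK once the sample
	 * has been written to the ring buffer.
	 */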
	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
	glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
					     GLB_REQ_PRFCNT_SAMPLE_MASK);
	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}

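/**
 * kbasep_hwcnt_backend_csf_if_fw_get_indexes() - Read the current extract and
 *                                                insert indexes of the ring
 *                                                buffer.
 * @ctx:           Non-NULL pointer to a CSF firmware interface context.
 * @extract_index: Non-NULL pointer to where the extract index is stored.
 * @insert_index:  Non-NULL pointer to where the insert index is stored.
 */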
static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index,
	u32 *insert_index)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	WARN_ON(!extract_index);
	WARN_ON(!insert_index);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	*extract_index = kbase_csf_firmware_global_input_read(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT);
	*insert_index = kbase_csf_firmware_global_output(
		&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT);
}

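/**
 * kbasep_hwcnt_backend_csf_if_fw_set_extract_index() - Update the extract
 *                                                      index of the ring
 *                                                      buffer.
 * @ctx:         Non-NULL pointer to a CSF firmware interface context.
 * @extract_idx: New raw extract index to write to the firmware interface.
 */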
static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

	WARN_ON(!ctx);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	/* Set the raw extract index to release the buffer back to the ring
	 * buffer.
	 */
	kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface,
					GLB_PRFCNT_EXTRACT, extract_idx);
}

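/**
 * kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count() - Get cycle counts for
 *                                                        the enabled clock
 *                                                        domains.
 * @ctx:            Non-NULL pointer to a CSF firmware interface context.
 * @cycle_counts:   Non-NULL array, indexed by clock domain, that receives the
 *                  cycle counts.
 * @clk_enable_map: Bitmask of clock domains to read; entries for disabled
 *                  domains are left unchanged.
 */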
static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(
	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts,
	u64 clk_enable_map)
{
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
	u8 clk;
	u64 timestamp_ns = ktime_get_raw_ns();

	WARN_ON(!ctx);
	WARN_ON(!cycle_counts);
	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

	for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
		if (!(clk_enable_map & (1ull << clk)))
			continue;

		if (clk == KBASE_CLOCK_DOMAIN_TOP) {
			/* Read the cycle count for the top clock domain. */
			kbase_backend_get_gpu_time_norequest(
				fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL);
		} else {
			/* Estimate the cycle count for a non-top clock domain. */
			cycle_counts[clk] = kbase_ccswe_cycle_at(
				&fw_ctx->ccswe_shader_cores, timestamp_ns);
		}
	}
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
 *
 * @fw_ctx: Pointer to context to destroy.
 */
static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
	if (!fw_ctx)
		return;

	kfree(fw_ctx);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF firmware context.
 *
 * @kbdev:   Non-NULL pointer to kbase device.
 * @out_ctx: Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_csf_if_fw_ctx_create(
	struct kbase_device *kbdev,
	struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
	u8 clk;
	int errcode = -ENOMEM;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	WARN_ON(!kbdev);
	WARN_ON(!out_ctx);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		goto error;

	ctx->kbdev = kbdev;

	/* Determine the number of available clock domains. */
	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
			break;
	}
	ctx->clk_cnt = clk;

	ctx->clk_enable_map = 0;
	kbase_ccswe_init(&ctx->ccswe_shader_cores);
	ctx->rate_listener.notify =
		kbasep_hwcnt_backend_csf_if_fw_on_freq_change;

	*out_ctx = ctx;

	return 0;
error:
	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
	return errcode;
}

void kbase_hwcnt_backend_csf_if_fw_destroy(
	struct kbase_hwcnt_backend_csf_if *if_fw)
{
	if (!if_fw)
		return;

	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
	memset(if_fw, 0, sizeof(*if_fw));
}

int kbase_hwcnt_backend_csf_if_fw_create(
	struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw)
{
	int errcode;
	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

	if (!kbdev || !if_fw)
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
	if (errcode)
		return errcode;

	if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
	if_fw->assert_lock_held =
		kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
	if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
	if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
	if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
	if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
	if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
	if_fw->get_gpu_cycle_count =
		kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
	if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
	if_fw->set_extract_index =
		kbasep_hwcnt_backend_csf_if_fw_set_extract_index;

	return 0;
}
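
/*
 * Minimal usage sketch (added for illustration, not part of the original
 * driver; the calling context and error handling are assumptions):
 *
 *	struct kbase_hwcnt_backend_csf_if fw_if;
 *
 *	if (!kbase_hwcnt_backend_csf_if_fw_create(kbdev, &fw_if)) {
 *		struct kbase_hwcnt_backend_csf_if_prfcnt_info info;
 *
 *		fw_if.get_prfcnt_info(fw_if.ctx, &info);
 *		... allocate a ring buffer, enable dumping, etc. ...
 *		kbase_hwcnt_backend_csf_if_fw_destroy(&fw_if);
 *	}
 */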