1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <mali_kbase.h>
23 #include <gpu/mali_kbase_gpu_fault.h>
24 #include <mali_kbase_reset_gpu.h>
25 #include "mali_kbase_csf.h"
26 #include "backend/gpu/mali_kbase_pm_internal.h"
27 #include <linux/export.h>
28 #include <linux/priority_control_manager.h>
29 #include <linux/shmem_fs.h>
30 #include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
31 #include "mali_kbase_csf_tiler_heap.h"
32 #include <mmu/mali_kbase_mmu.h>
33 #include "mali_kbase_csf_timeout.h"
34 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
35 #include <mali_kbase_hwaccess_time.h>
36 #include "mali_kbase_csf_event.h"
37 
38 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
39 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
40 #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
41 
42 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
43 	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
44 	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
45 	KBASE_QUEUE_GROUP_PRIORITY_LOW,
46 	KBASE_QUEUE_GROUP_PRIORITY_REALTIME
47 };
48 const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = {
49 	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
50 	BASE_QUEUE_GROUP_PRIORITY_HIGH,
51 	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
52 	BASE_QUEUE_GROUP_PRIORITY_LOW
53 };
54 
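/**
 * put_user_pages_mmap_handle() - Release the mmap handle (cookie) that was
 *                                reserved for a queue's user I/O pages.
 *
 * @kctx:  Address of the kbase context within which the queue was created.
 * @queue: Pointer to the queue whose handle is to be released.
 *
 * Clears the cookie-to-queue association, marks the cookie as free again and
 * resets the queue handle to BASEP_MEM_INVALID_HANDLE.
 */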
55 static void put_user_pages_mmap_handle(struct kbase_context *kctx,
56 			struct kbase_queue *queue)
57 {
58 	unsigned long cookie_nr;
59 
60 	lockdep_assert_held(&kctx->csf.lock);
61 
62 	if (queue->handle == BASEP_MEM_INVALID_HANDLE)
63 		return;
64 
65 	cookie_nr =
66 		PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
67 
68 	if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) {
69 		/* free up cookie */
70 		kctx->csf.user_pages_info[cookie_nr] = NULL;
71 		bitmap_set(kctx->csf.cookies, cookie_nr, 1);
72 	}
73 
74 	queue->handle = BASEP_MEM_INVALID_HANDLE;
75 }
76 
77 /* Reserve a cookie to be returned to userspace as a handle, used to create
78  * the CPU mapping of the pair of input/output pages and the HW doorbell page.
79  * Returns 0 on success, or a negative error code on failure.
80  */
81 static int get_user_pages_mmap_handle(struct kbase_context *kctx,
82 			struct kbase_queue *queue)
83 {
84 	unsigned long cookie, cookie_nr;
85 
86 	lockdep_assert_held(&kctx->csf.lock);
87 
88 	if (bitmap_empty(kctx->csf.cookies,
89 				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) {
90 		dev_err(kctx->kbdev->dev,
91 			"No csf cookies available for allocation!");
92 		return -ENOMEM;
93 	}
94 
95 	/* allocate a cookie */
96 	cookie_nr = find_first_bit(kctx->csf.cookies,
97 				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
98 	if (kctx->csf.user_pages_info[cookie_nr]) {
99 		dev_err(kctx->kbdev->dev,
100 			"Inconsistent state of csf cookies!");
101 		return -EINVAL;
102 	}
103 	kctx->csf.user_pages_info[cookie_nr] = queue;
104 	bitmap_clear(kctx->csf.cookies, cookie_nr, 1);
105 
106 	/* relocate to correct base */
107 	cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
108 	cookie <<= PAGE_SHIFT;
109 
110 	queue->handle = (u64)cookie;
111 
112 	return 0;
113 }
114 
115 static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
116 			struct kbase_va_region *reg)
117 {
118 	size_t num_pages = 2;
119 
120 	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
121 				 reg->start_pfn, num_pages, MCU_AS_NR);
122 
123 	WARN_ON(reg->flags & KBASE_REG_FREE);
124 
125 	mutex_lock(&kctx->kbdev->csf.reg_lock);
126 	kbase_remove_va_region(kctx->kbdev, reg);
127 	mutex_unlock(&kctx->kbdev->csf.reg_lock);
128 }
129 
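/**
 * init_user_io_pages() - Initialise the input/output pages of a queue.
 *
 * @queue: Pointer to the queue whose user I/O pages are to be initialised.
 *
 * Zeroes the CS_INSERT, CS_EXTRACT_INIT, CS_EXTRACT and CS_ACTIVE fields in
 * the input/output pages mapped at queue->user_io_addr.
 */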
130 static void init_user_io_pages(struct kbase_queue *queue)
131 {
132 	u32 *input_addr = (u32 *)(queue->user_io_addr);
133 	u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
134 
135 	input_addr[CS_INSERT_LO/4] = 0;
136 	input_addr[CS_INSERT_HI/4] = 0;
137 
138 	input_addr[CS_EXTRACT_INIT_LO/4] = 0;
139 	input_addr[CS_EXTRACT_INIT_HI/4] = 0;
140 
141 	output_addr[CS_EXTRACT_LO/4] = 0;
142 	output_addr[CS_EXTRACT_HI/4] = 0;
143 
144 	output_addr[CS_ACTIVE/4] = 0;
145 }
146 
147 /* Map the input/output pages in the shared interface segment of MCU firmware
148  * address space.
149  */
150 static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
151 		struct tagged_addr *phys, struct kbase_va_region *reg)
152 {
153 	unsigned long mem_flags = KBASE_REG_GPU_RD;
154 	const size_t num_pages = 2;
155 	int ret;
156 
157 	/* Calls to this function are inherently asynchronous, with respect to
158 	 * MMU operations.
159 	 */
160 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
161 
162 #if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
163 		((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
164 		 (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
165 	mem_flags |=
166 		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
167 #else
168 	if (kbdev->system_coherency == COHERENCY_NONE) {
169 		mem_flags |=
170 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
171 	} else {
172 		mem_flags |= KBASE_REG_SHARE_BOTH |
173 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
174 	}
175 #endif
176 
177 	mutex_lock(&kbdev->csf.reg_lock);
178 	ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
179 	reg->flags &= ~KBASE_REG_FREE;
180 	mutex_unlock(&kbdev->csf.reg_lock);
181 
182 	if (ret)
183 		return ret;
184 
185 	/* Map input page */
186 	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
187 				     &phys[0], 1, mem_flags, MCU_AS_NR,
188 				     KBASE_MEM_GROUP_CSF_IO, mmu_sync_info);
189 	if (ret)
190 		goto bad_insert;
191 
192 	/* Map output page, it needs rw access */
193 	mem_flags |= KBASE_REG_GPU_WR;
194 	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
195 				     reg->start_pfn + 1, &phys[1], 1, mem_flags,
196 				     MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO,
197 				     mmu_sync_info);
198 	if (ret)
199 		goto bad_insert_output_page;
200 
201 	return 0;
202 
203 bad_insert_output_page:
204 	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
205 				 reg->start_pfn, 1, MCU_AS_NR);
206 bad_insert:
207 	mutex_lock(&kbdev->csf.reg_lock);
208 	kbase_remove_va_region(kbdev, reg);
209 	mutex_unlock(&kbdev->csf.reg_lock);
210 
211 	return ret;
212 }
213 
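/**
 * kernel_unmap_user_io_pages() - Remove the kernel (CPU) mapping of a queue's
 *                                user I/O pages.
 *
 * @kctx:  Address of the kbase context within which the queue was created.
 * @queue: Pointer to the queue whose I/O pages are to be unmapped.
 *
 * Unmaps the vmap()'d pair of pages and decrements the count of permanently
 * mapped pages for the context.
 */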
214 static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
215 			struct kbase_queue *queue)
216 {
217 	const size_t num_pages = 2;
218 
219 	kbase_gpu_vm_lock(kctx);
220 
221 	vunmap(queue->user_io_addr);
222 
223 	WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
224 	atomic_sub(num_pages, &kctx->permanent_mapped_pages);
225 
226 	kbase_gpu_vm_unlock(kctx);
227 }
228 
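/**
 * kernel_map_user_io_pages() - Create a kernel (CPU) mapping of a queue's
 *                              user I/O pages.
 *
 * @kctx:  Address of the kbase context within which the queue was created.
 * @queue: Pointer to the queue whose physical I/O pages are to be mapped.
 *
 * The mapping attributes match those used for the userspace mapping created
 * by the CPU page fault handler. Fails if the permanent mapping limit for
 * the context would be exceeded or if vmap() fails.
 *
 * Return: 0 on success, or -ENOMEM on failure.
 */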
229 static int kernel_map_user_io_pages(struct kbase_context *kctx,
230 			struct kbase_queue *queue)
231 {
232 	struct page *page_list[2];
233 	pgprot_t cpu_map_prot;
234 	int ret = 0;
235 	size_t i;
236 
237 	kbase_gpu_vm_lock(kctx);
238 
239 	if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
240 			 atomic_read(&kctx->permanent_mapped_pages))) {
241 		ret = -ENOMEM;
242 		goto unlock;
243 	}
244 
245 	/* The pages are mapped to Userspace also, so use the same mapping
246 	 * attributes as used inside the CPU page fault handler.
247 	 */
248 #if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
249 		((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
250 		 (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
251 	cpu_map_prot = pgprot_device(PAGE_KERNEL);
252 #else
253 	if (kctx->kbdev->system_coherency == COHERENCY_NONE)
254 		cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
255 	else
256 		cpu_map_prot = PAGE_KERNEL;
257 #endif
258 
259 	for (i = 0; i < ARRAY_SIZE(page_list); i++)
260 		page_list[i] = as_page(queue->phys[i]);
261 
262 	queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
263 
264 	if (!queue->user_io_addr)
265 		ret = -ENOMEM;
266 	else
267 		atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
268 
269 unlock:
270 	kbase_gpu_vm_unlock(kctx);
271 	return ret;
272 }
273 
274 static void term_queue_group(struct kbase_queue_group *group);
275 static void get_queue(struct kbase_queue *queue);
276 static void release_queue(struct kbase_queue *queue);
277 
278 /**
279  * kbase_csf_free_command_stream_user_pages() - Free the resources allocated
280  *				    for a queue at the time of bind.
281  *
282  * @kctx:	Address of the kbase context within which the queue was created.
283  * @queue:	Pointer to the queue to be unlinked.
284  *
285  * This function will free the pair of physical pages allocated for a GPU
286  * command queue and also release the hardware doorbell page, which were mapped
287  * into the process address space to enable direct submission of commands to
288  * the hardware. It also releases the reference taken on the queue when the
289  * mapping was created.
290  *
291  * This function is called only when the mapping is being removed, so the
292  * resources for the queue do not get freed until the mapping is removed,
293  * even though userspace may already have terminated the queue.
294  * The kernel ensures that termination of the kbase context is only
295  * triggered after the mapping has been removed.
296  *
297  * If an explicit or implicit unbind was missed by userspace then the mapping
298  * persists; on process exit the kernel itself removes the mapping.
299  */
300 static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
301 		struct kbase_queue *queue)
302 {
303 	const size_t num_pages = 2;
304 
305 	gpu_munmap_user_io_pages(kctx, queue->reg);
306 	kernel_unmap_user_io_pages(kctx, queue);
307 
308 	kbase_mem_pool_free_pages(
309 		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
310 		num_pages, queue->phys, true, false);
311 
312 	kfree(queue->reg);
313 	queue->reg = NULL;
314 
315 	/* If the queue has already been terminated by userspace
316 	 * then the ref count for queue object will drop to 0 here.
317 	 */
318 	release_queue(queue);
319 }
320 
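/* Allocate the pair of input/output pages for a queue, create the kernel and
 * MCU firmware mappings for them, and assign the doorbell file offset used
 * for the userspace mapping. On success a second reference is taken on the
 * queue object for the IO mapping and the queue moves to the BOUND state.
 */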
321 int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
322 			struct kbase_queue *queue)
323 {
324 	struct kbase_device *kbdev = kctx->kbdev;
325 	struct kbase_va_region *reg;
326 	const size_t num_pages = 2;
327 	int ret;
328 
329 	lockdep_assert_held(&kctx->csf.lock);
330 
331 	reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
332 				      num_pages, KBASE_REG_ZONE_MCU_SHARED);
333 	if (!reg)
334 		return -ENOMEM;
335 
336 	ret = kbase_mem_pool_alloc_pages(
337 				&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
338 				num_pages, queue->phys, false);
339 
340 	if (ret != num_pages)
341 		goto phys_alloc_failed;
342 
343 	ret = kernel_map_user_io_pages(kctx, queue);
344 	if (ret)
345 		goto kernel_map_failed;
346 
347 	init_user_io_pages(queue);
348 
349 	ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
350 	if (ret)
351 		goto gpu_mmap_failed;
352 
353 	queue->reg = reg;
354 
355 	mutex_lock(&kbdev->csf.reg_lock);
356 	if (kbdev->csf.db_file_offsets >
357 			(U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
358 		kbdev->csf.db_file_offsets = 0;
359 
360 	queue->db_file_offset = kbdev->csf.db_file_offsets;
361 	kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
362 
363 	WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
364 	/* This is the second reference taken on the queue object and
365 	 * would be dropped only when the IO mapping is removed either
366 	 * explicitly by userspace or implicitly by kernel on process exit.
367 	 */
368 	get_queue(queue);
369 	queue->bind_state = KBASE_CSF_QUEUE_BOUND;
370 	mutex_unlock(&kbdev->csf.reg_lock);
371 
372 	return 0;
373 
374 gpu_mmap_failed:
375 	kernel_unmap_user_io_pages(kctx, queue);
376 
377 kernel_map_failed:
378 	kbase_mem_pool_free_pages(
379 		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
380 		num_pages, queue->phys, false, false);
381 
382 phys_alloc_failed:
383 	kfree(reg);
384 
385 	return -ENOMEM;
386 }
387 
388 static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
389 	u8 group_handle)
390 {
391 	uint index = group_handle;
392 
393 	lockdep_assert_held(&kctx->csf.lock);
394 
395 	if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) {
396 		if (WARN_ON(kctx->csf.queue_groups[index]->handle != index))
397 			return NULL;
398 		return kctx->csf.queue_groups[index];
399 	}
400 
401 	return NULL;
402 }
403 
404 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
405 	u8 group_handle)
406 {
407 	struct kbase_queue_group *group;
408 
409 	mutex_lock(&kctx->csf.lock);
410 	group = find_queue_group(kctx, group_handle);
411 	mutex_unlock(&kctx->csf.lock);
412 
413 	return group ? 0 : -EINVAL;
414 }
415 
416 static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
417 {
418 	struct kbase_queue *queue;
419 
420 	lockdep_assert_held(&kctx->csf.lock);
421 
422 	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
423 		if (base_addr == queue->base_addr)
424 			return queue;
425 	}
426 
427 	return NULL;
428 }
429 
430 static void get_queue(struct kbase_queue *queue)
431 {
432 	WARN_ON(!atomic_inc_not_zero(&queue->refcount));
433 }
434 
435 static void release_queue(struct kbase_queue *queue)
436 {
437 	lockdep_assert_held(&queue->kctx->csf.lock);
438 
439 	WARN_ON(atomic_read(&queue->refcount) <= 0);
440 
441 	if (atomic_dec_and_test(&queue->refcount)) {
442 		/* The queue can't still be on the per context list. */
443 		WARN_ON(!list_empty(&queue->link));
444 		WARN_ON(queue->group);
445 		kfree(queue);
446 	}
447 }
448 
449 static void oom_event_worker(struct work_struct *data);
450 static void fatal_event_worker(struct work_struct *data);
451 
452 /* Between reg and reg_ex, one and only one must be null */
453 static int csf_queue_register_internal(struct kbase_context *kctx,
454 			struct kbase_ioctl_cs_queue_register *reg,
455 			struct kbase_ioctl_cs_queue_register_ex *reg_ex)
456 {
457 	struct kbase_queue *queue;
458 	int ret = 0;
459 	struct kbase_va_region *region;
460 	u64 queue_addr;
461 	size_t queue_size;
462 
463 	/* Only one pointer expected, otherwise coding error */
464 	if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
465 		dev_dbg(kctx->kbdev->dev,
466 			"Error, one and only one param-ptr expected!");
467 		return -EINVAL;
468 	}
469 
470 	/* struct kbase_ioctl_cs_queue_register_ex contains a full
471 	 * struct kbase_ioctl_cs_queue_register at the start address. So
472 	 * the pointer can be safely cast to pointing to a
473 	 * kbase_ioctl_cs_queue_register object.
474 	 */
475 	if (reg_ex)
476 		reg = (struct kbase_ioctl_cs_queue_register *)reg_ex;
477 
478 	/* Validate the queue priority */
479 	if (reg->priority > BASE_QUEUE_MAX_PRIORITY)
480 		return -EINVAL;
481 
482 	queue_addr = reg->buffer_gpu_addr;
483 	queue_size = reg->buffer_size >> PAGE_SHIFT;
484 
485 	mutex_lock(&kctx->csf.lock);
486 
487 	/* Check if queue is already registered */
488 	if (find_queue(kctx, queue_addr) != NULL) {
489 		ret = -EINVAL;
490 		goto out;
491 	}
492 
493 	/* Check if the queue address is valid */
494 	kbase_gpu_vm_lock(kctx);
495 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
496 								    queue_addr);
497 
498 	if (kbase_is_region_invalid_or_free(region)) {
499 		ret = -ENOENT;
500 		goto out_unlock_vm;
501 	}
502 
503 	if (queue_size > (region->nr_pages -
504 			  ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) {
505 		ret = -EINVAL;
506 		goto out_unlock_vm;
507 	}
508 
509 	/* Check address validity on cs_trace buffer etc. Don't care
510 	 * if not enabled (i.e. when size is 0).
511 	 */
512 	if (reg_ex && reg_ex->ex_buffer_size) {
513 		int buf_pages = (reg_ex->ex_buffer_size +
514 				 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
515 		struct kbase_va_region *region_ex =
516 			kbase_region_tracker_find_region_enclosing_address(kctx,
517 									   reg_ex->ex_buffer_base);
518 
519 		if (kbase_is_region_invalid_or_free(region_ex)) {
520 			ret = -ENOENT;
521 			goto out_unlock_vm;
522 		}
523 
524 		if (buf_pages > (region_ex->nr_pages -
525 				 ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
526 			ret = -EINVAL;
527 			goto out_unlock_vm;
528 		}
529 
530 		region_ex = kbase_region_tracker_find_region_enclosing_address(
531 			kctx, reg_ex->ex_offset_var_addr);
532 		if (kbase_is_region_invalid_or_free(region_ex)) {
533 			ret = -ENOENT;
534 			goto out_unlock_vm;
535 		}
536 	}
537 
538 	queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL);
539 
540 	if (!queue) {
541 		ret = -ENOMEM;
542 		goto out_unlock_vm;
543 	}
544 
545 	queue->kctx = kctx;
546 	queue->base_addr = queue_addr;
547 	queue->queue_reg = region;
548 	queue->size = (queue_size << PAGE_SHIFT);
549 	queue->csi_index = KBASEP_IF_NR_INVALID;
550 	queue->enabled = false;
551 
552 	queue->priority = reg->priority;
553 	atomic_set(&queue->refcount, 1);
554 
555 	queue->group = NULL;
556 	queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
557 	queue->handle = BASEP_MEM_INVALID_HANDLE;
558 	queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
559 
560 	queue->status_wait = 0;
561 	queue->sync_ptr = 0;
562 	queue->sync_value = 0;
563 
564 	queue->sb_status = 0;
565 	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
566 
567 	atomic_set(&queue->pending, 0);
568 
569 	INIT_LIST_HEAD(&queue->link);
570 	INIT_LIST_HEAD(&queue->error.link);
571 	INIT_WORK(&queue->oom_event_work, oom_event_worker);
572 	INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
573 	list_add(&queue->link, &kctx->csf.queue_list);
574 
575 	region->flags |= KBASE_REG_NO_USER_FREE;
576 	region->user_data = queue;
577 
578 	/* Initialize the cs_trace configuration parameters. When buffer_size
579 	 * is 0, trace is disabled. Here we only update the fields when trace is
580 	 * enabled, otherwise we leave them as default zeros.
581 	 */
582 	if (reg_ex && reg_ex->ex_buffer_size) {
583 		u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET(
584 					0, reg_ex->ex_event_size);
585 		cfg = CS_INSTR_CONFIG_EVENT_STATE_SET(
586 					cfg, reg_ex->ex_event_state);
587 
588 		queue->trace_cfg = cfg;
589 		queue->trace_buffer_size = reg_ex->ex_buffer_size;
590 		queue->trace_buffer_base = reg_ex->ex_buffer_base;
591 		queue->trace_offset_ptr = reg_ex->ex_offset_var_addr;
592 	}
593 
594 out_unlock_vm:
595 	kbase_gpu_vm_unlock(kctx);
596 out:
597 	mutex_unlock(&kctx->csf.lock);
598 
599 	return ret;
600 }
601 
602 int kbase_csf_queue_register(struct kbase_context *kctx,
603 			     struct kbase_ioctl_cs_queue_register *reg)
604 {
605 	return csf_queue_register_internal(kctx, reg, NULL);
606 }
607 
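/* Register a GPU command queue with the extended (cs_trace) parameters. The
 * cs_trace configuration is validated against the instrumentation features
 * reported through the global interface before taking the common
 * registration path.
 */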
608 int kbase_csf_queue_register_ex(struct kbase_context *kctx,
609 				struct kbase_ioctl_cs_queue_register_ex *reg)
610 {
611 	struct kbase_csf_global_iface const *const iface =
612 						&kctx->kbdev->csf.global_iface;
613 	u32 const glb_version = iface->version;
614 	u32 instr = iface->instr_features;
615 	u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr);
616 	u32 min_buf_size = (1u << reg->ex_event_size) *
617 			GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
618 
619 	/* If the cs_trace command is not supported, the call fails */
620 	if (glb_version < kbase_csf_interface_version(1, 1, 0))
621 		return -EINVAL;
622 
623 	/* Validate the cs_trace configuration parameters */
624 	if (reg->ex_buffer_size &&
625 	    ((reg->ex_event_size > max_size) ||
626 	     (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
627 	     (reg->ex_buffer_size < min_buf_size)))
628 		return -EINVAL;
629 
630 	return csf_queue_register_internal(kctx, NULL, reg);
631 }
632 
633 static void unbind_queue(struct kbase_context *kctx,
634 		struct kbase_queue *queue);
635 
636 void kbase_csf_queue_terminate(struct kbase_context *kctx,
637 			      struct kbase_ioctl_cs_queue_terminate *term)
638 {
639 	struct kbase_device *kbdev = kctx->kbdev;
640 	struct kbase_queue *queue;
641 	int err;
642 	bool reset_prevented = false;
643 
644 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
645 	if (err)
646 		dev_warn(
647 			kbdev->dev,
648 			"Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless",
649 			term->buffer_gpu_addr);
650 	else
651 		reset_prevented = true;
652 
653 	mutex_lock(&kctx->csf.lock);
654 	queue = find_queue(kctx, term->buffer_gpu_addr);
655 
656 	if (queue) {
657 		/* As the GPU queue has been terminated by userspace, undo the
658 		 * actions that were performed when the queue was registered,
659 		 * i.e. remove the queue from the per-context list and release
660 		 * the initial reference. Subsequent lookups for the queue in
661 		 * find_queue() will then fail.
662 		 */
663 		list_del_init(&queue->link);
664 
665 		/* Stop the CSI to which queue was bound */
666 		unbind_queue(kctx, queue);
667 
668 		kbase_gpu_vm_lock(kctx);
669 		if (!WARN_ON(!queue->queue_reg)) {
670 			/* After this, userspace is able to free the memory for
671 			 * the GPU queue. In case userspace missed terminating
672 			 * the queue, the cleanup happens on context termination,
673 			 * where tear-down of the region tracker frees up the
674 			 * GPU queue memory.
675 			 */
676 			queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
677 			queue->queue_reg->user_data = NULL;
678 		}
679 		kbase_gpu_vm_unlock(kctx);
680 
681 		dev_dbg(kctx->kbdev->dev,
682 			"Remove any pending command queue fatal from context %pK\n",
683 			(void *)kctx);
684 		kbase_csf_event_remove_error(kctx, &queue->error);
685 
686 		release_queue(queue);
687 	}
688 
689 	mutex_unlock(&kctx->csf.lock);
690 	if (reset_prevented)
691 		kbase_reset_gpu_allow(kbdev);
692 }
693 
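/* Bind a registered queue to a queue group on the CSI index requested by
 * userspace. On success the mmap handle (cookie) for the queue's user I/O
 * pages is returned via bind->out and the bind is left in progress until
 * that mapping is actually created.
 */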
694 int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind)
695 {
696 	struct kbase_queue *queue;
697 	struct kbase_queue_group *group;
698 	u8 max_streams;
699 	int ret = -EINVAL;
700 
701 	mutex_lock(&kctx->csf.lock);
702 
703 	group = find_queue_group(kctx, bind->in.group_handle);
704 	queue = find_queue(kctx, bind->in.buffer_gpu_addr);
705 
706 	if (!group || !queue)
707 		goto out;
708 
709 	/* For the time being, all CSGs have the same number of CSs
710 	 * so we check CSG 0 for this number
711 	 */
712 	max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num;
713 
714 	if (bind->in.csi_index >= max_streams)
715 		goto out;
716 
717 	if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
718 		goto out;
719 
720 	if (queue->group || group->bound_queues[bind->in.csi_index])
721 		goto out;
722 
723 	ret = get_user_pages_mmap_handle(kctx, queue);
724 	if (ret)
725 		goto out;
726 
727 	bind->out.mmap_handle = queue->handle;
728 	group->bound_queues[bind->in.csi_index] = queue;
729 	queue->group = group;
730 	queue->csi_index = bind->in.csi_index;
731 	queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS;
732 
733 out:
734 	mutex_unlock(&kctx->csf.lock);
735 
736 	return ret;
737 }
738 
739 static struct kbase_queue_group *get_bound_queue_group(
740 					struct kbase_queue *queue)
741 {
742 	struct kbase_context *kctx = queue->kctx;
743 	struct kbase_queue_group *group;
744 
745 	if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND)
746 		return NULL;
747 
748 	if (!queue->group)
749 		return NULL;
750 
751 	if (queue->csi_index == KBASEP_IF_NR_INVALID) {
752 		dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n");
753 		return NULL;
754 	}
755 
756 	group = queue->group;
757 
758 	if (group->bound_queues[queue->csi_index] != queue) {
759 		dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n");
760 		return NULL;
761 	}
762 
763 	return group;
764 }
765 
766 /**
767  * pending_submission_worker() - Work item to process pending kicked GPU command queues.
768  *
769  * @work: Pointer to pending_submission_work.
770  *
771  * This function starts all pending queues for which work was previously
772  * submitted via an ioctl call from the application thread.
773  * If a queue is already scheduled and resident, it is started right away,
774  * otherwise it is started once its group is made resident.
775  */
776 static void pending_submission_worker(struct work_struct *work)
777 {
778 	struct kbase_context *kctx =
779 		container_of(work, struct kbase_context, csf.pending_submission_work);
780 	struct kbase_device *kbdev = kctx->kbdev;
781 	struct kbase_queue *queue;
782 	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
783 
784 	if (err) {
785 		dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue");
786 		return;
787 	}
788 
789 	mutex_lock(&kctx->csf.lock);
790 
791 	/* Iterate through the queue list and schedule the pending ones for submission. */
792 	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
793 		if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
794 			struct kbase_queue_group *group = get_bound_queue_group(queue);
795 
796 			if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)
797 				dev_dbg(kbdev->dev, "queue is not bound to a group");
798 			else
799 				WARN_ON(kbase_csf_scheduler_queue_start(queue));
800 		}
801 	}
802 
803 	mutex_unlock(&kctx->csf.lock);
804 
805 	kbase_reset_gpu_allow(kbdev);
806 }
807 
808 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
809 {
810 	if (WARN_ON(slot < 0))
811 		return;
812 
813 	kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
814 }
815 
816 void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
817 				       u32 slot_bitmap)
818 {
819 	const struct kbase_csf_global_iface *const global_iface =
820 		&kbdev->csf.global_iface;
821 	const u32 allowed_bitmap =
822 		(u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
823 	u32 value;
824 
825 	if (WARN_ON(slot_bitmap > allowed_bitmap))
826 		return;
827 
828 	value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
829 	value ^= slot_bitmap;
830 	kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value,
831 					     slot_bitmap);
832 
833 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
834 }
835 
836 void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
837 			struct kbase_queue *queue)
838 {
839 	mutex_lock(&kbdev->csf.reg_lock);
840 
841 	if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID)
842 		kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr);
843 
844 	mutex_unlock(&kbdev->csf.reg_lock);
845 }
846 
847 void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
848 				       int csi_index, int csg_nr,
849 				       bool ring_csg_doorbell)
850 {
851 	struct kbase_csf_cmd_stream_group_info *ginfo;
852 	u32 value;
853 
854 	if (WARN_ON(csg_nr < 0) ||
855 	    WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
856 		return;
857 
858 	ginfo = &kbdev->csf.global_iface.groups[csg_nr];
859 
860 	if (WARN_ON(csi_index < 0) ||
861 	    WARN_ON(csi_index >= ginfo->stream_num))
862 		return;
863 
864 	value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
865 	value ^= (1 << csi_index);
866 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
867 					  1 << csi_index);
868 
869 	if (likely(ring_csg_doorbell))
870 		kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
871 }
872 
873 static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
874 {
875 	queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
876 }
877 
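/* Kick a GPU command queue. The queue is looked up via the VA region
 * enclosing the supplied buffer address, its pending flag is set atomically,
 * and the actual submission is deferred to the context's
 * pending_submission_work item.
 */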
878 int kbase_csf_queue_kick(struct kbase_context *kctx,
879 			 struct kbase_ioctl_cs_queue_kick *kick)
880 {
881 	struct kbase_device *kbdev = kctx->kbdev;
882 	bool trigger_submission = false;
883 	struct kbase_va_region *region;
884 	int err = 0;
885 
886 	/* GPU work submission happens asynchronously to avoid contention on the
887 	 * scheduler lock and, as a result, blocking the application thread. For this
888 	 * reason, the vm_lock is used here to get a reference to the queue, based on its
889 	 * buffer_gpu_addr, from the context's list of active va_regions.
890 	 * Once the target queue is found, the pending flag is set to one atomically,
891 	 * avoiding a race between the submission ioctl thread and the work item.
892 	 */
893 	kbase_gpu_vm_lock(kctx);
894 	region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
895 	if (!kbase_is_region_invalid_or_free(region)) {
896 		struct kbase_queue *queue = region->user_data;
897 
898 		if (queue) {
899 			atomic_cmpxchg(&queue->pending, 0, 1);
900 			trigger_submission = true;
901 		}
902 	} else {
903 		dev_dbg(kbdev->dev,
904 			"Attempt to kick GPU queue without a valid command buffer region");
905 		err = -EFAULT;
906 	}
907 	kbase_gpu_vm_unlock(kctx);
908 
909 	if (likely(trigger_submission))
910 		enqueue_gpu_submission_work(kctx);
911 
912 	return err;
913 }
914 
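/**
 * unbind_stopped_queue() - Remove the linkage between a stopped queue and
 *                          the group to which it was bound.
 *
 * @kctx:  Address of the kbase context within which the queue was created.
 * @queue: Pointer to the queue to be unlinked.
 *
 * Clears any pending protected-mode request for the queue's CSI, detaches
 * the queue from its group and releases the mmap handle that was reserved
 * for the queue.
 */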
915 static void unbind_stopped_queue(struct kbase_context *kctx,
916 			struct kbase_queue *queue)
917 {
918 	lockdep_assert_held(&kctx->csf.lock);
919 
920 	if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
921 		unsigned long flags;
922 
923 		kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
924 		bitmap_clear(queue->group->protm_pending_bitmap,
925 				queue->csi_index, 1);
926 		KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
927 			 queue->group, queue, queue->group->protm_pending_bitmap[0]);
928 		queue->group->bound_queues[queue->csi_index] = NULL;
929 		queue->group = NULL;
930 		kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
931 
932 		put_user_pages_mmap_handle(kctx, queue);
933 		queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
934 	}
935 }
936 /**
937  * unbind_queue() - Remove the linkage between a GPU command queue and the group
938  *		    to which it was bound or being bound.
939  *
940  * @kctx:	Address of the kbase context within which the queue was created.
941  * @queue:	Pointer to the queue to be unlinked.
942  *
943  * This function will also send the stop request to firmware for the CS
944  * if the group to which the GPU command queue was bound is scheduled.
945  *
946  * This function is called when:
947  * - a queue is being unbound. This happens when the IO mapping
948  *   created on bind is removed explicitly by userspace or when the
949  *   process is exiting.
950  * - a queue group which still has queues bound to it is being
951  *   terminated. This can happen on an explicit terminate request from
952  *   userspace or when the kbase context is being terminated.
953  * - a queue is being terminated without completing the bind operation.
954  *   This can happen if the queue group is terminated after the
955  *   CS_QUEUE_BIND ioctl but before the 2nd part of the bind operation,
956  *   which creates the IO mapping, is initiated.
957  * - there is a failure in executing the 2nd part of the bind operation,
958  *   inside the mmap handler, which creates the IO mapping for the queue.
959  */
960 
961 static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
962 {
963 	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
964 	lockdep_assert_held(&kctx->csf.lock);
965 
966 	if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
967 		if (queue->bind_state == KBASE_CSF_QUEUE_BOUND)
968 			kbase_csf_scheduler_queue_stop(queue);
969 
970 		unbind_stopped_queue(kctx, queue);
971 	}
972 }
973 
974 void kbase_csf_queue_unbind(struct kbase_queue *queue)
975 {
976 	struct kbase_context *kctx = queue->kctx;
977 
978 	lockdep_assert_held(&kctx->csf.lock);
979 
980 	/* As the process itself is exiting, the whole queue group can be
981 	 * terminated, which is much faster than stopping the individual
982 	 * queues. This ensures a faster exit for the process, especially in
983 	 * the case where a CSI gets stuck.
984 	 * The CSI STOP request waits for the in-flight work to drain, whereas
985 	 * the CSG TERM request results in an immediate abort or cancellation
986 	 * of the pending work.
987 	 */
988 	if (current->flags & PF_EXITING) {
989 		struct kbase_queue_group *group = get_bound_queue_group(queue);
990 
991 		if (group)
992 			term_queue_group(group);
993 
994 		WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND);
995 	} else {
996 		unbind_queue(kctx, queue);
997 	}
998 
999 	/* Free the resources, if allocated for this queue. */
1000 	if (queue->reg)
1001 		kbase_csf_free_command_stream_user_pages(kctx, queue);
1002 }
1003 
1004 void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
1005 {
1006 	struct kbase_context *kctx = queue->kctx;
1007 
1008 	lockdep_assert_held(&kctx->csf.lock);
1009 
1010 	WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
1011 	unbind_stopped_queue(kctx, queue);
1012 
1013 	/* Free the resources, if allocated for this queue. */
1014 	if (queue->reg)
1015 		kbase_csf_free_command_stream_user_pages(kctx, queue);
1016 }
1017 
1018 /**
1019  * find_free_group_handle() - Find a free handle for a queue group
1020  *
1021  * @kctx: Address of the kbase context within which the queue group
1022  *        is to be created.
1023  *
1024  * Return: a queue group handle on success, or a negative error code on failure.
1025  */
1026 static int find_free_group_handle(struct kbase_context *const kctx)
1027 {
1028 	/* find the available index in the array of CSGs per this context */
1029 	int idx, group_handle = -ENOMEM;
1030 
1031 	lockdep_assert_held(&kctx->csf.lock);
1032 
1033 	for (idx = 0;
1034 		(idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0);
1035 		idx++) {
1036 		if (!kctx->csf.queue_groups[idx])
1037 			group_handle = idx;
1038 	}
1039 
1040 	return group_handle;
1041 }
1042 
1043 /**
1044  * iface_has_enough_streams() - Check that at least one CSG supports
1045  *                              a given number of CS
1046  *
1047  * @kbdev:  Instance of a GPU platform device that implements a CSF interface.
1048  * @cs_min: Minimum number of CSs required.
1049  *
1050  * Return: true if at least one CSG supports the given number
1051  *         of CSs (or more); otherwise false.
1052  */
1053 static bool iface_has_enough_streams(struct kbase_device *const kbdev,
1054 	u32 const cs_min)
1055 {
1056 	bool has_enough = false;
1057 	struct kbase_csf_cmd_stream_group_info *const groups =
1058 		kbdev->csf.global_iface.groups;
1059 	const u32 group_num = kbdev->csf.global_iface.group_num;
1060 	u32 i;
1061 
1062 	for (i = 0; (i < group_num) && !has_enough; i++) {
1063 		if (groups[i].stream_num >= cs_min)
1064 			has_enough = true;
1065 	}
1066 
1067 	return has_enough;
1068 }
1069 
1070 /**
1071  * create_normal_suspend_buffer() - Create normal-mode suspend buffer per
1072  *					queue group
1073  *
1074  * @kctx:	Pointer to kbase context where the queue group is created at
1075  * @s_buf:	Pointer to suspend buffer that is attached to queue group
1076  *
1077  * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1078  *         MMU page table. Otherwise -ENOMEM.
1079  */
1080 static int create_normal_suspend_buffer(struct kbase_context *const kctx,
1081 		struct kbase_normal_suspend_buffer *s_buf)
1082 {
1083 	struct kbase_va_region *reg = NULL;
1084 	const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
1085 	const size_t nr_pages =
1086 		PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1087 	int err = 0;
1088 
1089 	/* Calls to this function are inherently asynchronous, with respect to
1090 	 * MMU operations.
1091 	 */
1092 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
1093 
1094 	lockdep_assert_held(&kctx->csf.lock);
1095 
1096 	/* Allocate and initialize Region Object */
1097 	reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
1098 			nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1099 
1100 	if (!reg)
1101 		return -ENOMEM;
1102 
1103 	s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
1104 
1105 	if (!s_buf->phy) {
1106 		err = -ENOMEM;
1107 		goto phy_alloc_failed;
1108 	}
1109 
1110 	/* Get physical page for a normal suspend buffer */
1111 	err = kbase_mem_pool_alloc_pages(
1112 			&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1113 			nr_pages, &s_buf->phy[0], false);
1114 
1115 	if (err < 0)
1116 		goto phy_pages_alloc_failed;
1117 
1118 	/* Insert Region Object into rbtree and make virtual address available
1119 	 * to map it to physical page
1120 	 */
1121 	mutex_lock(&kctx->kbdev->csf.reg_lock);
1122 	err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
1123 	reg->flags &= ~KBASE_REG_FREE;
1124 	mutex_unlock(&kctx->kbdev->csf.reg_lock);
1125 
1126 	if (err)
1127 		goto add_va_region_failed;
1128 
1129 	/* Update MMU table */
1130 	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1131 				     reg->start_pfn, &s_buf->phy[0], nr_pages,
1132 				     mem_flags, MCU_AS_NR,
1133 				     KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
1134 	if (err)
1135 		goto mmu_insert_failed;
1136 
1137 	s_buf->reg = reg;
1138 
1139 	return 0;
1140 
1141 mmu_insert_failed:
1142 	mutex_lock(&kctx->kbdev->csf.reg_lock);
1143 	kbase_remove_va_region(kctx->kbdev, reg);
1144 	mutex_unlock(&kctx->kbdev->csf.reg_lock);
1145 
1146 add_va_region_failed:
1147 	kbase_mem_pool_free_pages(
1148 		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1149 		&s_buf->phy[0], false, false);
1150 
1151 phy_pages_alloc_failed:
1152 	kfree(s_buf->phy);
1153 phy_alloc_failed:
1154 	kfree(reg);
1155 
1156 	return err;
1157 }
1158 
1159 /**
1160  * create_protected_suspend_buffer() - Create protected-mode suspend buffer
1161  *					per queue group
1162  *
1163  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1164  * @s_buf: Pointer to suspend buffer that is attached to queue group
1165  *
1166  * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
1167  *         MMU page table. Otherwise -ENOMEM.
1168  */
1169 static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
1170 		struct kbase_protected_suspend_buffer *s_buf)
1171 {
1172 	struct kbase_va_region *reg = NULL;
1173 	struct tagged_addr *phys = NULL;
1174 	const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
1175 	const size_t nr_pages =
1176 		PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1177 	int err = 0;
1178 
1179 	/* Calls to this function are inherently asynchronous, with respect to
1180 	 * MMU operations.
1181 	 */
1182 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
1183 
1184 	/* Allocate and initialize Region Object */
1185 	reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
1186 			nr_pages, KBASE_REG_ZONE_MCU_SHARED);
1187 
1188 	if (!reg)
1189 		return -ENOMEM;
1190 
1191 	phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
1192 	if (!phys) {
1193 		err = -ENOMEM;
1194 		goto phy_alloc_failed;
1195 	}
1196 
1197 	s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
1198 			nr_pages);
1199 	if (s_buf->pma == NULL) {
1200 		err = -ENOMEM;
1201 		goto pma_alloc_failed;
1202 	}
1203 
1204 	/* Insert Region Object into rbtree and make virtual address available
1205 	 * to map it to physical page
1206 	 */
1207 	mutex_lock(&kbdev->csf.reg_lock);
1208 	err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
1209 	reg->flags &= ~KBASE_REG_FREE;
1210 	mutex_unlock(&kbdev->csf.reg_lock);
1211 
1212 	if (err)
1213 		goto add_va_region_failed;
1214 
1215 	/* Update MMU table */
1216 	err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn,
1217 				     phys, nr_pages, mem_flags, MCU_AS_NR,
1218 				     KBASE_MEM_GROUP_CSF_FW, mmu_sync_info);
1219 	if (err)
1220 		goto mmu_insert_failed;
1221 
1222 	s_buf->reg = reg;
1223 	kfree(phys);
1224 	return 0;
1225 
1226 mmu_insert_failed:
1227 	mutex_lock(&kbdev->csf.reg_lock);
1228 	kbase_remove_va_region(kbdev, reg);
1229 	mutex_unlock(&kbdev->csf.reg_lock);
1230 
1231 add_va_region_failed:
1232 	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1233 pma_alloc_failed:
1234 	kfree(phys);
1235 phy_alloc_failed:
1236 	kfree(reg);
1237 
1238 	return err;
1239 }
1240 
1241 static void timer_event_worker(struct work_struct *data);
1242 static void protm_event_worker(struct work_struct *data);
1243 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1244 		struct kbase_normal_suspend_buffer *s_buf);
1245 
1246 /**
1247  * create_suspend_buffers - Setup normal and protected mode
1248  *				suspend buffers.
1249  *
1250  * @kctx:	Address of the kbase context within which the queue group
1251  *		is to be created.
1252  * @group:	Pointer to GPU command queue group data.
1253  *
1254  * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM.
1255  */
1256 static int create_suspend_buffers(struct kbase_context *const kctx,
1257 		struct kbase_queue_group * const group)
1258 {
1259 	int err = 0;
1260 
1261 	if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
1262 		dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
1263 		return -ENOMEM;
1264 	}
1265 
1266 	if (kctx->kbdev->csf.pma_dev) {
1267 		err = create_protected_suspend_buffer(kctx->kbdev,
1268 				&group->protected_suspend_buf);
1269 		if (err) {
1270 			term_normal_suspend_buffer(kctx,
1271 					&group->normal_suspend_buf);
1272 			dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
1273 		}
1274 	} else {
1275 		group->protected_suspend_buf.reg = NULL;
1276 	}
1277 
1278 	return err;
1279 }
1280 
1281 /**
1282  * generate_group_uid() - Generate an ID for a queue group and CSG that is
1283  *                        unique across all kbase devices and contexts.
1284  *
1285  * Return:      A unique ID in the form of an unsigned 32-bit integer
1286  */
1287 static u32 generate_group_uid(void)
1288 {
1289 	static atomic_t global_csg_uid = ATOMIC_INIT(0);
1290 
1291 	return (u32)atomic_inc_return(&global_csg_uid);
1292 }
1293 
1294 /**
1295  * create_queue_group() - Create a queue group
1296  *
1297  * @kctx:	Address of the kbase context within which the queue group
1298  *		is to be created.
1299  * @create:	Address of a structure which contains details of the
1300  *		queue group which is to be created.
1301  *
1302  * Return: a queue group handle on success, or a negative error code on failure.
1303  */
1304 static int create_queue_group(struct kbase_context *const kctx,
1305 	union kbase_ioctl_cs_queue_group_create *const create)
1306 {
1307 	int group_handle = find_free_group_handle(kctx);
1308 
1309 	if (group_handle < 0) {
1310 		dev_dbg(kctx->kbdev->dev,
1311 			"All queue group handles are already in use");
1312 	} else {
1313 		struct kbase_queue_group * const group =
1314 			kmalloc(sizeof(struct kbase_queue_group),
1315 					GFP_KERNEL);
1316 
1317 		lockdep_assert_held(&kctx->csf.lock);
1318 
1319 		if (!group) {
1320 			dev_err(kctx->kbdev->dev, "Failed to allocate a queue group\n");
1321 			group_handle = -ENOMEM;
1322 		} else {
1323 			int err = 0;
1324 
1325 			group->kctx = kctx;
1326 			group->handle = group_handle;
1327 			group->csg_nr = KBASEP_CSG_NR_INVALID;
1328 
1329 			group->tiler_mask = create->in.tiler_mask;
1330 			group->fragment_mask = create->in.fragment_mask;
1331 			group->compute_mask = create->in.compute_mask;
1332 
1333 			group->tiler_max = create->in.tiler_max;
1334 			group->fragment_max = create->in.fragment_max;
1335 			group->compute_max = create->in.compute_max;
1336 			group->priority = kbase_csf_priority_queue_group_priority_to_relative(
1337 				kbase_csf_priority_check(kctx->kbdev, create->in.priority));
1338 			group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
1339 			group->faulted = false;
1340 
1341 
1342 			group->group_uid = generate_group_uid();
1343 			create->out.group_uid = group->group_uid;
1344 
1345 			INIT_LIST_HEAD(&group->link);
1346 			INIT_LIST_HEAD(&group->link_to_schedule);
1347 			INIT_LIST_HEAD(&group->error_fatal.link);
1348 			INIT_LIST_HEAD(&group->error_timeout.link);
1349 			INIT_LIST_HEAD(&group->error_tiler_oom.link);
1350 			INIT_WORK(&group->timer_event_work, timer_event_worker);
1351 			INIT_WORK(&group->protm_event_work, protm_event_worker);
1352 			bitmap_zero(group->protm_pending_bitmap,
1353 					MAX_SUPPORTED_STREAMS_PER_GROUP);
1354 
1355 			group->run_state = KBASE_CSF_GROUP_INACTIVE;
1356 			err = create_suspend_buffers(kctx, group);
1357 
1358 			if (err < 0) {
1359 				kfree(group);
1360 				group_handle = err;
1361 			} else {
1362 				int j;
1363 
1364 				kctx->csf.queue_groups[group_handle] = group;
1365 				for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP;
1366 						j++)
1367 					group->bound_queues[j] = NULL;
1368 			}
1369 		}
1370 	}
1371 
1372 	return group_handle;
1373 }
1374 
1375 
1376 int kbase_csf_queue_group_create(struct kbase_context *const kctx,
1377 			union kbase_ioctl_cs_queue_group_create *const create)
1378 {
1379 	int err = 0;
1380 	const u32 tiler_count = hweight64(create->in.tiler_mask);
1381 	const u32 fragment_count = hweight64(create->in.fragment_mask);
1382 	const u32 compute_count = hweight64(create->in.compute_mask);
1383 
1384 	mutex_lock(&kctx->csf.lock);
1385 
1386 	if ((create->in.tiler_max > tiler_count) ||
1387 	    (create->in.fragment_max > fragment_count) ||
1388 	    (create->in.compute_max > compute_count)) {
1389 		dev_dbg(kctx->kbdev->dev,
1390 			"Invalid maximum number of endpoints for a queue group");
1391 		err = -EINVAL;
1392 	} else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
1393 		dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
1394 			(unsigned int)create->in.priority);
1395 		err = -EINVAL;
1396 	} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
1397 		dev_dbg(kctx->kbdev->dev,
1398 			"No CSG has at least %d CSs",
1399 			create->in.cs_min);
1400 		err = -EINVAL;
1401 	} else if (create->in.reserved) {
1402 		dev_warn(kctx->kbdev->dev, "Reserved field was set to non-0");
1403 		err = -EINVAL;
1404 	} else {
1405 		/* For the CSG which satisfies the condition for having
1406 		 * the needed number of CSs, check whether it also conforms
1407 		 * with the requirements for at least one of its CSs having
1408 		 * the iterator of the needed type
1409 		 * (note: for CSF v1.0 all CSs in a CSG will have access to
1410 		 * the same iterators)
1411 		 */
1412 		const int group_handle = create_queue_group(kctx, create);
1413 
1414 		if (group_handle >= 0)
1415 			create->out.group_handle = group_handle;
1416 		else
1417 			err = group_handle;
1418 	}
1419 
1420 	mutex_unlock(&kctx->csf.lock);
1421 
1422 	return err;
1423 }
1424 
1425 /**
1426  * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group
1427  *
1428  * @kctx:	Pointer to kbase context where queue group belongs to
1429  * @s_buf:	Pointer to queue group suspend buffer to be freed
1430  */
1431 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1432 		struct kbase_normal_suspend_buffer *s_buf)
1433 {
1434 	const size_t nr_pages =
1435 		PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1436 
1437 	lockdep_assert_held(&kctx->csf.lock);
1438 
1439 	WARN_ON(kbase_mmu_teardown_pages(
1440 				kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
1441 				s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1442 
1443 	WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1444 
1445 	mutex_lock(&kctx->kbdev->csf.reg_lock);
1446 	kbase_remove_va_region(kctx->kbdev, s_buf->reg);
1447 	mutex_unlock(&kctx->kbdev->csf.reg_lock);
1448 
1449 	kbase_mem_pool_free_pages(
1450 			&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
1451 			nr_pages, &s_buf->phy[0], false, false);
1452 
1453 	kfree(s_buf->phy);
1454 	s_buf->phy = NULL;
1455 	kfree(s_buf->reg);
1456 	s_buf->reg = NULL;
1457 }
1458 
1459 /**
1460  * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
1461  *					queue group
1462  *
1463  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1464  * @s_buf: Pointer to queue group suspend buffer to be freed
1465  */
1466 static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
1467 		struct kbase_protected_suspend_buffer *s_buf)
1468 {
1469 	const size_t nr_pages =
1470 		PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1471 
1472 	WARN_ON(kbase_mmu_teardown_pages(
1473 			kbdev, &kbdev->csf.mcu_mmu,
1474 			s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
1475 
1476 	WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
1477 
1478 	mutex_lock(&kbdev->csf.reg_lock);
1479 	kbase_remove_va_region(kbdev, s_buf->reg);
1480 	mutex_unlock(&kbdev->csf.reg_lock);
1481 
1482 	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
1483 	s_buf->pma = NULL;
1484 	kfree(s_buf->reg);
1485 	s_buf->reg = NULL;
1486 }
1487 
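/* Terminate a queue group that has already been descheduled (i.e. inactive
 * or evicted due to a fault): unbind any queues still bound to it, free its
 * suspend buffers and mark it as terminated.
 */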
1488 void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
1489 {
1490 	struct kbase_context *kctx = group->kctx;
1491 
1492 	/* Currently each group supports the same number of CS */
1493 	u32 max_streams =
1494 		kctx->kbdev->csf.global_iface.groups[0].stream_num;
1495 	u32 i;
1496 
1497 	lockdep_assert_held(&kctx->csf.lock);
1498 
1499 	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE &&
1500 		group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
1501 
1502 	for (i = 0; i < max_streams; i++) {
1503 		struct kbase_queue *queue =
1504 				group->bound_queues[i];
1505 
1506 		/* The group is already being evicted from the scheduler */
1507 		if (queue)
1508 			unbind_stopped_queue(kctx, queue);
1509 	}
1510 
1511 	term_normal_suspend_buffer(kctx, &group->normal_suspend_buf);
1512 	if (kctx->kbdev->csf.pma_dev)
1513 		term_protected_suspend_buffer(kctx->kbdev,
1514 			&group->protected_suspend_buf);
1515 
1516 	group->run_state = KBASE_CSF_GROUP_TERMINATED;
1517 }
1518 
1519 /**
1520  * term_queue_group - Terminate a GPU command queue group.
1521  *
1522  * @group: Pointer to GPU command queue group data.
1523  *
1524  * Terminates a GPU command queue group. From the userspace perspective the
1525  * group will still exist but it can't bind new queues to it. Userspace can
1526  * still add work in queues bound to the group but it won't be executed. (This
1527  * is because the IO mapping created upon binding such queues is still intact.)
1528  */
1529 static void term_queue_group(struct kbase_queue_group *group)
1530 {
1531 	struct kbase_context *kctx = group->kctx;
1532 
1533 	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
1534 	lockdep_assert_held(&kctx->csf.lock);
1535 
1536 	/* Stop the group and evict it from the scheduler */
1537 	kbase_csf_scheduler_group_deschedule(group);
1538 
1539 	if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
1540 		return;
1541 
1542 	dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle);
1543 
1544 	kbase_csf_term_descheduled_queue_group(group);
1545 }
1546 
1547 static void cancel_queue_group_events(struct kbase_queue_group *group)
1548 {
1549 	cancel_work_sync(&group->timer_event_work);
1550 	cancel_work_sync(&group->protm_event_work);
1551 }
1552 
1553 static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
1554 {
1555 	struct kbase_context *kctx = group->kctx;
1556 
1557 	dev_dbg(kctx->kbdev->dev,
1558 		"Remove any pending group fatal error from context %pK\n",
1559 		(void *)group->kctx);
1560 
1561 	kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
1562 	kbase_csf_event_remove_error(kctx, &group->error_timeout);
1563 	kbase_csf_event_remove_error(kctx, &group->error_fatal);
1564 }
1565 
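/* Handle the termination of a queue group requested by userspace: remove any
 * pending fatal error notifications, terminate the group and drop it from
 * the context's table, then cancel any event work still in flight (after
 * releasing the context lock) before freeing the group object.
 */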
1566 void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
1567 				     u8 group_handle)
1568 {
1569 	struct kbase_queue_group *group;
1570 	int err;
1571 	bool reset_prevented = false;
1572 	struct kbase_device *const kbdev = kctx->kbdev;
1573 
1574 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1575 	if (err)
1576 		dev_warn(
1577 			kbdev->dev,
1578 			"Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless",
1579 			group_handle);
1580 	else
1581 		reset_prevented = true;
1582 
1583 	mutex_lock(&kctx->csf.lock);
1584 
1585 	group = find_queue_group(kctx, group_handle);
1586 
1587 	if (group) {
1588 		remove_pending_group_fatal_error(group);
1589 		term_queue_group(group);
1590 		kctx->csf.queue_groups[group_handle] = NULL;
1591 	}
1592 
1593 	mutex_unlock(&kctx->csf.lock);
1594 	if (reset_prevented)
1595 		kbase_reset_gpu_allow(kbdev);
1596 
1597 	if (!group)
1598 		return;
1599 
1600 	/* Cancel any pending event callbacks. If one is in progress
1601 	 * then this thread waits synchronously for it to complete (which
1602 	 * is why we must unlock the context first). We already ensured
1603 	 * that no more callbacks can be enqueued by terminating the group.
1604 	 */
1605 	cancel_queue_group_events(group);
1606 	kfree(group);
1607 }
1608 
1609 int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
1610 				  struct kbase_suspend_copy_buffer *sus_buf,
1611 				  u8 group_handle)
1612 {
1613 	struct kbase_device *const kbdev = kctx->kbdev;
1614 	int err;
1615 	struct kbase_queue_group *group;
1616 
1617 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1618 	if (err) {
1619 		dev_warn(
1620 			kbdev->dev,
1621 			"Unsuccessful GPU reset detected when suspending group %d",
1622 			group_handle);
1623 		return err;
1624 	}
1625 	mutex_lock(&kctx->csf.lock);
1626 
1627 	group = find_queue_group(kctx, group_handle);
1628 	if (group)
1629 		err = kbase_csf_scheduler_group_copy_suspend_buf(group,
1630 								 sus_buf);
1631 	else
1632 		err = -EINVAL;
1633 
1634 	mutex_unlock(&kctx->csf.lock);
1635 	kbase_reset_gpu_allow(kbdev);
1636 
1637 	return err;
1638 }
1639 
1640 void kbase_csf_add_group_fatal_error(
1641 	struct kbase_queue_group *const group,
1642 	struct base_gpu_queue_group_error const *const err_payload)
1643 {
1644 	struct base_csf_notification error;
1645 
1646 	if (WARN_ON(!group))
1647 		return;
1648 
1649 	if (WARN_ON(!err_payload))
1650 		return;
1651 
1652 	error = (struct base_csf_notification) {
1653 		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
1654 		.payload = {
1655 			.csg_error = {
1656 				.handle = group->handle,
1657 				.error = *err_payload
1658 			}
1659 		}
1660 	};
1661 
1662 	kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
1663 }
1664 
1665 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
1666 					 struct kbase_context *kctx)
1667 {
1668 	struct list_head evicted_groups;
1669 	struct kbase_queue_group *group;
1670 	int i;
1671 
1672 	INIT_LIST_HEAD(&evicted_groups);
1673 
1674 	mutex_lock(&kctx->csf.lock);
1675 
1676 	kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
1677 	while (!list_empty(&evicted_groups)) {
1678 		group = list_first_entry(&evicted_groups,
1679 				struct kbase_queue_group, link);
1680 
1681 		dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated",
1682 			    kctx->tgid, kctx->id, group->handle);
1683 		kbase_csf_term_descheduled_queue_group(group);
1684 		list_del_init(&group->link);
1685 	}
1686 
1687 	/* Acting on the queue groups that are pending to be terminated. */
1688 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1689 		group = kctx->csf.queue_groups[i];
1690 		if (group &&
1691 		    group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED)
1692 			kbase_csf_term_descheduled_queue_group(group);
1693 	}
1694 
1695 	mutex_unlock(&kctx->csf.lock);
1696 }
1697 
1698 int kbase_csf_ctx_init(struct kbase_context *kctx)
1699 {
1700 	struct kbase_device *kbdev = kctx->kbdev;
1701 	int err = -ENOMEM;
1702 
1703 	INIT_LIST_HEAD(&kctx->csf.queue_list);
1704 	INIT_LIST_HEAD(&kctx->csf.link);
1705 
1706 	kbase_csf_event_init(kctx);
1707 
1708 	kctx->csf.user_reg_vma = NULL;
1709 	mutex_lock(&kbdev->pm.lock);
1710 	/* The inode information for /dev/malixx file is not available at the
1711 	 * time of device probe as the inode is created when the device node
1712 	 * is created by udevd (through mknod).
1713 	 */
1714 	if (kctx->filp) {
1715 		if (!kbdev->csf.mali_file_inode)
1716 			kbdev->csf.mali_file_inode = kctx->filp->f_inode;
1717 
1718 		/* inode is unique for a file */
1719 		WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
1720 	}
1721 	mutex_unlock(&kbdev->pm.lock);
1722 
1723 	/* Mark all the cookies as 'free' */
1724 	bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
1725 
1726 	kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq",
1727 					WQ_UNBOUND, 1);
1728 
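	/* The remaining initialisation is done in stages (scheduler context,
	 * KCPU queue context, tiler heap context). If any stage fails, the
	 * stages that already succeeded are torn down in reverse order before
	 * the error is propagated to the caller.
	 */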
1729 	if (likely(kctx->csf.wq)) {
1730 		err = kbase_csf_scheduler_context_init(kctx);
1731 
1732 		if (likely(!err)) {
1733 			err = kbase_csf_kcpu_queue_context_init(kctx);
1734 
1735 			if (likely(!err)) {
1736 				err = kbase_csf_tiler_heap_context_init(kctx);
1737 
1738 				if (likely(!err)) {
1739 					mutex_init(&kctx->csf.lock);
1740 					INIT_WORK(&kctx->csf.pending_submission_work,
1741 						  pending_submission_worker);
1742 				} else
1743 					kbase_csf_kcpu_queue_context_term(kctx);
1744 			}
1745 
1746 			if (unlikely(err))
1747 				kbase_csf_scheduler_context_term(kctx);
1748 		}
1749 
1750 		if (unlikely(err))
1751 			destroy_workqueue(kctx->csf.wq);
1752 	}
1753 
1754 	return err;
1755 }
1756 
1757 void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
1758 		struct kbase_fault *fault)
1759 {
1760 	int gr;
1761 	bool reported = false;
1762 	struct base_gpu_queue_group_error err_payload;
1763 	int err;
1764 	struct kbase_device *kbdev;
1765 
1766 	if (WARN_ON(!kctx))
1767 		return;
1768 
1769 	if (WARN_ON(!fault))
1770 		return;
1771 
1772 	kbdev = kctx->kbdev;
1773 	err = kbase_reset_gpu_try_prevent(kbdev);
1774 	/* Regardless of whether reset failed or is currently happening, exit
1775 	 * early
1776 	 */
1777 	if (err)
1778 		return;
1779 
1780 	err_payload = (struct base_gpu_queue_group_error) {
1781 		.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
1782 		.payload = {
1783 			.fatal_group = {
1784 				.sideband = fault->addr,
1785 				.status = fault->status,
1786 			}
1787 		}
1788 	};
1789 
1790 	mutex_lock(&kctx->csf.lock);
1791 
1792 	for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
1793 		struct kbase_queue_group *const group =
1794 			kctx->csf.queue_groups[gr];
1795 
1796 		if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
1797 			term_queue_group(group);
1798 			kbase_csf_add_group_fatal_error(group, &err_payload);
1799 			reported = true;
1800 		}
1801 	}
1802 
1803 	mutex_unlock(&kctx->csf.lock);
1804 
1805 	if (reported)
1806 		kbase_event_wakeup(kctx);
1807 
1808 	kbase_reset_gpu_allow(kbdev);
1809 }
1810 
1811 void kbase_csf_ctx_term(struct kbase_context *kctx)
1812 {
1813 	struct kbase_device *kbdev = kctx->kbdev;
1814 	struct kbase_as *as = NULL;
1815 	unsigned long flags;
1816 	u32 i;
1817 	int err;
1818 	bool reset_prevented = false;
1819 
1820 	/* As the kbase context is terminating, its debugfs sub-directory has
1821 	 * already been removed, along with the debugfs files created for its
1822 	 * queue groups & kcpu queues, so there is no need to remove those
1823 	 * files explicitly.
1824 	 */
1825 
1826 	/* Wait for a GPU reset if it is happening, prevent it if not happening */
1827 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1828 	if (err)
1829 		dev_warn(
1830 			kbdev->dev,
1831 			"Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless",
1832 			kctx->tgid, kctx->id);
1833 	else
1834 		reset_prevented = true;
1835 
1836 	cancel_work_sync(&kctx->csf.pending_submission_work);
1837 
1838 	mutex_lock(&kctx->csf.lock);
1839 
1840 	/* Iterate through the queue groups that were not terminated by
1841 	 * userspace and issue the term request to firmware for them.
1842 	 */
1843 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1844 		struct kbase_queue_group *group = kctx->csf.queue_groups[i];
1845 
1846 		if (group) {
1847 			remove_pending_group_fatal_error(group);
1848 			term_queue_group(group);
1849 		}
1850 	}
1851 	mutex_unlock(&kctx->csf.lock);
1852 
1853 	if (reset_prevented)
1854 		kbase_reset_gpu_allow(kbdev);
1855 
1856 	/* Now that all queue groups have been terminated, there can be no
1857 	 * more OoM or timer event interrupts but there can be inflight work
1858 	 * items. Destroying the wq will implicitly flush those work items.
1859 	 */
1860 	destroy_workqueue(kctx->csf.wq);
1861 
1862 	/* Wait for the firmware error work item to also finish as it could
1863 	 * be affecting this outgoing context also.
1864 	 */
1865 	flush_work(&kctx->kbdev->csf.fw_error_work);
1866 
1867 	/* A work item to handle page_fault/bus_fault/gpu_fault could be
1868 	 * pending for the outgoing context. Flush the workqueue that will
1869 	 * execute that work item.
1870 	 */
1871 	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
1872 	if (kctx->as_nr != KBASEP_AS_NR_INVALID)
1873 		as = &kctx->kbdev->as[kctx->as_nr];
1874 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
1875 	if (as)
1876 		flush_workqueue(as->pf_wq);
1877 
1878 	mutex_lock(&kctx->csf.lock);
1879 
1880 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1881 		kfree(kctx->csf.queue_groups[i]);
1882 		kctx->csf.queue_groups[i] = NULL;
1883 	}
1884 
1885 	/* Iterate through the queues that were not terminated by
1886 	 * userspace and do the required cleanup for them.
1887 	 */
1888 	while (!list_empty(&kctx->csf.queue_list)) {
1889 		struct kbase_queue *queue;
1890 
1891 		queue = list_first_entry(&kctx->csf.queue_list,
1892 						struct kbase_queue, link);
1893 
1894 		/* The reference taken when the IO mapping was created on bind
1895 		 * must already have been dropped, otherwise the termination of
1896 		 * the kbase context itself would not have kicked in. So only
1897 		 * the single reference taken when the queue was registered
1898 		 * should be left.
1899 		 */
1900 		if (atomic_read(&queue->refcount) != 1)
1901 			dev_warn(kctx->kbdev->dev,
1902 				 "Releasing queue with incorrect refcounting!\n");
1903 		list_del_init(&queue->link);
1904 		release_queue(queue);
1905 	}
1906 
1907 	mutex_unlock(&kctx->csf.lock);
1908 
1909 	kbase_csf_tiler_heap_context_term(kctx);
1910 	kbase_csf_kcpu_queue_context_term(kctx);
1911 	kbase_csf_scheduler_context_term(kctx);
1912 	kbase_csf_event_term(kctx);
1913 
1914 	mutex_destroy(&kctx->csf.lock);
1915 }
1916 
1917 /**
1918  * handle_oom_event - Handle the OoM event generated by the firmware for the
1919  *                    CSI.
1920  *
1921  * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
1922  * @stream: Pointer to the structure containing info provided by the firmware
1923  *          about the CSI.
1924  *
1925  * This function handles the OoM event request from the firmware for the
1926  * CS. It retrieves the address of the heap context and the heap's
1927  * statistics (like the number of render passes in-flight) from the CS's
1928  * kernel output page and passes them to the tiler heap function to
1929  * allocate a new chunk.
1930  * It also updates the CS's kernel input page with the address of the
1931  * new chunk that was allocated.
1932  *
1933  * Return: 0 if successfully handled the request, otherwise a negative error
1934  *         code on failure.
1935  */
1936 static int handle_oom_event(struct kbase_context *const kctx,
1937 		struct kbase_csf_cmd_stream_info const *const stream)
1938 {
1939 	u64 gpu_heap_va =
1940 		kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
1941 		((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
1942 	const u32 vt_start =
1943 		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START);
1944 	const u32 vt_end =
1945 		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END);
1946 	const u32 frag_end =
1947 		kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
1948 	u32 renderpasses_in_flight;
1949 	u32 pending_frag_count;
1950 	u64 new_chunk_ptr;
1951 	int err;
1952 
1953 	if ((frag_end > vt_end) || (vt_end >= vt_start)) {
1954 		dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
1955 			 vt_start, vt_end, frag_end);
1956 		return -EINVAL;
1957 	}
1958 
1959 	renderpasses_in_flight = vt_start - frag_end;
1960 	pending_frag_count = vt_end - frag_end;
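	/* Worked example, assuming VT_START, VT_END and FRAG_END are cumulative
	 * counts of render passes that have started vertex/tiling, completed
	 * vertex/tiling and completed fragment processing respectively: with
	 * vt_start = 12, vt_end = 10 and frag_end = 7 there are 12 - 7 = 5
	 * render passes in flight, of which 10 - 7 = 3 have finished
	 * vertex/tiling and are pending fragment processing.
	 */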
1961 
1962 	err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
1963 		gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
1964 
1965 	/* It is okay to acknowledge with a NULL chunk (firmware will then wait
1966 	 * for the fragment jobs to complete and release chunks)
1967 	 */
1968 	if (err == -EBUSY)
1969 		new_chunk_ptr = 0;
1970 	else if (err)
1971 		return err;
1972 
1973 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
1974 				new_chunk_ptr & 0xFFFFFFFF);
1975 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI,
1976 				new_chunk_ptr >> 32);
1977 
1978 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO,
1979 				new_chunk_ptr & 0xFFFFFFFF);
1980 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI,
1981 				new_chunk_ptr >> 32);
1982 
1983 	return 0;
1984 }
1985 
1986 /**
1987  * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
1988  *
1989  * @group: Pointer to the GPU command queue group that encountered the error
1990  */
1991 static void report_tiler_oom_error(struct kbase_queue_group *group)
1992 {
1993 	struct base_csf_notification const
1994 		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
1995 			  .payload = {
1996 				  .csg_error = {
1997 					  .handle = group->handle,
1998 					  .error = {
1999 						  .error_type =
2000 							  BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
2001 					  } } } };
2002 
2003 	kbase_csf_event_add_error(group->kctx,
2004 				  &group->error_tiler_oom,
2005 				  &error);
2006 	kbase_event_wakeup(group->kctx);
2007 }
2008 
2009 /**
2010  * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
2011  *
2012  * @queue: Pointer to queue for which out-of-memory event was received.
2013  *
2014  * Called with the CSF lock of the queue's kbase context held.
2015  * Do not call in interrupt context.
2016  *
2017  * Handles tiler out-of-memory for a GPU command queue and then clears the
2018  * notification to allow the firmware to report out-of-memory again in future.
2019  * If the out-of-memory condition was successfully handled then this function
2020  * rings the relevant doorbell to notify the firmware; otherwise, it terminates
2021  * the GPU command queue group to which the queue is bound. See
2022  * term_queue_group() for details.
2023  */
2024 static void kbase_queue_oom_event(struct kbase_queue *const queue)
2025 {
2026 	struct kbase_context *const kctx = queue->kctx;
2027 	struct kbase_device *const kbdev = kctx->kbdev;
2028 	struct kbase_queue_group *group;
2029 	int slot_num, err;
2030 	struct kbase_csf_cmd_stream_group_info const *ginfo;
2031 	struct kbase_csf_cmd_stream_info const *stream;
2032 	int csi_index = queue->csi_index;
2033 	u32 cs_oom_ack, cs_oom_req;
2034 
2035 	lockdep_assert_held(&kctx->csf.lock);
2036 
2037 	group = get_bound_queue_group(queue);
2038 	if (!group) {
2039 		dev_warn(kctx->kbdev->dev, "queue not bound\n");
2040 		return;
2041 	}
2042 
2043 	kbase_csf_scheduler_lock(kbdev);
2044 
2045 	slot_num = kbase_csf_scheduler_group_get_slot(group);
2046 
2047 	/* The group could have gone off slot before this work item got
2048 	 * a chance to execute.
2049 	 */
2050 	if (slot_num < 0)
2051 		goto unlock;
2052 
2053 	/* If the bound group is on a slot but the kctx has been marked as
2054 	 * disabled on an address-space fault, the group is pending termination,
2055 	 * so skip the in-flight OoM operation.
2056 	 */
2057 	if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
2058 		goto unlock;
2059 
2060 	ginfo = &kbdev->csf.global_iface.groups[slot_num];
2061 	stream = &ginfo->streams[csi_index];
2062 	cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
2063 		     CS_ACK_TILER_OOM_MASK;
2064 	cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
2065 		     CS_REQ_TILER_OOM_MASK;
2066 
2067 	/* The group could have already undergone suspend-resume cycle before
2068 	 * this work item got a chance to execute. On CSG resume the CS_ACK
2069 	 * register is set by firmware to reflect the CS_REQ register, which
2070 	 * implies that all events signaled before suspension are implicitly
2071 	 * acknowledged.
2072 	 * A new OoM event is expected to be generated after resume.
2073 	 */
2074 	if (cs_oom_ack == cs_oom_req)
2075 		goto unlock;
2076 
2077 	err = handle_oom_event(kctx, stream);
2078 
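	/* Acknowledge the TILER_OOM event by copying the CS_ACK.TILER_OOM bit
	 * value into CS_REQ.TILER_OOM: with the request/acknowledge toggle
	 * protocol the event is pending while the two bits differ, and making
	 * them equal again allows the firmware to report another OoM event.
	 */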
2079 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
2080 					 CS_REQ_TILER_OOM_MASK);
2081 
2082 	if (err) {
2083 		dev_warn(
2084 			kbdev->dev,
2085 			"Queue group to be terminated, couldn't handle the OoM event\n");
2086 		kbase_csf_scheduler_unlock(kbdev);
2087 		term_queue_group(group);
2088 		report_tiler_oom_error(group);
2089 		return;
2090 	}
2091 
2092 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2093 unlock:
2094 	kbase_csf_scheduler_unlock(kbdev);
2095 }
2096 
2097 /**
2098  * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
2099  *
2100  * @data: Pointer to a work_struct embedded in GPU command queue data.
2101  *
2102  * Handles a tiler out-of-memory condition for a GPU command queue and then
2103  * releases a reference that was added to prevent the queue being destroyed
2104  * while this work item was pending on a workqueue.
2105  */
2106 static void oom_event_worker(struct work_struct *data)
2107 {
2108 	struct kbase_queue *queue =
2109 		container_of(data, struct kbase_queue, oom_event_work);
2110 	struct kbase_context *kctx = queue->kctx;
2111 	struct kbase_device *const kbdev = kctx->kbdev;
2112 
2113 	int err = kbase_reset_gpu_try_prevent(kbdev);
2114 	/* Regardless of whether reset failed or is currently happening, exit
2115 	 * early
2116 	 */
2117 	if (err)
2118 		return;
2119 
2120 	mutex_lock(&kctx->csf.lock);
2121 
2122 	kbase_queue_oom_event(queue);
2123 	release_queue(queue);
2124 
2125 	mutex_unlock(&kctx->csf.lock);
2126 	kbase_reset_gpu_allow(kbdev);
2127 }
2128 
2129 /**
2130  * report_group_timeout_error - Report the timeout error for the group to userspace.
2131  *
2132  * @group: Pointer to the group for which timeout error occurred
2133  */
2134 static void report_group_timeout_error(struct kbase_queue_group *const group)
2135 {
2136 	struct base_csf_notification const
2137 		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2138 			  .payload = {
2139 				  .csg_error = {
2140 					  .handle = group->handle,
2141 					  .error = {
2142 						  .error_type =
2143 							  BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
2144 					  } } } };
2145 
2146 	dev_warn(group->kctx->kbdev->dev,
2147 		 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
2148 		 kbase_csf_timeout_get(group->kctx->kbdev));
2149 
2150 	kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
2151 	kbase_event_wakeup(group->kctx);
2152 }
2153 
2154 /**
2155  * timer_event_worker - Handle the progress timeout error for the group
2156  *
2157  * @data: Pointer to a work_struct embedded in GPU command queue group data.
2158  *
2159  * Terminate the CSG and report the error to userspace
2160  */
2161 static void timer_event_worker(struct work_struct *data)
2162 {
2163 	struct kbase_queue_group *const group =
2164 		container_of(data, struct kbase_queue_group, timer_event_work);
2165 	struct kbase_context *const kctx = group->kctx;
2166 	bool reset_prevented = false;
2167 	int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
2168 
2169 	if (err)
2170 		dev_warn(
2171 			kctx->kbdev->dev,
2172 			"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
2173 			group->handle);
2174 	else
2175 		reset_prevented = true;
2176 
2177 	mutex_lock(&kctx->csf.lock);
2178 
2179 	term_queue_group(group);
2180 	report_group_timeout_error(group);
2181 
2182 	mutex_unlock(&kctx->csf.lock);
2183 	if (reset_prevented)
2184 		kbase_reset_gpu_allow(kctx->kbdev);
2185 }
2186 
2187 /**
2188  * handle_progress_timer_event - Progress timer timeout event handler.
2189  *
2190  * @group: Pointer to GPU queue group for which the timeout event is received.
2191  *
2192  * Enqueue a work item to terminate the group and notify the event notification
2193  * thread of progress timeout fault for the GPU command queue group.
2194  */
2195 static void handle_progress_timer_event(struct kbase_queue_group *const group)
2196 {
2197 	queue_work(group->kctx->csf.wq, &group->timer_event_work);
2198 }
2199 
2200 /**
2201  * protm_event_worker - Protected mode switch request event handler
2202  *			called from a workqueue.
2203  *
2204  * @data: Pointer to a work_struct embedded in GPU command queue group data.
2205  *
2206  * Request to switch to protected mode.
2207  */
2208 static void protm_event_worker(struct work_struct *data)
2209 {
2210 	struct kbase_queue_group *const group =
2211 		container_of(data, struct kbase_queue_group, protm_event_work);
2212 
2213 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
2214 				 group, 0u);
2215 	kbase_csf_scheduler_group_protm_enter(group);
2216 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
2217 				 group, 0u);
2218 }
2219 
2220 /**
2221  * handle_fault_event - Handler for CS fault.
2222  *
2223  * @queue:  Pointer to queue for which fault event was received.
2224  * @stream: Pointer to the structure containing info provided by the
2225  *          firmware about the CSI.
2226  *
2227  * Prints meaningful CS fault information.
2228  *
2229  */
2230 static void
2231 handle_fault_event(struct kbase_queue *const queue,
2232 		   struct kbase_csf_cmd_stream_info const *const stream)
2233 {
2234 	const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
2235 	const u64 cs_fault_info =
2236 		kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
2237 		((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI)
2238 		 << 32);
2239 	const u8 cs_fault_exception_type =
2240 		CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
2241 	const u32 cs_fault_exception_data =
2242 		CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
2243 	const u64 cs_fault_info_exception_data =
2244 		CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
2245 	struct kbase_device *const kbdev = queue->kctx->kbdev;
2246 
2247 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2248 
2249 	dev_warn(kbdev->dev,
2250 		 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
2251 		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
2252 		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
2253 		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
2254 		 queue->kctx->tgid, queue->kctx->id, queue->group->handle,
2255 		 queue->group->csg_nr, queue->csi_index,
2256 		 cs_fault_exception_type,
2257 		 kbase_gpu_exception_name(cs_fault_exception_type),
2258 		 cs_fault_exception_data, cs_fault_info_exception_data);
2259 
2260 }
2261 
2262 static void report_queue_fatal_error(struct kbase_queue *const queue,
2263 				     u32 cs_fatal, u64 cs_fatal_info,
2264 				     u8 group_handle)
2265 {
2266 	struct base_csf_notification error = {
2267 		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2268 		.payload = {
2269 			.csg_error = {
2270 				.handle = group_handle,
2271 				.error = {
2272 					.error_type =
2273 					BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2274 					.payload = {
2275 						.fatal_queue = {
2276 						.sideband = cs_fatal_info,
2277 						.status = cs_fatal,
2278 						.csi_index = queue->csi_index,
2279 						}
2280 					}
2281 				}
2282 			}
2283 		}
2284 	};
2285 
2286 	kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
2287 	kbase_event_wakeup(queue->kctx);
2288 }
2289 
2290 /**
2291  * fatal_event_worker - Handle the fatal error for the GPU queue
2292  *
2293  * @data: Pointer to a work_struct embedded in GPU command queue.
2294  *
2295  * Terminate the CSG and report the error to userspace.
2296  */
2297 static void fatal_event_worker(struct work_struct *const data)
2298 {
2299 	struct kbase_queue *const queue =
2300 		container_of(data, struct kbase_queue, fatal_event_work);
2301 	struct kbase_context *const kctx = queue->kctx;
2302 	struct kbase_device *const kbdev = kctx->kbdev;
2303 	struct kbase_queue_group *group;
2304 	u8 group_handle;
2305 	bool reset_prevented = false;
2306 	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
2307 
2308 	if (err)
2309 		dev_warn(
2310 			kbdev->dev,
2311 			"Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless");
2312 	else
2313 		reset_prevented = true;
2314 
2315 	mutex_lock(&kctx->csf.lock);
2316 
2317 	group = get_bound_queue_group(queue);
2318 	if (!group) {
2319 		dev_warn(kbdev->dev, "queue not bound when handling fatal event");
2320 		goto unlock;
2321 	}
2322 
2323 	group_handle = group->handle;
2324 	term_queue_group(group);
2325 	report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
2326 				 group_handle);
2327 
2328 unlock:
2329 	release_queue(queue);
2330 	mutex_unlock(&kctx->csf.lock);
2331 	if (reset_prevented)
2332 		kbase_reset_gpu_allow(kbdev);
2333 }
2334 
2335 /**
2336  * handle_fatal_event - Handler for CS fatal.
2337  *
2338  * @queue:    Pointer to queue for which fatal event was received.
2339  * @stream:   Pointer to the structure containing info provided by the
2340  *            firmware about the CSI.
2341  *
2342  * Prints meaningful CS fatal information.
2343  * Enqueue a work item to terminate the group and report the fatal error
2344  * to user space.
2345  */
2346 static void
2347 handle_fatal_event(struct kbase_queue *const queue,
2348 		   struct kbase_csf_cmd_stream_info const *const stream)
2349 {
2350 	const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
2351 	const u64 cs_fatal_info =
2352 		kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) |
2353 		((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI)
2354 		 << 32);
2355 	const u32 cs_fatal_exception_type =
2356 		CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal);
2357 	const u32 cs_fatal_exception_data =
2358 		CS_FATAL_EXCEPTION_DATA_GET(cs_fatal);
2359 	const u64 cs_fatal_info_exception_data =
2360 		CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
2361 	struct kbase_device *const kbdev = queue->kctx->kbdev;
2362 
2363 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2364 
2365 	dev_warn(kbdev->dev,
2366 		 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
2367 		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
2368 		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
2369 		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
2370 		 queue->kctx->tgid, queue->kctx->id, queue->group->handle,
2371 		 queue->group->csg_nr, queue->csi_index,
2372 		 cs_fatal_exception_type,
2373 		 kbase_gpu_exception_name(cs_fatal_exception_type),
2374 		 cs_fatal_exception_data, cs_fatal_info_exception_data);
2375 
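	/* A FIRMWARE_INTERNAL_ERROR is handled at the device level via the
	 * firmware error work item. Any other fatal error is handled per
	 * queue: take a reference on the queue (dropped by the worker, or
	 * below if the work item was already queued) and defer to
	 * fatal_event_worker(), which terminates the queue's group and
	 * reports the error to userspace.
	 */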
2376 	if (cs_fatal_exception_type ==
2377 			CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
2378 		queue_work(system_wq, &kbdev->csf.fw_error_work);
2379 	} else {
2380 		get_queue(queue);
2381 		queue->cs_fatal = cs_fatal;
2382 		queue->cs_fatal_info = cs_fatal_info;
2383 		if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
2384 			release_queue(queue);
2385 	}
2386 
2387 }
2388 
2389 /**
2390  * handle_queue_exception_event - Handler for CS fatal/fault exception events.
2391  *
2392  * @queue:  Pointer to queue for which fatal/fault event was received.
2393  * @cs_req: Value of the CS_REQ register from the CS's input page.
2394  * @cs_ack: Value of the CS_ACK register from the CS's output page.
2395  */
2396 static void handle_queue_exception_event(struct kbase_queue *const queue,
2397 					 const u32 cs_req, const u32 cs_ack)
2398 {
2399 	struct kbase_csf_cmd_stream_group_info const *ginfo;
2400 	struct kbase_csf_cmd_stream_info const *stream;
2401 	struct kbase_context *const kctx = queue->kctx;
2402 	struct kbase_device *const kbdev = kctx->kbdev;
2403 	struct kbase_queue_group *group = queue->group;
2404 	int csi_index = queue->csi_index;
2405 	int slot_num = group->csg_nr;
2406 
2407 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2408 
2409 	ginfo = &kbdev->csf.global_iface.groups[slot_num];
2410 	stream = &ginfo->streams[csi_index];
2411 
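	/* FATAL and FAULT use the request/acknowledge toggle protocol: an
	 * event is pending when the corresponding CS_REQ and CS_ACK bits
	 * differ, and writing the CS_ACK bit value back into CS_REQ
	 * acknowledges it. For a fault the CS doorbell is also rung to notify
	 * the firmware of the updated CS_REQ.
	 */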
2412 	if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2413 		handle_fatal_event(queue, stream);
2414 		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2415 						 CS_REQ_FATAL_MASK);
2416 	}
2417 
2418 	if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2419 		handle_fault_event(queue, stream);
2420 		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2421 						 CS_REQ_FAULT_MASK);
2422 		kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2423 	}
2424 }
2425 
2426 /**
2427  * process_cs_interrupts - Process interrupts for a CS.
2428  *
2429  * @group:  Pointer to GPU command queue group data.
2430  * @ginfo:  The CSG interface provided by the firmware.
2431  * @irqreq: CSG's IRQ request bitmask (one bit per CS).
2432  * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
2433  *
2434  * If the interrupt request bitmask differs from the acknowledge bitmask
2435  * then the firmware is notifying the host of an event concerning those
2436  * CSs indicated by bits whose value differs. The actions required
2437  * are then determined by examining which notification flags differ between
2438  * the request and acknowledge registers for the individual CS(s).
2439  */
2440 static void process_cs_interrupts(struct kbase_queue_group *const group,
2441 		      struct kbase_csf_cmd_stream_group_info const *const ginfo,
2442 		      u32 const irqreq, u32 const irqack)
2443 {
2444 	struct kbase_device *const kbdev = group->kctx->kbdev;
2445 	u32 remaining = irqreq ^ irqack;
2446 	bool protm_pend = false;
2447 	const bool group_suspending =
2448 		!kbase_csf_scheduler_group_events_enabled(kbdev, group);
2449 
2450 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2451 
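	/* Each set bit in 'remaining' identifies a CS whose IRQ request and
	 * acknowledge bits differ, i.e. a CS with a pending event. Every
	 * iteration extracts the lowest set bit index with ffs() and clears
	 * it once that CS has been examined.
	 */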
2452 	while (remaining != 0) {
2453 		int const i = ffs(remaining) - 1;
2454 		struct kbase_queue *const queue = group->bound_queues[i];
2455 
2456 		remaining &= ~(1 << i);
2457 
2458 		/* The queue pointer can be NULL, but if it isn't NULL then it
2459 		 * cannot disappear: the scheduler spinlock is held, and a bound
2460 		 * queue must first be unbound (which requires the scheduler
2461 		 * spinlock) before it can be freed.
2462 		 */
2463 		if (queue && !WARN_ON(queue->csi_index != i)) {
2464 			struct kbase_csf_cmd_stream_info const *const stream =
2465 				&ginfo->streams[i];
2466 			u32 const cs_req = kbase_csf_firmware_cs_input_read(
2467 				stream, CS_REQ);
2468 			u32 const cs_ack =
2469 				kbase_csf_firmware_cs_output(stream, CS_ACK);
2470 			struct workqueue_struct *wq = group->kctx->csf.wq;
2471 
2472 			if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
2473 			    (cs_ack & CS_ACK_EXCEPTION_MASK)) {
2474 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
2475 				handle_queue_exception_event(queue, cs_req, cs_ack);
2476 			}
2477 
2478 			/* PROTM_PEND and TILER_OOM can be safely ignored
2479 			 * because they will be raised again if the group
2480 			 * is assigned a CSG slot in future.
2481 			 */
2482 			if (group_suspending) {
2483 				u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
2484 				u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
2485 
2486 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
2487 							   group, queue, cs_req_remain ^ cs_ack_remain);
2488 				continue;
2489 			}
2490 
2491 			if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
2492 			     (cs_ack & CS_ACK_TILER_OOM_MASK))) {
2493 				get_queue(queue);
2494 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
2495 							   cs_req ^ cs_ack);
2496 				if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
2497 					/* The work item should not already
2498 					 * have been queued, because there can
2499 					 * be only one pending OoM event for a
2500 					 * queue.
2501 					 */
2502 					release_queue(queue);
2503 				}
2504 			}
2505 
2506 			if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
2507 			    (cs_ack & CS_ACK_PROTM_PEND_MASK)) {
2508 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
2509 							   cs_req ^ cs_ack);
2510 
2511 				dev_dbg(kbdev->dev,
2512 					"Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
2513 					queue->csi_index, group->handle,
2514 					group->csg_nr);
2515 
2516 				bitmap_set(group->protm_pending_bitmap, i, 1);
2517 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
2518 							   group->protm_pending_bitmap[0]);
2519 				protm_pend = true;
2520 			}
2521 		}
2522 	}
2523 
2524 	if (protm_pend)
2525 		queue_work(group->kctx->csf.wq, &group->protm_event_work);
2526 }
2527 
2528 /**
2529  * process_csg_interrupts - Process interrupts for a CSG.
2530  *
2531  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2532  * @csg_nr: CSG number.
2533  *
2534  * Handles interrupts for a CSG and for CSs within it.
2535  *
2536  * If the CSG's request register value differs from its acknowledge register
2537  * then the firmware is notifying the host of an event concerning the whole
2538  * group. The actions required are then determined by examining which
2539  * notification flags differ between those two register values.
2540  *
2541  * See process_cs_interrupts() for details of per-stream interrupt handling.
2542  */
2543 static void process_csg_interrupts(struct kbase_device *const kbdev,
2544 	int const csg_nr)
2545 {
2546 	struct kbase_csf_cmd_stream_group_info *ginfo;
2547 	struct kbase_queue_group *group = NULL;
2548 	u32 req, ack, irqreq, irqack;
2549 
2550 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2551 
2552 	if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
2553 		return;
2554 
2555 	KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
2556 
2557 	ginfo = &kbdev->csf.global_iface.groups[csg_nr];
2558 	req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
2559 	ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2560 	irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ);
2561 	irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK);
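	/* CSG_REQ/CSG_ACK carry group-level events (SYNC_UPDATE, IDLE,
	 * PROGRESS_TIMER_EVENT), whereas CSG_IRQ_REQ/CSG_IRQ_ACK carry one bit
	 * per CS and are handled by process_cs_interrupts(). In both cases a
	 * mismatch between the request and acknowledge bits indicates a
	 * pending event.
	 */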
2562 
2563 	/* There may not be any pending CSG/CS interrupts to process */
2564 	if ((req == ack) && (irqreq == irqack))
2565 		goto out;
2566 
2567 	/* Immediately set the IRQ_ACK bits to be the same as the IRQ_REQ bits
2568 	 * before examining the CS_ACK & CS_REQ bits. This ensures that the Host
2569 	 * doesn't miss an interrupt for a CS in the race scenario where the
2570 	 * firmware sends another interrupt for that CS whilst the Host is still
2571 	 * servicing the previous one.
2572 	 */
2573 	kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq);
2574 
2575 	group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr);
2576 
2577 	/* The group pointer can be NULL here if interrupts for the group
2578 	 * (like SYNC_UPDATE, IDLE notification) were delayed and arrived
2579 	 * just after the suspension of the group completed. However, if it
2580 	 * is not NULL then the group cannot disappear even if userspace
2581 	 * tries to terminate it whilst this loop is running: the scheduler
2582 	 * spinlock is held, and the scheduler spinlock is also required to
2583 	 * free a group that is resident on a CSG slot.
2584 	 */
2585 	if (!group)
2586 		goto out;
2587 
2588 	if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
2589 		goto out;
2590 
2591 	if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
2592 		kbase_csf_firmware_csg_input_mask(ginfo,
2593 			CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
2594 
2595 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
2596 		kbase_csf_event_signal_cpu_only(group->kctx);
2597 	}
2598 
2599 	if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
2600 		struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
2601 
2602 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2603 			CSG_REQ_IDLE_MASK);
2604 
2605 		set_bit(csg_nr, scheduler->csg_slots_idle_mask);
2606 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
2607 					 scheduler->csg_slots_idle_mask[0]);
2608 		KBASE_KTRACE_ADD_CSF_GRP(kbdev,  CSG_IDLE_INTERRUPT, group, req ^ ack);
2609 		dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
2610 			 group->handle, csg_nr);
2611 
2612 		/* Check if the scheduling tick can be advanced */
2613 		if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
2614 			if (!scheduler->gpu_idle_fw_timer_enabled)
2615 				kbase_csf_scheduler_advance_tick_nolock(kbdev);
2616 		} else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
2617 			/* If there are non-idle CSGs waiting for a slot, fire
2618 			 * a tock for a replacement.
2619 			 */
2620 			mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
2621 		}
2622 	}
2623 
2624 	if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
2625 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2626 			CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
2627 
2628 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
2629 					 group, req ^ ack);
2630 		dev_info(kbdev->dev,
2631 			"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
2632 			kbase_backend_get_cycle_cnt(kbdev),
2633 			group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
2634 
2635 		handle_progress_timer_event(group);
2636 	}
2637 
2638 	process_cs_interrupts(group, ginfo, irqreq, irqack);
2639 
2640 out:
2641 	/* group may still be NULL here */
2642 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
2643 				 ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
2644 }
2645 
2646 /**
2647  * process_prfcnt_interrupts - Process performance counter interrupts.
2648  *
2649  * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
2650  * @glb_req: Global request register value.
2651  * @glb_ack: Global acknowledge register value.
2652  *
2653  * Handles interrupts issued by the firmware that relate to the performance
2654  * counters. For example, on completion of a performance counter sample. It is
2655  * expected that the scheduler spinlock is already held on calling this
2656  * function.
2657  */
2658 static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
2659 				      u32 glb_ack)
2660 {
2661 	const struct kbase_csf_global_iface *const global_iface =
2662 		&kbdev->csf.global_iface;
2663 
2664 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
2665 
2666 	/* Process PRFCNT_SAMPLE interrupt. */
2667 	if (kbdev->csf.hwcnt.request_pending &&
2668 	    ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) ==
2669 	     (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) {
2670 		kbdev->csf.hwcnt.request_pending = false;
2671 
2672 		dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received.");
2673 
2674 		kbase_hwcnt_backend_csf_on_prfcnt_sample(
2675 			&kbdev->hwcnt_gpu_iface);
2676 	}
2677 
2678 	/* Process PRFCNT_ENABLE interrupt. */
2679 	if (kbdev->csf.hwcnt.enable_pending &&
2680 	    ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) ==
2681 	     (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) {
2682 		kbdev->csf.hwcnt.enable_pending = false;
2683 
2684 		dev_dbg(kbdev->dev,
2685 			"PRFCNT_ENABLE status changed interrupt received.");
2686 
2687 		if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK)
2688 			kbase_hwcnt_backend_csf_on_prfcnt_enable(
2689 				&kbdev->hwcnt_gpu_iface);
2690 		else
2691 			kbase_hwcnt_backend_csf_on_prfcnt_disable(
2692 				&kbdev->hwcnt_gpu_iface);
2693 	}
2694 
2695 	/* Process PRFCNT_THRESHOLD interrupt. */
2696 	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) {
2697 		dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received.");
2698 
2699 		kbase_hwcnt_backend_csf_on_prfcnt_threshold(
2700 			&kbdev->hwcnt_gpu_iface);
2701 
2702 		/* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to
2703 		 * the same value as GLB_ACK.PRFCNT_THRESHOLD
2704 		 * flag in order to enable reporting of another
2705 		 * PRFCNT_THRESHOLD event.
2706 		 */
2707 		kbase_csf_firmware_global_input_mask(
2708 			global_iface, GLB_REQ, glb_ack,
2709 			GLB_REQ_PRFCNT_THRESHOLD_MASK);
2710 	}
2711 
2712 	/* Process PRFCNT_OVERFLOW interrupt. */
2713 	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) {
2714 		dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received.");
2715 
2716 		kbase_hwcnt_backend_csf_on_prfcnt_overflow(
2717 			&kbdev->hwcnt_gpu_iface);
2718 
2719 		/* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to
2720 		 * the same value as GLB_ACK.PRFCNT_OVERFLOW
2721 		 * flag in order to enable reporting of another
2722 		 * PRFCNT_OVERFLOW event.
2723 		 */
2724 		kbase_csf_firmware_global_input_mask(
2725 			global_iface, GLB_REQ, glb_ack,
2726 			GLB_REQ_PRFCNT_OVERFLOW_MASK);
2727 	}
2728 }
2729 
2730 /**
2731  * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
2732  *
2733  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2734  * @glb_req: Global request register value.
2735  * @glb_ack: Global acknowledge register value.
2736  *
2737  * This function checks if the PROTM_ENTER Global request has completed and,
2738  * if so, sends notification about the protected mode entry to components
2739  * like IPA, HWC and IPA_CONTROL.
2740  */
2741 static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
2742 						  u32 glb_req, u32 glb_ack)
2743 {
2744 	lockdep_assert_held(&kbdev->hwaccess_lock);
2745 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2746 
2747 	if (likely(!kbdev->csf.scheduler.active_protm_grp))
2748 		return;
2749 
2750 	if (kbdev->protected_mode)
2751 		return;
2752 
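	/* PROTM_ENTER follows the global request/acknowledge toggle protocol:
	 * the request is still outstanding while the GLB_REQ and GLB_ACK bits
	 * differ, and has completed once the firmware has made GLB_ACK match
	 * GLB_REQ.
	 */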
2753 	if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
2754 	    (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
2755 		return;
2756 
2757 	dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
2758 
2759 	kbdev->protected_mode = true;
2760 	kbase_ipa_protection_mode_switch_event(kbdev);
2761 	kbase_ipa_control_protm_entered(kbdev);
2762 	kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
2763 }
2764 
2765 /**
2766  * process_protm_exit - Handle the protected mode exit interrupt
2767  *
2768  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2769  * @glb_ack: Global acknowledge register value.
2770  *
2771  * This function handles the PROTM_EXIT interrupt and sends notification
2772  * about the protected mode exit to components like HWC, IPA_CONTROL.
2773  */
2774 static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
2775 {
2776 	const struct kbase_csf_global_iface *const global_iface =
2777 		&kbdev->csf.global_iface;
2778 	struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
2779 
2780 	lockdep_assert_held(&kbdev->hwaccess_lock);
2781 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2782 
2783 	dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2784 
2785 	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
2786 					     GLB_REQ_PROTM_EXIT_MASK);
2787 
2788 	if (likely(scheduler->active_protm_grp)) {
2789 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
2790 					 scheduler->active_protm_grp, 0u);
2791 		scheduler->active_protm_grp = NULL;
2792 	} else {
2793 		dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
2794 	}
2795 
2796 	if (!WARN_ON(!kbdev->protected_mode)) {
2797 		kbdev->protected_mode = false;
2798 		kbase_ipa_control_protm_exited(kbdev);
2799 		kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
2800 	}
2801 }
2802 
2803 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
2804 {
2805 	unsigned long flags;
2806 	u32 remaining = val;
2807 
2808 	lockdep_assert_held(&kbdev->hwaccess_lock);
2809 
2810 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
2811 	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
2812 
2813 	if (val & JOB_IRQ_GLOBAL_IF) {
2814 		const struct kbase_csf_global_iface *const global_iface =
2815 			&kbdev->csf.global_iface;
2816 		struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
2817 
2818 		kbdev->csf.interrupt_received = true;
2819 		remaining &= ~JOB_IRQ_GLOBAL_IF;
2820 
2821 		if (!kbdev->csf.firmware_reloaded)
2822 			kbase_csf_firmware_reload_completed(kbdev);
2823 		else if (global_iface->output) {
2824 			u32 glb_req, glb_ack;
2825 
2826 			kbase_csf_scheduler_spin_lock(kbdev, &flags);
2827 			glb_req = kbase_csf_firmware_global_input_read(
2828 					global_iface, GLB_REQ);
2829 			glb_ack = kbase_csf_firmware_global_output(
2830 					global_iface, GLB_ACK);
2831 			KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
2832 
2833 			check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
2834 
2835 			if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
2836 				process_protm_exit(kbdev, glb_ack);
2837 
2838 			/* Handle IDLE Hysteresis notification event */
2839 			if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
2840 				int non_idle_offslot_grps;
2841 				bool can_suspend_on_idle;
2842 
2843 				dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
2844 				kbase_csf_firmware_global_input_mask(
2845 						global_iface, GLB_REQ, glb_ack,
2846 						GLB_REQ_IDLE_EVENT_MASK);
2847 
2848 				non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
2849 				can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
2850 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
2851 					((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
2852 
2853 				if (!non_idle_offslot_grps) {
2854 					if (can_suspend_on_idle)
2855 						queue_work(system_highpri_wq,
2856 							   &scheduler->gpu_idle_work);
2857 				} else {
2858 					/* Advance the scheduling tick to get
2859 					 * the non-idle suspended groups loaded
2860 					 * soon.
2861 					 */
2862 					kbase_csf_scheduler_advance_tick_nolock(
2863 						kbdev);
2864 				}
2865 			}
2866 
2867 			process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
2868 
2869 			kbase_csf_scheduler_spin_unlock(kbdev, flags);
2870 
2871 			/* Invoke the MCU state machine as a state transition
2872 			 * might have completed.
2873 			 */
2874 			kbase_pm_update_state(kbdev);
2875 		}
2876 
2877 		if (!remaining) {
2878 			wake_up_all(&kbdev->csf.event_wait);
2879 			KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
2880 			return;
2881 		}
2882 	}
2883 
2884 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
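	/* Each bit still set in 'remaining' corresponds to a CSG interface
	 * with a pending interrupt; handle them one at a time, lowest CSG
	 * number first.
	 */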
2885 	while (remaining != 0) {
2886 		int const csg_nr = ffs(remaining) - 1;
2887 
2888 		process_csg_interrupts(kbdev, csg_nr);
2889 		remaining &= ~(1 << csg_nr);
2890 	}
2891 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
2892 
2893 	wake_up_all(&kbdev->csf.event_wait);
2894 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
2895 }
2896 
2897 void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
2898 {
2899 	if (kbdev->csf.db_filp) {
2900 		struct page *page = as_page(kbdev->csf.dummy_db_page);
2901 
2902 		kbase_mem_pool_free(
2903 			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
2904 			page, false);
2905 
2906 		fput(kbdev->csf.db_filp);
2907 	}
2908 }
2909 
2910 int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
2911 {
2912 	struct tagged_addr phys;
2913 	struct file *filp;
2914 	int ret;
2915 
2916 	filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
2917 	if (IS_ERR(filp))
2918 		return PTR_ERR(filp);
2919 
2920 	ret = kbase_mem_pool_alloc_pages(
2921 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
2922 		1, &phys, false);
2923 
2924 	if (ret <= 0) {
2925 		fput(filp);
2926 		return ret;
2927 	}
2928 
2929 	kbdev->csf.db_filp = filp;
2930 	kbdev->csf.dummy_db_page = phys;
2931 	kbdev->csf.db_file_offsets = 0;
2932 
2933 	return 0;
2934 }
2935 
2936 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
2937 {
2938 	if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
2939 		struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
2940 
2941 		kbase_mem_pool_free(
2942 			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
2943 			false);
2944 	}
2945 }
2946 
2947 int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
2948 {
2949 	struct tagged_addr phys;
2950 	struct page *page;
2951 	u32 *addr;
2952 	int ret;
2953 
2954 	kbdev->csf.dummy_user_reg_page = as_tagged(0);
2955 
2956 	ret = kbase_mem_pool_alloc_pages(
2957 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
2958 		false);
2959 
2960 	if (ret <= 0)
2961 		return ret;
2962 
2963 	page = as_page(phys);
2964 	addr = kmap_atomic(page);
2965 
2966 	/* Write a special value for the latest flush register inside the
2967 	 * dummy page
2968 	 */
2969 	addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
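	/* The dummy page presumably stands in for the real USER register page
	 * whilst the GPU is powered down (hence POWER_DOWN_LATEST_FLUSH_VALUE),
	 * so that a userspace read of LATEST_FLUSH still returns a sane value
	 * in that state.
	 */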
2970 
2971 	kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
2972 				     DMA_BIDIRECTIONAL);
2973 	kunmap_atomic(addr);
2974 
2975 	kbdev->csf.dummy_user_reg_page = phys;
2976 
2977 	return 0;
2978 }
2979 
2980 u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
2981 {
2982 	struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
2983 	u8 out_priority = req_priority;
2984 
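	/* The priority control manager operates on the scheduler's relative
	 * priority scale, so the requested base priority is converted to its
	 * relative equivalent, passed to the PCM for checking (which may
	 * adjust it for the current task), and then converted back to a base
	 * priority before being returned.
	 */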
2985 	if (pcm_device) {
2986 		req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority);
2987 		out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority);
2988 		out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority);
2989 	}
2990 
2991 	return out_priority;
2992 }
2993