• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <tl/mali_kbase_tracepoints.h>
23 #include <tl/mali_kbase_timeline.h>
24 #include <tl/mali_kbase_timeline_priv.h>
25 
26 #include <mali_kbase.h>
27 
/* Bit 3 of the raw gpu_features word; set when the GPU supports
 * cross-stream synchronisation (reported to the timeline client below).
 */
#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull)
29 
/**
 * kbase_create_timeline_objects - Summarize the current device state into
 *                                 the timeline streams
 * @kbdev: Kbase device instance
 *
 * Emits the pre-existing object-model state — address spaces, the GPU
 * itself, currently resident CSGs, and every live context with its assigned
 * address space and KCPU queues — so that a newly attached timeline client
 * starts from a consistent snapshot. Static objects go to the summary
 * stream; per-context objects are deliberately traced into the body stream
 * (see the comment in the context loop for why).
 *
 * Lock ordering used here: tl_kctx_list_lock, then csf.scheduler.lock;
 * per context, kcpu_queues.lock then mmu_hw_mutex. The release points are
 * load-bearing — see the inline comments.
 */
void kbase_create_timeline_objects(struct kbase_device *kbdev)
{
	unsigned int as_nr;
	unsigned int slot_i;
	struct kbase_context *kctx;
	struct kbase_timeline *timeline = kbdev->timeline;
	/* Stream that holds the one-shot summary of static objects. */
	struct kbase_tlstream *summary =
		&kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY];
	/* 1 iff the GPU's feature word advertises cross-stream sync
	 * (GPU_FEATURES_CROSS_STREAM_SYNC_MASK, bit 3).
	 */
	u32 const kbdev_has_cross_stream_sync =
		(kbdev->gpu_props.props.raw_props.gpu_features &
		 GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ?
			1 :
			0;
	/* Architecture major version extracted from the raw GPU id. */
	u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id &
			      GPU_ID2_ARCH_MAJOR) >>
			     GPU_ID2_ARCH_MAJOR_SHIFT;
	/* Scoreboard entry count reported to the client: 16 from
	 * architecture major 11 onwards, 8 before that.
	 */
	u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8;
	/* GPU sleep support is a runtime-PM backend property; when runtime
	 * PM is compiled out, report it as unsupported.
	 */
	u32 const supports_gpu_sleep =
#ifdef KBASE_PM_RUNTIME
		kbdev->pm.backend.gpu_sleep_supported;
#else
		false;
#endif /* KBASE_PM_RUNTIME */

	/* Summarize the Address Space objects. */
	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
		__kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr);

	/* Create Legacy GPU object to track in AOM for dumping */
	__kbase_tlstream_tl_new_gpu(summary,
			kbdev,
			kbdev->gpu_props.props.raw_props.gpu_id,
			kbdev->gpu_props.num_cores);

	/* Link each address space to the legacy GPU object. */
	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
		__kbase_tlstream_tl_lifelink_as_gpu(summary,
				&kbdev->as[as_nr],
				kbdev);

	/* Trace the creation of a new kbase device and set its properties. */
	__kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id,
					     kbdev->gpu_props.num_cores,
					     kbdev->csf.global_iface.group_num,
					     kbdev->nr_hw_address_spaces, num_sb_entries,
					     kbdev_has_cross_stream_sync, supports_gpu_sleep);

	/* Lock the context list, to ensure no changes to the list are made
	 * while we're summarizing the contexts and their contents.
	 */
	mutex_lock(&timeline->tl_kctx_list_lock);

	/* Hold the scheduler lock while we emit the current state
	 * We also need to continue holding the lock until after the first body
	 * stream tracepoints are emitted to ensure we don't change the
	 * scheduler until after then
	 */
	mutex_lock(&kbdev->csf.scheduler.lock);

	/* Summarize which queue group is resident on each CSG slot. */
	for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) {

		struct kbase_queue_group *group =
			kbdev->csf.scheduler.csg_slots[slot_i].resident_group;

		if (group)
			__kbase_tlstream_tl_kbase_device_program_csg(
				summary,
				kbdev->gpu_props.props.raw_props.gpu_id,
				group->kctx->id, group->handle, slot_i, 0);
	}

	/* Reset body stream buffers while holding the kctx lock.
	 * As we are holding the lock, we can guarantee that no kctx creation or
	 * deletion tracepoints can be fired from outside of this function by
	 * some other thread.
	 */
	kbase_timeline_streams_body_reset(timeline);

	mutex_unlock(&kbdev->csf.scheduler.lock);

	/* For each context in the device... */
	list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) {
		size_t i;
		struct kbase_tlstream *body =
			&timeline->streams[TL_STREAM_TYPE_OBJ];

		/* Lock the context's KCPU queues, to ensure no KCPU-queue
		 * related actions can occur in this context from now on.
		 */
		mutex_lock(&kctx->csf.kcpu_queues.lock);

		/* Acquire the MMU lock, to ensure we don't get a concurrent
		 * address space assignment while summarizing this context's
		 * address space.
		 */
		mutex_lock(&kbdev->mmu_hw_mutex);

		/* Trace the context itself into the body stream, not the
		 * summary stream.
		 * We place this in the body to ensure it is ordered after any
		 * other tracepoints related to the contents of the context that
		 * might have been fired before acquiring all of the per-context
		 * locks.
		 * This ensures that those tracepoints will not actually affect
		 * the object model state, as they reference a context that
		 * hasn't been traced yet. They may, however, cause benign
		 * errors to be emitted.
		 */
		__kbase_tlstream_tl_kbase_new_ctx(body, kctx->id,
				kbdev->gpu_props.props.raw_props.gpu_id);

		/* Also trace with the legacy AOM tracepoint for dumping */
		__kbase_tlstream_tl_new_ctx(body,
				kctx,
				kctx->id,
				(u32)(kctx->tgid));

		/* Trace the currently assigned address space */
		if (kctx->as_nr != KBASEP_AS_NR_INVALID)
			__kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id,
				kctx->as_nr);


		/* Trace all KCPU queues in the context into the body stream.
		 * As we acquired the KCPU lock after resetting the body stream,
		 * it's possible that some KCPU-related events for this context
		 * occurred between that reset and now.
		 * These will cause errors to be emitted when parsing the
		 * timeline, but they will not affect the correctness of the
		 * object model.
		 */
		for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) {
			const struct kbase_kcpu_command_queue *kcpu_queue =
				kctx->csf.kcpu_queues.array[i];

			if (kcpu_queue)
				__kbase_tlstream_tl_kbase_new_kcpuqueue(
					body, kcpu_queue, kcpu_queue->kctx->id,
					kcpu_queue->num_pending_cmds);
		}

		mutex_unlock(&kbdev->mmu_hw_mutex);
		mutex_unlock(&kctx->csf.kcpu_queues.lock);

		/* Now that all per-context locks for this context have been
		 * released, any per-context tracepoints that are fired from
		 * any other threads will go into the body stream after
		 * everything that was just summarised into the body stream in
		 * this iteration of the loop, so will start to correctly update
		 * the object model state.
		 */
	}

	mutex_unlock(&timeline->tl_kctx_list_lock);

	/* Static object are placed into summary packet that needs to be
	 * transmitted first. Flush all streams to make it available to
	 * user space.
	 */
	kbase_timeline_streams_flush(timeline);
}
191