• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /*
3  *
4  * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /**
23  * DOC: Job Scheduler Type Definitions
24  */
25 
26 #ifndef _KBASE_JS_DEFS_H_
27 #define _KBASE_JS_DEFS_H_
28 
29 /* Forward decls */
30 struct kbase_device;
31 struct kbase_jd_atom;
32 
33 
/* Bitmask of per-context flags used by the Job Scheduler.
 * NOTE(review): the individual flag bit definitions are not visible in this
 * header — confirm against the context-flag (KCTX_*) definitions elsewhere
 * in the driver.
 */
typedef u32 kbase_context_flags;
35 
/**
 * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's
 * jobs registered with the Job Scheduler
 * @kbdev: kbase device the jobs belong to
 * @katom: the atom the callback is being invoked on
 */
typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
				  struct kbase_jd_atom *katom);
42 
/*
 * KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ - Maximum number of jobs that can
 * be submitted to a job slot whilst inside the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
 * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
52 
/**
 * enum kbasep_js_ctx_attr - Context attributes
 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
 *                              Compute jobs.
 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
 * 	Non-Compute jobs.
 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
 * 	contains compute-job atoms that aren't restricted to a coherent group,
 * 	and can run on all cores.
 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool, or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 * the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 * context have attribute X?"
 *
 * The boolean value of the context attributes often affect scheduling
 * decisions, such as affinities to use and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 * attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE:
 * Attribute indicating a context that contains Compute jobs. That is,
 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_NON_COMPUTE:
 * Attribute indicating a context that contains Non-Compute jobs. That is,
 * the context has some jobs that are \b not of type @ref
 * BASE_JD_REQ_ONLY_COMPUTE.
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES:
 * Attribute indicating that a context contains compute-job atoms that
 * aren't restricted to a coherent group, and can run on all cores.
 *
 * Specifically, this is when the atom's \a core_req satisfy:
 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
 *
 * Such atoms could be blocked from running if one of the coherent groups
 * is being used by another job slot, so tracking this context attribute
 * allows us to prevent such situations.
 *
 * @note This doesn't take into account the 1-coregroup case, where all
 * compute atoms would effectively be able to run on 'all cores', but
 * contexts will still not always get marked with this attribute. Instead,
 * it is the caller's responsibility to take into account the number of
 * coregroups when interpreting this attribute.
 *
 * @note Whilst Tiler atoms are normally combined with
 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
 * enough to handle anyway.
 */
enum kbasep_js_ctx_attr {
	KBASEP_JS_CTX_ATTR_COMPUTE,
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
	KBASEP_JS_CTX_ATTR_COUNT
};
126 
/* Bit flags combined into a kbasep_js_atom_done_code, describing actions to
 * take when an atom completes.
 */
enum {
	/*
	 * Bit indicating that new atom should be started because this atom
	 * completed
	 */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/*
	 * Bit indicating that the atom was evicted from the JS_NEXT registers
	 */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};
138 
/**
 * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...>
 * bits
 */
typedef u32 kbasep_js_atom_done_code;
144 
/*
 * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
 */
enum {
	/*
	 * In this mode, higher priority atoms will be scheduled first,
	 * regardless of the context they belong to. Newly-runnable higher
	 * priority atoms can preempt lower priority atoms currently running on
	 * the GPU, even if they belong to a different context.
	 */
	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,

	/*
	 * In this mode, the highest-priority atom will be chosen from each
	 * context in turn using a round-robin algorithm, so priority only has
	 * an effect within the context an atom belongs to. Newly-runnable
	 * higher priority atoms can preempt the lower priority atoms currently
	 * running on the GPU, but only if they belong to the same context.
	 */
	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,

	/* Must be the last in the enum */
	KBASE_JS_PRIORITY_MODE_COUNT,
};
169 
/*
 * Internal atom priority defines for kbase_jd_atom::sched_prio.
 *
 * Lower numeric values are higher priorities: REALTIME (== FIRST == 0) is
 * the highest, LOW the lowest runnable level.
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0,
	KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST,
	KBASE_JS_ATOM_SCHED_PRIO_HIGH,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};
181 
/* Invalid priority for kbase_jd_atom::sched_prio.
 * Parenthesized so the negative value expands safely inside any enclosing
 * expression (CERT PRE01-C), matching KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID.
 */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID (-1)

/* Default priority in the case of contexts with no atoms, or being lenient
 * about invalid priorities from userspace.
 */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
189 
/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits
 * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels.
 *
 * Must be strictly larger than the number of bits to represent a bitmap of
 * priorities, so that we can do calculations such as:
 *   (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1
 * ...without causing undefined behavior due to a shift beyond the width of the
 * type.
 *
 * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth
 * moving to DECLARE_BITMAP()
 */
typedef u8 kbase_js_prio_bitmap_t;
203 
/* Ordering modification flags for kbase_js_atom_runs_before() */
typedef u32 kbase_atom_ordering_flag_t;

/* Atoms of the same context and priority should have their ordering decided by
 * their seq_nr instead of their age.
 *
 * seq_nr is used as a more slowly changing variant of age - it increases once
 * per group of related atoms, as determined by user-space. Hence, it can be
 * used to limit re-ordering decisions (such as pre-emption) to only re-order
 * between such groups, rather than re-order within those groups of atoms.
 */
#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0)
216 
/**
 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
 *	IRQ context. The hwaccess_lock must be held when accessing.
 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
 * 	scheduled context is allowed to submit jobs. When bit 'N' is set in
 * 	this, it indicates whether the context bound to address space 'N' is
 * 	allowed to submit jobs.
 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
 * 	  Each is large enough to hold a refcount of the number of contexts
 * 	that can fit into the runpool. This is currently BASE_MAX_NR_AS.
 * 	  Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
 * 	the refcount. Hence, it's not worthwhile reducing this to
 * 	bit-manipulation on u32s to save space (where in contrast, 4 bit
 * 	sub-fields would be easy to do and would save space).
 * 	  Whilst this must not become negative, the sign bit is used for:
 * 	- error detection in debug builds
 * 	- Optimization: it is undefined for a signed int to overflow, and so
 * 	the compiler can optimize for that never happening (thus, no masking
 * 	is required on updating the variable)
 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
 *	to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
 *	is using core i (i.e. slot_affinity_refcount[n][i] > 0)
 * @runpool_irq.slot_affinity_refcount: Array of refcount for each core owned
 *	by each slot. Used to generate the slot_affinities array of bitvectors.
 *	  The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
 *	because it is refcounted only when a job is definitely about to be
 *	submitted to a slot, and is de-refcounted immediately after a job
 *	finishes
 * @schedule_sem: Scheduling semaphore. This must be held when calling
 *	kbase_jm_kick()
 * @ctx_list_pullable: List of contexts that can currently be pulled from
 * @ctx_list_unpullable: List of contexts that can not currently be pulled
 *	from, but have jobs currently running.
 * @nr_user_contexts_running: Number of currently scheduled user contexts
 *	(excluding ones that are not submitting jobs)
 * @nr_all_contexts_running: Number of currently scheduled contexts (including
 *	ones that are not submitting jobs)
 * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
 *	@note This is a write-once member, and so no locking is required to
 *	read
 * @scheduling_period_ns:	Value for JS_SCHEDULING_PERIOD_NS
 * @soft_stop_ticks:		Value for JS_SOFT_STOP_TICKS
 * @soft_stop_ticks_cl:		Value for JS_SOFT_STOP_TICKS_CL
 * @hard_stop_ticks_ss:		Value for JS_HARD_STOP_TICKS_SS
 * @hard_stop_ticks_cl:		Value for JS_HARD_STOP_TICKS_CL
 * @hard_stop_ticks_dumping:	Value for JS_HARD_STOP_TICKS_DUMPING
 * @gpu_reset_ticks_ss:		Value for JS_RESET_TICKS_SS
 * @gpu_reset_ticks_cl:		Value for JS_RESET_TICKS_CL
 * @gpu_reset_ticks_dumping:	Value for JS_RESET_TICKS_DUMPING
 * @ctx_timeslice_ns:		Value for JS_CTX_TIMESLICE_NS
 * @suspended_soft_jobs_list:	List of suspended soft jobs
 * @softstop_always:		Support soft-stop on a single context
 * @init_status:The initialized-flag is placed at the end, to avoid
 * 	cache-pollution (we should only be using this during init/term paths).
 * 	@note This is a write-once member, and so no locking is required to
 * 	read
 * @nr_contexts_pullable:Number of contexts that can currently be pulled from
 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
 * 	are currently running
 * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
 * 	independently of the Run Pool.
 *	Of course, you don't need the Run Pool lock to access this.
 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
 *
 * This encapsulates the current context of the Job Scheduler on a particular
 * device. This context is global to the device, and is not tied to any
 * particular struct kbase_context running on the device.
 *
 * nr_contexts_running and as_free are optimized for packing together (by making
 * them smaller types than u32). The operations on them should rarely involve
 * masking. The use of signed types for arithmetic indicates to the compiler
 * that the value will not rollover (which would be undefined behavior), and so
 * under the Total License model, it is free to make optimizations based on
 * that (i.e. to remove masking).
 */
struct kbasep_js_device_data {
	struct runpool_irq {
		u16 submit_allowed;
		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
	} runpool_irq;
	struct semaphore schedule_sem;
	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]
					  [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]
					    [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	s8 nr_user_contexts_running;
	s8 nr_all_contexts_running;
	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];

	u32 scheduling_period_ns;
	u32 soft_stop_ticks;
	u32 soft_stop_ticks_cl;
	u32 hard_stop_ticks_ss;
	u32 hard_stop_ticks_cl;
	u32 hard_stop_ticks_dumping;
	u32 gpu_reset_ticks_ss;
	u32 gpu_reset_ticks_cl;
	u32 gpu_reset_ticks_dumping;
	u32 ctx_timeslice_ns;

	struct list_head suspended_soft_jobs_list;

#ifdef CONFIG_MALI_BIFROST_DEBUG
	bool softstop_always;
#endif				/* CONFIG_MALI_BIFROST_DEBUG */
	int init_status;
	u32 nr_contexts_pullable;
	atomic_t nr_contexts_runnable;
	atomic_t soft_job_timeout_ms;
	struct mutex queue_mutex;
	/*
	 * Run Pool mutex, for managing contexts within the runpool.
	 * Unless otherwise specified, you must hold this lock whilst accessing
	 * any members that follow
	 *
	 * In addition, this is used to access:
	 * * the kbasep_js_kctx_info::runpool substructure
	 */
	struct mutex runpool_mutex;
};
341 
/**
 * struct kbasep_js_kctx_info - KBase Context Job Scheduling information
 *	structure
 * @ctx: Job Scheduler Context information sub-structure. Its members are
 *	accessed regardless of whether the context is:
 *	- In the Policy's Run Pool
 *	- In the Policy's Queue
 *	- Not queued nor in the Run Pool.
 *	You must obtain the @ctx.jsctx_mutex before accessing any other members
 *	of this substructure.
 *	You may not access any of its members from IRQ context.
 * @ctx.jsctx_mutex: Job Scheduler Context lock
 * @ctx.nr_jobs: Number of jobs <b>ready to run</b> - does \em not include
 *	the jobs waiting in the dispatcher, and dependency-only
 *	jobs. See kbase_jd_context::job_nr for such jobs
 * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough
 *	to hold a refcount of the number of atoms on the context.
 * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SCHEDULED flag state
 *	changes.
 * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on
 *	one list per job slot.
 * @init_status: The initialized-flag is placed at the end, to avoid
 *	cache-pollution (we should only be using this during init/term paths)
 *
 * This is a substructure in the struct kbase_context that encapsulates all the
 * scheduling information.
 */
struct kbasep_js_kctx_info {
	struct kbase_jsctx {
		struct mutex jsctx_mutex;

		u32 nr_jobs;
		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		wait_queue_head_t is_scheduled_wait;
		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
	} ctx;
	int init_status;
};
380 
/**
 * struct kbasep_js_atom_retained_state - Subset of atom state.
 * @event_code: to determine whether the atom has finished
 * @core_req: core requirements
 * @sched_priority: priority
 * @device_nr: Core group atom was executed on
 *
 * Subset of atom state that can be available after jd_done_nolock() is called
 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 * because the original atom could disappear.
 */
struct kbasep_js_atom_retained_state {
	/* Event code - to determine whether the atom has finished */
	enum base_jd_event_code event_code;
	/* core requirements */
	base_jd_core_req core_req;
	/* priority */
	int sched_priority;
	/* Core group atom was executed on */
	u32 device_nr;

};
403 
/*
 * Value signifying 'no retry on a slot required' for:
 * - kbase_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/*
 * base_jd_core_req value signifying 'invalid' for a
 * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/*
 * The JS timer resolution, in microseconds.
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1
422 
/**
 * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's
 *                                    use of a job slot
 * @blocked: bitmap of priorities that this slot is blocked at
 * @atoms_pulled: counts of atoms that have been pulled from this slot,
 *                across all priority levels
 * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per
 *                    priority level
 *
 * Controls how a slot from the &struct kbase_context's jsctx_queue is managed,
 * for example to ensure correct ordering of atoms when atoms of different
 * priorities are unpulled.
 */
struct kbase_jsctx_slot_tracking {
	kbase_js_prio_bitmap_t blocked;
	atomic_t atoms_pulled;
	int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
};
441 
442 #endif /* _KBASE_JS_DEFS_H_ */
443