1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 /* 3 * 4 * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. 5 * 6 * This program is free software and is provided to you under the terms of the 7 * GNU General Public License version 2 as published by the Free Software 8 * Foundation, and any use by you of this program is subject to the terms 9 * of such GNU license. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, you can access it online at 18 * http://www.gnu.org/licenses/gpl-2.0.html. 19 * 20 */ 21 22 /** 23 * DOC: Job Scheduler Type Definitions 24 */ 25 26 #ifndef _KBASE_JS_DEFS_H_ 27 #define _KBASE_JS_DEFS_H_ 28 29 /* Forward decls */ 30 struct kbase_device; 31 struct kbase_jd_atom; 32 33 34 typedef u32 kbase_context_flags; 35 36 /* 37 * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's 38 * jobs registered with the Job Scheduler 39 */ 40 typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, 41 struct kbase_jd_atom *katom); 42 43 /* 44 * @brief Maximum number of jobs that can be submitted to a job slot whilst 45 * inside the IRQ handler. 46 * 47 * This is important because GPU NULL jobs can complete whilst the IRQ handler 48 * is running. Otherwise, it potentially allows an unlimited number of GPU NULL 49 * jobs to be submitted inside the IRQ handler, which increases IRQ latency. 50 */ 51 #define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 52 53 /** 54 * enum kbasep_js_ctx_attr - Context attributes 55 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains 56 * Compute jobs. 57 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains 58 * Non-Compute jobs. 59 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context 60 * contains compute-job atoms that aren't restricted to a coherent group, 61 * and can run on all cores. 62 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum 63 * 64 * Each context attribute can be thought of as a boolean value that caches some 65 * state information about either the runpool, or the context: 66 * - In the case of the runpool, it is a cache of "Do any contexts owned by 67 * the runpool have attribute X?" 68 * - In the case of a context, it is a cache of "Do any atoms owned by the 69 * context have attribute X?" 70 * 71 * The boolean value of the context attributes often affect scheduling 72 * decisions, such as affinities to use and job slots to use. 73 * 74 * To accomodate changes of state in the context, each attribute is refcounted 75 * in the context, and in the runpool for all running contexts. Specifically: 76 * - The runpool holds a refcount of how many contexts in the runpool have this 77 * attribute. 78 * - The context holds a refcount of how many atoms have this attribute. 79 * 80 * KBASEP_JS_CTX_ATTR_COMPUTE: 81 * Attribute indicating a context that contains Compute jobs. That is, 82 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE 83 * 84 * @note A context can be both 'Compute' and 'Non Compute' if it contains 85 * both types of jobs. 86 * 87 * KBASEP_JS_CTX_ATTR_NON_COMPUTE: 88 * Attribute indicating a context that contains Non-Compute jobs. That is, 89 * the context has some jobs that are \b not of type @ref 90 * BASE_JD_REQ_ONLY_COMPUTE. 91 * 92 * @note A context can be both 'Compute' and 'Non Compute' if it contains 93 * both types of jobs. 94 * 95 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: 96 * Attribute indicating that a context contains compute-job atoms that 97 * aren't restricted to a coherent group, and can run on all cores. 98 * 99 * Specifically, this is when the atom's \a core_req satisfy: 100 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 101 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups 102 * 103 * Such atoms could be blocked from running if one of the coherent groups 104 * is being used by another job slot, so tracking this context attribute 105 * allows us to prevent such situations. 106 * 107 * @note This doesn't take into account the 1-coregroup case, where all 108 * compute atoms would effectively be able to run on 'all cores', but 109 * contexts will still not always get marked with this attribute. Instead, 110 * it is the caller's responsibility to take into account the number of 111 * coregroups when interpreting this attribute. 112 * 113 * @note Whilst Tiler atoms are normally combined with 114 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without 115 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy 116 * enough to handle anyway. 117 * 118 * 119 */ 120 enum kbasep_js_ctx_attr { 121 KBASEP_JS_CTX_ATTR_COMPUTE, 122 KBASEP_JS_CTX_ATTR_NON_COMPUTE, 123 KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, 124 KBASEP_JS_CTX_ATTR_COUNT 125 }; 126 127 enum { 128 /* 129 * Bit indicating that new atom should be started because this atom 130 * completed 131 */ 132 KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), 133 /* 134 * Bit indicating that the atom was evicted from the JS_NEXT registers 135 */ 136 KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) 137 }; 138 139 /** 140 * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...> 141 * bits 142 */ 143 typedef u32 kbasep_js_atom_done_code; 144 145 /* 146 * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode 147 */ 148 enum { 149 /* 150 * In this mode, higher priority atoms will be scheduled first, 151 * regardless of the context they belong to. Newly-runnable higher 152 * priority atoms can preempt lower priority atoms currently running on 153 * the GPU, even if they belong to a different context. 154 */ 155 KBASE_JS_SYSTEM_PRIORITY_MODE = 0, 156 157 /* 158 * In this mode, the highest-priority atom will be chosen from each 159 * context in turn using a round-robin algorithm, so priority only has 160 * an effect within the context an atom belongs to. Newly-runnable 161 * higher priority atoms can preempt the lower priority atoms currently 162 * running on the GPU, but only if they belong to the same context. 163 */ 164 KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, 165 166 /* Must be the last in the enum */ 167 KBASE_JS_PRIORITY_MODE_COUNT, 168 }; 169 170 /* 171 * Internal atom priority defines for kbase_jd_atom::sched_prio 172 */ 173 enum { 174 KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, 175 KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, 176 KBASE_JS_ATOM_SCHED_PRIO_HIGH, 177 KBASE_JS_ATOM_SCHED_PRIO_MED, 178 KBASE_JS_ATOM_SCHED_PRIO_LOW, 179 KBASE_JS_ATOM_SCHED_PRIO_COUNT, 180 }; 181 182 /* Invalid priority for kbase_jd_atom::sched_prio */ 183 #define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 184 185 /* Default priority in the case of contexts with no atoms, or being lenient 186 * about invalid priorities from userspace. 187 */ 188 #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED 189 190 /* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits 191 * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels. 192 * 193 * Must be strictly larger than the number of bits to represent a bitmap of 194 * priorities, so that we can do calculations such as: 195 * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1 196 * ...without causing undefined behavior due to a shift beyond the width of the 197 * type 198 * 199 * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth 200 * moving to DECLARE_BITMAP() 201 */ 202 typedef u8 kbase_js_prio_bitmap_t; 203 204 /* Ordering modification for kbase_js_atom_runs_before() */ 205 typedef u32 kbase_atom_ordering_flag_t; 206 207 /* Atoms of the same context and priority should have their ordering decided by 208 * their seq_nr instead of their age. 209 * 210 * seq_nr is used as a more slowly changing variant of age - it increases once 211 * per group of related atoms, as determined by user-space. Hence, it can be 212 * used to limit re-ordering decisions (such as pre-emption) to only re-order 213 * between such groups, rather than re-order within those groups of atoms. 214 */ 215 #define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0) 216 217 /** 218 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure 219 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in 220 * IRQ context. The hwaccess_lock must be held when accessing. 221 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently 222 * scheduled context is allowed to submit jobs. When bit 'N' is set in 223 * this, it indicates whether the context bound to address space 'N' is 224 * allowed to submit jobs. 225 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters: 226 * Each is large enough to hold a refcount of the number of contexts 227 * that can fit into the runpool. This is currently BASE_MAX_NR_AS. 228 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store 229 * the refcount. Hence, it's not worthwhile reducing this to 230 * bit-manipulation on u32s to save space (where in contrast, 4 bit 231 * sub-fields would be easy to do and would save space). 232 * Whilst this must not become negative, the sign bit is used for: 233 * - error detection in debug builds 234 * - Optimization: it is undefined for a signed int to overflow, and so 235 * the compiler can optimize for that never happening (thus, no masking 236 * is required on updating the variable) 237 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector 238 * to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n' 239 * is using core i (i.e. slot_affinity_refcount[n][i] > 0) 240 * @runpool_irq.slot_affinity_refcount: Array of fefcount for each core owned 241 * by each slot. Used to generate the slot_affinities array of bitvectors. 242 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, 243 * because it is refcounted only when a job is definitely about to be 244 * submitted to a slot, and is de-refcounted immediately after a job 245 * finishes 246 * @schedule_sem: Scheduling semaphore. This must be held when calling 247 * kbase_jm_kick() 248 * @ctx_list_pullable: List of contexts that can currently be pulled from 249 * @ctx_list_unpullable: List of contexts that can not currently be pulled 250 * from, but have jobs currently running. 251 * @nr_user_contexts_running: Number of currently scheduled user contexts 252 * (excluding ones that are not submitting jobs) 253 * @nr_all_contexts_running: Number of currently scheduled contexts (including 254 * ones that are not submitting jobs) 255 * @js_reqs: Core Requirements to match up with base_js_atom's core_req memeber 256 * @note This is a write-once member, and so no locking is required to 257 * read 258 * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS 259 * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS 260 * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL 261 * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS 262 * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL 263 * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING 264 * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS 265 * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL 266 * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING 267 * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS 268 * @suspended_soft_jobs_list: List of suspended soft jobs 269 * @softstop_always: Support soft-stop on a single context 270 * @init_status:The initialized-flag is placed at the end, to avoid 271 * cache-pollution (we should only be using this during init/term paths). 272 * @note This is a write-once member, and so no locking is required to 273 * read 274 * @nr_contexts_pullable:Number of contexts that can currently be pulled from 275 * @nr_contexts_runnable:Number of contexts that can either be pulled from or 276 * arecurrently running 277 * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT 278 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts 279 * independently of the Run Pool. 280 * Of course, you don't need the Run Pool lock to access this. 281 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool. 282 * 283 * This encapsulates the current context of the Job Scheduler on a particular 284 * device. This context is global to the device, and is not tied to any 285 * particular struct kbase_context running on the device. 286 * 287 * nr_contexts_running and as_free are optimized for packing together (by making 288 * them smaller types than u32). The operations on them should rarely involve 289 * masking. The use of signed types for arithmetic indicates to the compiler 290 * that the value will not rollover (which would be undefined behavior), and so 291 * under the Total License model, it is free to make optimizations based on 292 * that (i.e. to remove masking). 293 */ 294 struct kbasep_js_device_data { 295 struct runpool_irq { 296 u16 submit_allowed; 297 s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; 298 u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; 299 s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; 300 } runpool_irq; 301 struct semaphore schedule_sem; 302 struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] 303 [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; 304 struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] 305 [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; 306 s8 nr_user_contexts_running; 307 s8 nr_all_contexts_running; 308 base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; 309 310 u32 scheduling_period_ns; 311 u32 soft_stop_ticks; 312 u32 soft_stop_ticks_cl; 313 u32 hard_stop_ticks_ss; 314 u32 hard_stop_ticks_cl; 315 u32 hard_stop_ticks_dumping; 316 u32 gpu_reset_ticks_ss; 317 u32 gpu_reset_ticks_cl; 318 u32 gpu_reset_ticks_dumping; 319 u32 ctx_timeslice_ns; 320 321 struct list_head suspended_soft_jobs_list; 322 323 #ifdef CONFIG_MALI_BIFROST_DEBUG 324 bool softstop_always; 325 #endif /* CONFIG_MALI_BIFROST_DEBUG */ 326 int init_status; 327 u32 nr_contexts_pullable; 328 atomic_t nr_contexts_runnable; 329 atomic_t soft_job_timeout_ms; 330 struct mutex queue_mutex; 331 /* 332 * Run Pool mutex, for managing contexts within the runpool. 333 * Unless otherwise specified, you must hold this lock whilst accessing 334 * any members that follow 335 * 336 * In addition, this is used to access: 337 * * the kbasep_js_kctx_info::runpool substructure 338 */ 339 struct mutex runpool_mutex; 340 }; 341 342 /** 343 * struct kbasep_js_kctx_info - KBase Context Job Scheduling information 344 * structure 345 * @ctx: Job Scheduler Context information sub-structure.Its members are 346 * accessed regardless of whether the context is: 347 * - In the Policy's Run Pool 348 * - In the Policy's Queue 349 * - Not queued nor in the Run Pool. 350 * You must obtain the @ctx.jsctx_mutex before accessing any other members 351 * of this substructure. 352 * You may not access any of its members from IRQ context. 353 * @ctx.jsctx_mutex: Job Scheduler Context lock 354 * @ctx.nr_jobs: Number of jobs <b>ready to run</b> - does \em not include 355 * the jobs waiting in the dispatcher, and dependency-only 356 * jobs. See kbase_jd_context::job_nr for such jobs 357 * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough 358 * to hold a refcount of the number of atoms on the context. 359 * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SHEDULED flag state 360 * changes. 361 * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on 362 * one list per job slot. 363 * @init_status: The initalized-flag is placed at the end, to avoid 364 * cache-pollution (we should only be using this during init/term paths) 365 * 366 * This is a substructure in the struct kbase_context that encapsulates all the 367 * scheduling information. 368 */ 369 struct kbasep_js_kctx_info { 370 struct kbase_jsctx { 371 struct mutex jsctx_mutex; 372 373 u32 nr_jobs; 374 u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; 375 wait_queue_head_t is_scheduled_wait; 376 struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; 377 } ctx; 378 int init_status; 379 }; 380 381 /** 382 * struct kbasep_js_atom_retained_state - Subset of atom state. 383 * @event_code: to determine whether the atom has finished 384 * @core_req: core requirements 385 * @sched_priority: priority 386 * @device_nr: Core group atom was executed on 387 * 388 * Subset of atom state that can be available after jd_done_nolock() is called 389 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), 390 * because the original atom could disappear. 391 */ 392 struct kbasep_js_atom_retained_state { 393 /* Event code - to determine whether the atom has finished */ 394 enum base_jd_event_code event_code; 395 /* core requirements */ 396 base_jd_core_req core_req; 397 /* priority */ 398 int sched_priority; 399 /* Core group atom was executed on */ 400 u32 device_nr; 401 402 }; 403 404 /* 405 * Value signifying 'no retry on a slot required' for: 406 * - kbase_js_atom_retained_state::retry_submit_on_slot 407 * - kbase_jd_atom::retry_submit_on_slot 408 */ 409 #define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) 410 411 /* 412 * base_jd_core_req value signifying 'invalid' for a 413 * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid() 414 */ 415 #define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP 416 417 /* 418 * The JS timer resolution, in microseconds 419 * Any non-zero difference in time will be at least this size. 420 */ 421 #define KBASEP_JS_TICK_RESOLUTION_US 1 422 423 /** 424 * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's 425 * use of a job slot 426 * @blocked: bitmap of priorities that this slot is blocked at 427 * @atoms_pulled: counts of atoms that have been pulled from this slot, 428 * across all priority levels 429 * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per 430 * priority level 431 * 432 * Controls how a slot from the &struct kbase_context's jsctx_queue is managed, 433 * for example to ensure correct ordering of atoms when atoms of different 434 * priorities are unpulled. 435 */ 436 struct kbase_jsctx_slot_tracking { 437 kbase_js_prio_bitmap_t blocked; 438 atomic_t atoms_pulled; 439 int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; 440 }; 441 442 #endif /* _KBASE_JS_DEFS_H_ */ 443