1 /*
2  *
3  * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, you can access it online at
17  * http://www.gnu.org/licenses/gpl-2.0.html.
18  *
19  * SPDX-License-Identifier: GPL-2.0
20  *
21  */
22 
23 /*
24  * Base kernel job manager APIs
25  */
26 
27 #include <mali_kbase.h>
28 #include <mali_kbase_config.h>
29 #include <gpu/mali_kbase_gpu_regmap.h>
30 #include <tl/mali_kbase_tracepoints.h>
31 #include <mali_linux_trace.h>
32 #include <mali_kbase_hw.h>
33 #include <mali_kbase_hwaccess_jm.h>
34 #include <mali_kbase_reset_gpu.h>
35 #include <mali_kbase_ctx_sched.h>
36 #include <mali_kbase_kinstr_jm.h>
37 #include <mali_kbase_hwcnt_context.h>
38 #include <device/mali_kbase_device.h>
39 #include <backend/gpu/mali_kbase_irq_internal.h>
40 #include <backend/gpu/mali_kbase_jm_internal.h>
41 #include <mali_kbase_regs_history_debugfs.h>
42 
43 static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
44 
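/**
 * kbase_job_write_affinity() - Program the affinity for the next job on a slot
 * @kbdev:    The kbase device
 * @core_req: Core requirements of the atom about to be submitted
 * @js:       Job slot number
 *
 * Chooses a shader core mask from the atom's core requirements, the shader
 * cores currently available and the per-slot debug core mask, then writes it
 * to the slot's JS_AFFINITY_NEXT registers.
 *
 * Return: the affinity mask that was written to the hardware.
 */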
45 static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, int js)
46 {
47     u64 affinity;
48 
49     if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) {
50         /* Tiler-only atom */
51         /* If the hardware supports XAFFINITY then we'll only enable
52          * the tiler (which is the default so this is a no-op),
53          * otherwise enable shader core 0.
54          */
55         if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
56             affinity = 1;
57         } else {
58             affinity = 0;
59         }
60     } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
61         unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
62         struct mali_base_gpu_coherent_group_info *coherency_info = &kbdev->gpu_props.props.coherency_info;
63 
64         affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js];
65 
66         /* JS2 on a dual core group system targets core group 1. All
67          * other cases target core group 0.
68          */
69         if (js == 2 && num_core_groups > 1) {
70             affinity &= coherency_info->group[1].core_mask;
71         } else {
72             affinity &= coherency_info->group[0].core_mask;
73         }
74     } else {
75         /* Use all cores */
76         affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js];
77     }
78 
79     if (unlikely(!affinity)) {
80 #ifdef CONFIG_MALI_BIFROST_DEBUG
81         u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
82 
83         WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail));
84 #endif
85 
86         affinity = kbdev->pm.backend.shaders_avail;
87     }
88 
89     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), affinity & 0xFFFFFFFF);
90     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), affinity >> 32);
91 
92     return affinity;
93 }
94 
95 /**
96  * select_job_chain() - Select which job chain to submit to the GPU
97  * @katom: Pointer to the atom about to be submitted to the GPU
98  *
99  * Selects one of the fragment job chains attached to the special atom at the
100  * end of a renderpass, or returns the address of the single job chain attached
101  * to any other type of atom.
102  *
103  * Which job chain is selected depends upon whether the tiling phase of the
104  * renderpass completed normally or was soft-stopped because it used too
105  * much memory. It also depends upon whether one of the fragment job chains
106  * has already been run as part of the same renderpass.
107  *
108  * Return: GPU virtual address of the selected job chain
109  */
110 static u64 select_job_chain(struct kbase_jd_atom *katom)
111 {
112     struct kbase_context *const kctx = katom->kctx;
113     u64 jc = katom->jc;
114     struct kbase_jd_renderpass *rp;
115 
116     lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
117 
118     if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
119         return jc;
120     }
121 
122     compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
123                            ARRAY_SIZE(kctx->jctx.renderpasses),
124                        "Should check invalid access to renderpasses");
125 
126     rp = &kctx->jctx.renderpasses[katom->renderpass_id];
127     /* We can read a subset of renderpass state without holding
128      * higher-level locks (but not end_katom, for example).
129      * If the end-of-renderpass atom is running with as-yet indeterminate
130      * OOM state then assume that the start atom was not soft-stopped.
131      */
132     switch (rp->state) {
133         case KBASE_JD_RP_OOM:
134             /* Tiling ran out of memory.
135              * Start of incremental rendering, used once.
136              */
137             jc = katom->jc_fragment.norm_read_forced_write;
138             break;
139         case KBASE_JD_RP_START:
140         case KBASE_JD_RP_PEND_OOM:
141             /* Tiling completed successfully first time.
142              * Single-iteration rendering, used once.
143              */
144             jc = katom->jc_fragment.norm_read_norm_write;
145             break;
146         case KBASE_JD_RP_RETRY_OOM:
147             /* Tiling ran out of memory again.
148              * Continuation of incremental rendering, used as
149              * many times as required.
150              */
151             jc = katom->jc_fragment.forced_read_forced_write;
152             break;
153         case KBASE_JD_RP_RETRY:
154         case KBASE_JD_RP_RETRY_PEND_OOM:
155             /* Tiling completed successfully this time.
156              * End of incremental rendering, used once.
157              */
158             jc = katom->jc_fragment.forced_read_norm_write;
159             break;
160         default:
161             WARN_ON(1);
162             break;
163     }
164 
165     dev_dbg(kctx->kbdev->dev, "Selected job chain 0x%llx for end atom %p in state %d\n", jc, (void *)katom,
166             (int)rp->state);
167 
168     katom->jc = jc;
169     return jc;
170 }
171 
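/**
 * kbase_job_hw_submit() - Submit an atom to a job slot
 * @kbdev: The kbase device
 * @katom: The atom to be submitted
 * @js:    Job slot to submit the atom on
 *
 * Writes the selected job chain address, affinity and configuration to the
 * slot's *_NEXT registers, records an approximate start timestamp and the
 * relevant trace events, then issues JS_COMMAND_START. The slot's command
 * register must be free when this function is called.
 */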
172 void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
173 {
174     struct kbase_context *kctx;
175     u32 cfg;
176     u64 const jc_head = select_job_chain(katom);
177     u64 affinity;
178 
179     KBASE_DEBUG_ASSERT(kbdev);
180     KBASE_DEBUG_ASSERT(katom);
181 
182     kctx = katom->kctx;
183 
184     /* Command register must be available */
185     KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
186 
187     dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", jc_head, (void *)katom);
188 
189     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF);
190     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32);
191 
192     affinity = kbase_job_write_affinity(kbdev, katom->core_req, js);
193 
194     /* start MMU, medium priority, cache clean/flush on end, clean/flush on
195      * start */
196     cfg = kctx->as_nr;
197 
198     if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
199         !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) {
200         cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
201     }
202 
203     if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_START) != 0) {
204         cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
205     } else {
206         cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
207     }
208 
209     if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) != 0 && !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) {
210         cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
211     } else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) {
212         cfg |= JS_CONFIG_END_FLUSH_CLEAN;
213     } else {
214         cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
215     }
216 
217     cfg |= JS_CONFIG_THREAD_PRI(8);
218 
219     if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) {
220         cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
221     }
222 
223     if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
224         if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
225             cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
226             katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
227             kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = true;
228         } else {
229             katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
230             kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = false;
231         }
232     }
233 
234     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
235 
236     if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
237         kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), katom->flush_id);
238     }
239 
240     /* Write an approximate start timestamp.
241      * It's approximate because there might be a job in the HEAD register.
242      */
243     katom->start_timestamp = ktime_get();
244 
245     /* GO ! */
246     dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", katom, kctx, js, jc_head);
247 
248     KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32)affinity);
249 
250     KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START);
251 
252     KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, affinity, cfg);
253     KBASE_TLSTREAM_TL_RET_CTX_LPU(kbdev, kctx, &kbdev->gpu_props.props.raw_props.js_features[katom->slot_nr]);
254     KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
255     KBASE_TLSTREAM_TL_RET_ATOM_LPU(kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr");
256     kbase_kinstr_jm_atom_hw_submit(katom);
257 #ifdef CONFIG_GPU_TRACEPOINTS
258     if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
259         /* If this is the only job on the slot, trace it as starting */
260         char js_string[16];
261 
262         trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string, sizeof(js_string)),
263                                ktime_to_ns(katom->start_timestamp), (u32)katom->kctx->id, 0, katom->work_id);
264         kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
265     }
266 #endif
267 
268     trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom), js);
269 
270     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START);
271 }
272 
273 /**
274  * kbasep_job_slot_update_head_start_timestamp - Update timestamp
275  * @kbdev: kbase device
276  * @js: job slot
277  * @end_timestamp: timestamp
278  *
279  * Update the start_timestamp of the job currently in the HEAD, based on the
280  * fact that we got an IRQ for the previous set of completed jobs.
281  *
282  * The estimate also takes into account the time the job was submitted, to
283  * work out the best estimate (which might still result in an over-estimate of
284  * the calculated time spent)
285  */
286 static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, int js, ktime_t end_timestamp)
287 {
288     ktime_t timestamp_diff;
289     struct kbase_jd_atom *katom;
290 
291     /* Checking the HEAD position for the job slot */
292     katom = kbase_gpu_inspect(kbdev, js, 0);
293     if (katom != NULL) {
294         timestamp_diff = ktime_sub(end_timestamp, katom->start_timestamp);
295         if (ktime_to_ns(timestamp_diff) >= 0) {
296             /* Only update the timestamp if it's a better estimate
297              * than what's currently stored. This is because our
298              * estimate that accounts for the throttle time may be
299              * too much of an overestimate */
300             katom->start_timestamp = end_timestamp;
301         }
302     }
303 }
304 
305 /**
306  * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
307  * tracepoint
308  * @kbdev: kbase device
309  * @js: job slot
310  *
311  * Make a tracepoint call to the instrumentation module informing that
312  * softstop happened on given lpu (job slot).
313  */
314 static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, int js)
315 {
316     KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(kbdev, &kbdev->gpu_props.props.raw_props.js_features[js]);
317 }
318 
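/**
 * kbase_job_done() - Handle job completion interrupts
 * @kbdev: The kbase device
 * @done:  Snapshot of JOB_IRQ_RAWSTAT: the low 16 bits flag slots whose jobs
 *         finished, the high 16 bits flag slots whose jobs failed
 *
 * Failed slots are also treated as finished. For example, done == 0x00010002
 * reports a successful completion on slot 1 and a failure on slot 0. For each
 * failed slot the status code (and, for soft-stops, the job tail address) is
 * read back so that the completion can be handled appropriately.
 *
 * The caller must hold the hwaccess_lock.
 */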
319 void kbase_job_done(struct kbase_device *kbdev, u32 done)
320 {
321     int i;
322     u32 count = 0;
323     ktime_t end_timestamp;
324 
325     lockdep_assert_held(&kbdev->hwaccess_lock);
326 
327     KBASE_DEBUG_ASSERT(kbdev);
328 
329     KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done);
330 
331     end_timestamp = ktime_get();
332 
333     while (done) {
334         u32 failed = done >> 16;
335 
336         /* treat failed slots as finished slots */
337         u32 finished = (done & 0xFFFF) | failed;
338 
339         /* Note: This is inherently unfair, as we always check
340          * for lower numbered interrupts before the higher
341          * numbered ones. */
342         i = ffs(finished) - 1;
343         KBASE_DEBUG_ASSERT(i >= 0);
344 
345         do {
346             int nr_done;
347             u32 active;
348             u32 completion_code = BASE_JD_EVENT_DONE; /* assume OK */
349             u64 job_tail = 0;
350 
351             if (failed & (1u << i)) {
352                 /* read out the job slot status code if the job
353                  * slot reported failure */
354                 completion_code = kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS));
355                 if (completion_code == BASE_JD_EVENT_STOPPED) {
356                     KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, i, 0, TL_JS_EVENT_SOFT_STOP);
357 
358                     kbasep_trace_tl_event_lpu_softstop(kbdev, i);
359 
360                     /* Soft-stopped job - read the value of
361                      * JS<n>_TAIL so that the job chain can
362                      * be resumed */
363                     job_tail = (u64)kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_TAIL_LO)) |
364                                ((u64)kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_TAIL_HI)) << 32);
365                 } else if (completion_code == BASE_JD_EVENT_NOT_STARTED) {
366                     /* PRLAM-10673 can cause a TERMINATED
367                      * job to come back as NOT_STARTED, but
368                      * the error interrupt helps us detect
369                      * it */
370                     completion_code = BASE_JD_EVENT_TERMINATED;
371                 }
372 
373                 kbase_gpu_irq_evict(kbdev, i, completion_code);
374 
375                 /* Some jobs that encounter a BUS FAULT may result in corrupted
376                  * state causing future jobs to hang. Reset GPU before
377                  * allowing any other jobs on the slot to continue. */
378                 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
379                     if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
380                         if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
381                             kbase_reset_gpu_locked(kbdev);
382                         }
383                     }
384                 }
385             }
386 
387             kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
388                             done & ((1 << i) | (1 << (i + 16))));
389             active = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
390             if (((active >> i) & 1) == 0 &&
391                 (((done >> (i + 16)) & 1) == 0)) {
392                 /* There is a potential race we must work
393                  * around:
394                  *
395                  *  1. A job slot has a job in both current and
396                  *     next registers
397                  *  2. The job in current completes
398                  *     successfully, the IRQ handler reads
399                  *     RAWSTAT and calls this function with the
400                  *     relevant bit set in "done"
401                  *  3. The job in the next registers becomes the
402                  *     current job on the GPU
403                  *  4. Sometime before the JOB_IRQ_CLEAR line
404                  *     above the job on the GPU _fails_
405                  *  5. The IRQ_CLEAR clears the done bit but not
406                  *     the failed bit. This atomically sets
407                  *     JOB_IRQ_JS_STATE. However since both jobs
408                  *     have now completed the relevant bits for
409                  *     the slot are set to 0.
410                  *
411                  * If we now did nothing then we'd incorrectly
412                  * assume that _both_ jobs had completed
413                  * successfully (since we haven't yet observed
414                  * the fail bit being set in RAWSTAT).
415                  *
416                  * So at this point if there are no active jobs
417                  * left we check to see if RAWSTAT has a failure
418                  * bit set for the job slot. If it does we know
419                  * that there has been a new failure that we
420                  * didn't previously know about, so we make sure
421                  * that we record this in active (but we wait
422                  * for the next loop to deal with it).
423                  *
424                  * If we were handling a job failure (i.e. done
425                  * has the relevant high bit set) then we know
426                  * that the value read back from
427                  * JOB_IRQ_JS_STATE is the correct number of
428                  * remaining jobs because the failed job will
429                  * have prevented any further jobs from starting
430                  * execution.
431                  */
432                 u32 rawstat = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
433                 if ((rawstat >> (i + 16)) & 1) {
434                     /* There is a failed job that we've
435                      * missed - add it back to active */
436                     active |= (1u << i);
437                 }
438             }
439 
440             dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", completion_code);
441 
442             nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
443             nr_done -= (active >> i) & 1;
444             nr_done -= (active >> (i + 16)) & 1;
445 
446             if (nr_done <= 0) {
447                 dev_warn(kbdev->dev, "Spurious interrupt on slot %d", i);
448 
449                 goto spurious;
450             }
451 
452             count += nr_done;
453 
454             while (nr_done) {
455                 if (nr_done == 1) {
456                     kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, &end_timestamp);
457                     kbase_jm_try_kick_all(kbdev);
458                 } else {
459                     /* More than one job has completed.
460                      * Since this is not the last job being
461                      * reported this time it must have
462                      * passed. This is because the hardware
463                      * will not allow further jobs in a job
464                      * slot to complete until the failed job
465                      * is cleared from the IRQ status.
466                      */
467                     kbase_gpu_complete_hw(kbdev, i, BASE_JD_EVENT_DONE, 0, &end_timestamp);
468                 }
469                 nr_done--;
470             }
471         spurious:
472             done = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
473 
474             failed = done >> 16;
475             finished = (done & 0xFFFF) | failed;
476             if (done) {
477                 end_timestamp = ktime_get();
478             }
479         } while (finished & (1 << i));
480 
481         kbasep_job_slot_update_head_start_timestamp(kbdev, i, end_timestamp);
482     }
483 
484     if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_COMMITTED) {
485         /* If we're trying to reset the GPU then we might be able to do
486          * it early (without waiting for a timeout) because some jobs
487          * have completed
488          */
489         kbasep_try_reset_gpu_early_locked(kbdev);
490     }
491     KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
492 }
493 
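/**
 * kbasep_job_slot_soft_or_hard_stop_do_action() - Issue a stop command to a slot
 * @kbdev:        The kbase device
 * @js:           Job slot to stop
 * @action:       The stop command (JS_COMMAND_SOFT_STOP or JS_COMMAND_HARD_STOP)
 * @core_reqs:    Core requirements of the target atom, used only for diagnostics
 * @target_katom: The atom being stopped
 *
 * Marks @target_katom as soft- or hard-stopped, selects the job-chain
 * disambiguated variant of the command where the hardware supports it, and
 * writes the command to the slot's JS_COMMAND register. Protected atoms are
 * never soft-stopped.
 */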
494 void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action,
495                                                  base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
496 {
497 #if KBASE_KTRACE_ENABLE
498     u32 status_reg_before;
499     u64 job_in_head_before;
500     u32 status_reg_after;
501 
502     KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
503 
504     /* Check the head pointer */
505     job_in_head_before = ((u64)kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_LO))) |
506                          (((u64)kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_HI))) << 32);
507     status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
508 #endif
509 
510     if (action == JS_COMMAND_SOFT_STOP) {
511         if (kbase_jd_katom_is_protected(target_katom)) {
512 #ifdef CONFIG_MALI_BIFROST_DEBUG
513             dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x",
514                     (unsigned int)core_reqs);
515 #endif /* CONFIG_MALI_BIFROST_DEBUG */
516             return;
517         }
518 
519         /* We are about to issue a soft stop, so mark the atom as having
520          * been soft stopped */
521         target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED;
522 
523         /* Mark the point where we issue the soft-stop command */
524         KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
525 
526         if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
527             action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? JS_COMMAND_SOFT_STOP_1
528                                                                              : JS_COMMAND_SOFT_STOP_0;
529         }
530     } else if (action == JS_COMMAND_HARD_STOP) {
531         target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
532 
533         if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
534             action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? JS_COMMAND_HARD_STOP_1
535                                                                              : JS_COMMAND_HARD_STOP_0;
536         }
537     }
538 
539     kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
540 
541 #if KBASE_KTRACE_ENABLE
542     status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
543     if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
544         struct kbase_jd_atom *head;
545         struct kbase_context *head_kctx;
546 
547         head = kbase_gpu_inspect(kbdev, js, 0);
548         head_kctx = head->kctx;
549 
550         if (status_reg_before == BASE_JD_EVENT_ACTIVE) {
551             KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js);
552         } else {
553             KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
554         }
555 
556         switch (action) {
557             case JS_COMMAND_SOFT_STOP:
558                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js);
559                 break;
560             case JS_COMMAND_SOFT_STOP_0:
561                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js);
562                 break;
563             case JS_COMMAND_SOFT_STOP_1:
564                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js);
565                 break;
566             case JS_COMMAND_HARD_STOP:
567                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js);
568                 break;
569             case JS_COMMAND_HARD_STOP_0:
570                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js);
571                 break;
572             case JS_COMMAND_HARD_STOP_1:
573                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
574                 break;
575             default:
576                 BUG();
577                 break;
578         }
579     } else {
580         if (status_reg_before == BASE_JD_EVENT_ACTIVE) {
581             KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js);
582         } else {
583             KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
584         }
585 
586         switch (action) {
587             case JS_COMMAND_SOFT_STOP:
588                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js);
589                 break;
590             case JS_COMMAND_SOFT_STOP_0:
591                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js);
592                 break;
593             case JS_COMMAND_SOFT_STOP_1:
594                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js);
595                 break;
596             case JS_COMMAND_HARD_STOP:
597                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js);
598                 break;
599             case JS_COMMAND_HARD_STOP_0:
600                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js);
601                 break;
602             case JS_COMMAND_HARD_STOP_1:
603                 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
604                 break;
605             default:
606                 BUG();
607                 break;
608         }
609     }
610 #endif
611 }
612 
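/**
 * kbase_backend_jm_kill_running_jobs_from_kctx() - Kill a context's running jobs
 * @kctx: The kbase context whose jobs should be killed
 *
 * Issues a hard-stop for @kctx on every job slot. The caller must hold the
 * hwaccess_lock.
 */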
613 void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
614 {
615     struct kbase_device *kbdev = kctx->kbdev;
616     int i;
617 
618     lockdep_assert_held(&kbdev->hwaccess_lock);
619 
620     for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
621         kbase_job_slot_hardstop(kctx, i, NULL);
622     }
623 }
624 
625 /**
626  * kbase_is_existing_atom_submitted_later_than_ready - Compare atom submission order
627  * @ready: sequence number of the ready atom
628  * @existing: sequence number of the existing atom
629  *
630  * Return: true if the existing atom was submitted later than the ready
631  * atom. This is used to determine whether an atom that is ready has been
632  * submitted earlier than the currently running atom, so that the currently
633  * running atom should be preempted to allow the ready atom to run.
634  */
635 static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing)
636 {
637     /* No seq_nr set? */
638     if (!ready || !existing) {
639         return false;
640     }
641 
642     /* Efficiently handle the unlikely case of wrapping.
643      * The following code assumes that the delta between the sequence number
644      * of the two atoms is less than INT64_MAX.
645      * In the extremely unlikely case where the delta is higher, the comparison
646      * defaults for no preemption.
647      * The code also assumes that the conversion from unsigned to signed types
648      * works because the signed integers are 2's complement.
649      */
650     return (s64)(ready - existing) < 0;
651 }
652 
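/**
 * kbase_job_slot_ctx_priority_check_locked() - Soft-stop atoms blocking a ready atom
 * @kctx:         Context of the atom that is ready to run
 * @target_katom: The atom that is ready to run
 *
 * Soft-stops any atom on @target_katom's slot that has a lower scheduling
 * priority, or that belongs to the same context but was submitted later, so
 * that @target_katom can run. The caller must hold the hwaccess_lock.
 */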
653 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom)
654 {
655     struct kbase_device *kbdev;
656     int js = target_katom->slot_nr;
657     int priority = target_katom->sched_priority;
658     int seq_nr = target_katom->seq_nr;
659     int i;
660     bool stop_sent = false;
661 
662     KBASE_DEBUG_ASSERT(kctx != NULL);
663     kbdev = kctx->kbdev;
664     KBASE_DEBUG_ASSERT(kbdev != NULL);
665 
666     lockdep_assert_held(&kbdev->hwaccess_lock);
667 
668     for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
669         struct kbase_jd_atom *katom;
670 
671         katom = kbase_gpu_inspect(kbdev, js, i);
672         if (!katom) {
673             continue;
674         }
675 
676         if ((kbdev->js_ctx_scheduling_mode == KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) && (katom->kctx != kctx)) {
677             continue;
678         }
679 
680         if ((katom->sched_priority > priority) ||
681             (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) {
682             if (!stop_sent) {
683                 KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(kbdev, target_katom);
684             }
685 
686             kbase_job_slot_softstop(kbdev, js, katom);
687             stop_sent = true;
688         }
689     }
690 }
691 
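/**
 * softstop_start_rp_nolock() - Soft-stop a start-of-renderpass atom after OOM
 * @kctx: Context owning the renderpass
 * @reg:  Memory region that ran out of space during tiling
 *
 * Checks that the atom at the head of job slot 1 is the start-of-renderpass
 * atom for an active renderpass, queues @reg on the renderpass's OOM region
 * list, marks the renderpass as having out-of-memory handling pending and
 * soft-stops the atom. The caller must hold the hwaccess_lock.
 *
 * Return: 0 on success, otherwise a negative error code.
 */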
692 static int softstop_start_rp_nolock(struct kbase_context *kctx, struct kbase_va_region *reg)
693 {
694     struct kbase_device *const kbdev = kctx->kbdev;
695     struct kbase_jd_atom *katom;
696     struct kbase_jd_renderpass *rp;
697 
698     lockdep_assert_held(&kbdev->hwaccess_lock);
699 
700     katom = kbase_gpu_inspect(kbdev, 1, 0);
701     if (!katom) {
702         dev_dbg(kctx->kbdev->dev, "No atom on job slot\n");
703         return -ESRCH;
704     }
705 
706     if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
707         dev_dbg(kctx->kbdev->dev, "Atom %p on job slot is not start RP\n", (void *)katom);
708         return -EPERM;
709     }
710 
711     compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
712                            ARRAY_SIZE(kctx->jctx.renderpasses),
713                        "Should check invalid access to renderpasses");
714 
715     rp = &kctx->jctx.renderpasses[katom->renderpass_id];
716     if (WARN_ON(rp->state != KBASE_JD_RP_START && rp->state != KBASE_JD_RP_RETRY)) {
717         return -EINVAL;
718     }
719 
720     dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", (int)rp->state, (void *)reg);
721 
722     if (WARN_ON(katom != rp->start_katom)) {
723         return -EINVAL;
724     }
725 
726     dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", (void *)reg, (void *)&rp->oom_reg_list);
727     list_move_tail(&reg->link, &rp->oom_reg_list);
728     dev_dbg(kctx->kbdev->dev, "Added region to list\n");
729 
730     rp->state = (rp->state == KBASE_JD_RP_START ? KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM);
731 
732     kbase_job_slot_softstop(kbdev, 1, katom);
733 
734     return 0;
735 }
736 
737 int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, struct kbase_va_region *const reg)
738 {
739     struct kbase_device *const kbdev = kctx->kbdev;
740     int err;
741     unsigned long flags;
742 
743     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
744     err = softstop_start_rp_nolock(kctx, reg);
745     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
746 
747     return err;
748 }
749 
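/**
 * kbase_jm_wait_for_zero_jobs() - Wait for a context's jobs to finish during zap
 * @kctx: The kbase context being zapped
 *
 * Waits up to ZAP_TIMEOUT for the context to have no jobs and to be
 * descheduled. If either wait times out, a GPU reset is issued to remove any
 * jobs stuck on the hardware and the function waits for that reset to
 * complete.
 */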
750 void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
751 {
752     struct kbase_device *kbdev = kctx->kbdev;
753     unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
754 
755     timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0, timeout);
756     if (timeout != 0) {
757         timeout = wait_event_timeout(kctx->jctx.sched_info.ctx.is_scheduled_wait, !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
758                                      timeout);
759     }
760 
761     /* Neither wait timed out; all done! */
762     if (timeout != 0) {
763         goto exit;
764     }
765 
766     if (kbase_prepare_to_reset_gpu(kbdev)) {
767         dev_err(kbdev->dev,
768                 "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context "
769                 "termination (e.g. process exit)\n",
770                 ZAP_TIMEOUT);
771         kbase_reset_gpu(kbdev);
772     }
773 
774     /* Wait for the reset to complete */
775     kbase_reset_gpu_wait(kbdev);
776 exit:
777     dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
778 
779     /* Ensure that the signallers of the waitqs have finished */
780     mutex_lock(&kctx->jctx.lock);
781     mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
782     mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
783     mutex_unlock(&kctx->jctx.lock);
784 }
785 
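/**
 * kbase_backend_get_current_flush_id() - Read the GPU's latest flush ID
 * @kbdev: The kbase device
 *
 * Return: the value of the LATEST_FLUSH register if the GPU supports flush
 * reduction and is currently powered, otherwise 0.
 */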
786 u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
787 {
788     u32 flush_id = 0;
789 
790     if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
791         mutex_lock(&kbdev->pm.lock);
792         if (kbdev->pm.backend.gpu_powered) {
793             flush_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(LATEST_FLUSH));
794         }
795         mutex_unlock(&kbdev->pm.lock);
796     }
797 
798     return flush_id;
799 }
800 
801 int kbase_job_slot_init(struct kbase_device *kbdev)
802 {
803     CSTD_UNUSED(kbdev);
804     return 0;
805 }
806 KBASE_EXPORT_TEST_API(kbase_job_slot_init);
807 
808 void kbase_job_slot_halt(struct kbase_device *kbdev)
809 {
810     CSTD_UNUSED(kbdev);
811 }
812 
813 void kbase_job_slot_term(struct kbase_device *kbdev)
814 {
815     CSTD_UNUSED(kbdev);
816 }
817 KBASE_EXPORT_TEST_API(kbase_job_slot_term);
818 
819 /**
820  * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
821  * @kbdev:         The kbase device
822  * @js:            The job slot to soft-stop
823  * @target_katom:  The job that should be soft-stopped (or NULL for any job)
824  * @sw_flags:      Flags to pass in about the soft-stop
825  *
826  * Context:
827  *   The job slot lock must be held when calling this function.
828  *   The job slot must not already be in the process of being soft-stopped.
829  *
830  * Soft-stop the specified job slot, with extra information about the stop
831  *
832  * Where possible any job in the next register is evicted before the soft-stop.
833  */
834 void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom,
835                                      u32 sw_flags)
836 {
837     dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", target_katom, sw_flags, js);
838 
839     KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
840     kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, JS_COMMAND_SOFT_STOP | sw_flags);
841 }
842 
843 /**
844  * kbase_job_slot_softstop - Soft-stop the specified job slot
845  * @kbdev:         The kbase device
846  * @js:            The job slot to soft-stop
847  * @target_katom:  The job that should be soft-stopped (or NULL for any job)
848  * Context:
849  *   The job slot lock must be held when calling this function.
850  *   The job slot must not already be in the process of being soft-stopped.
851  *
852  * Where possible any job in the next register is evicted before the soft-stop.
853  */
854 void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom)
855 {
856     kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
857 }
858 
859 /**
860  * kbase_job_slot_hardstop - Hard-stop the specified job slot
861  * @kctx:         The kbase context that contains the job(s) that should
862  *                be hard-stopped
863  * @js:           The job slot to hard-stop
864  * @target_katom: The job that should be hard-stopped (or NULL for all
865  *                jobs from the context)
866  * Context:
867  *   The job slot lock must be held when calling this function.
868  */
869 void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom)
870 {
871     struct kbase_device *kbdev = kctx->kbdev;
872     bool stopped;
873 
874     stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, target_katom, JS_COMMAND_HARD_STOP);
875 }
876 
877 /**
878  * kbase_job_check_enter_disjoint - potentially enter disjoint state
879  * @kbdev: kbase device
880  * @action: the event which has occurred
881  * @core_reqs: core requirements of the atom
882  * @target_katom: the atom which is being affected
883  *
884  * For a certain soft-stop action, work out whether to enter disjoint
885  * state.
886  *
887  * This does not register multiple disjoint events if the atom has already
888  * started a disjoint period
889  *
890  * @core_reqs can be supplied as 0 if the atom had not started on the hardware
891  * (and so a 'real' soft/hard-stop was not required, but it still interrupted
892  * flow, perhaps on another context)
893  *
894  * kbase_job_check_leave_disjoint() should be used to end the disjoint
895  * state when the soft/hard-stop action is complete
896  */
897 void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, base_jd_core_req core_reqs,
898                                     struct kbase_jd_atom *target_katom)
899 {
900     u32 hw_action = action & JS_COMMAND_MASK;
901 
902     /* For soft-stop, don't enter if soft-stop not allowed, or isn't
903      * causing disjoint.
904      */
905     if (hw_action == JS_COMMAND_SOFT_STOP &&
906         (kbase_jd_katom_is_protected(target_katom) || ((action & JS_COMMAND_SW_CAUSES_DISJOINT) == 0))) {
907         return;
908     }
909 
910     /* Nothing to do if already logged disjoint state on this atom */
911     if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
912         return;
913     }
914 
915     target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
916     kbase_disjoint_state_up(kbdev);
917 }
918 
919 /**
920  * kbase_job_check_leave_disjoint - potentially leave disjoint state
921  * @kbdev: kbase device
922  * @target_katom: atom which is finishing
923  *
924  * Work out whether to leave disjoint state when finishing an atom that
925  * previously entered disjoint state via kbase_job_check_enter_disjoint().
926  */
927 void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom)
928 {
929     if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
930         target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
931         kbase_disjoint_state_down(kbdev);
932     }
933 }
934 
935 static void kbase_debug_dump_registers(struct kbase_device *kbdev)
936 {
937     int i;
938 
939     kbase_io_history_dump(kbdev);
940 
941     dev_err(kbdev->dev, "Register state:");
942     dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
943             kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
944             kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
945     dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
946             kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
947             kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
948     dev_err(kbdev->dev, "  MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
949             kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
950     dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x    JOB_IRQ_MASK=0x%08x     MMU_IRQ_MASK=0x%08x",
951             kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
952             kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
953     dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
954             kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
955             kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
956     dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x",
957             kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
958             kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
959     dev_err(kbdev->dev, "  TILER_CONFIG=0x%08x    JM_CONFIG=0x%08x",
960             kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
961 }
962 
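/**
 * kbasep_reset_timeout_worker() - Work item that carries out a GPU reset
 * @data: The &struct work_struct embedded in the device's backend reset data
 *
 * Disables hardware counters and interrupts, flushes IRQ handlers and MMU
 * work, completes any jobs still on the GPU, re-initialises the hardware,
 * restores the address spaces and then resumes job scheduling. Runs on the
 * dedicated reset workqueue.
 */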
963 static void kbasep_reset_timeout_worker(struct work_struct *data)
964 {
965     unsigned long flags;
966     struct kbase_device *kbdev;
967     ktime_t end_timestamp = ktime_get();
968     struct kbasep_js_device_data *js_devdata;
969     bool silent = false;
970     u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
971 
972     KBASE_DEBUG_ASSERT(data);
973 
974     kbdev = container_of(data, struct kbase_device, hwaccess.backend.reset_work);
975 
976     KBASE_DEBUG_ASSERT(kbdev);
977     js_devdata = &kbdev->js_data;
978 
979     if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_SILENT) {
980         silent = true;
981     }
982 
983     KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
984 
985     /* Disable GPU hardware counters.
986      * This call will block until counters are disabled.
987      */
988     kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
989 
990     /* Make sure the timer has completed - this cannot be done from
991      * interrupt context, so this cannot be done within
992      * kbasep_try_reset_gpu_early. */
993     hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
994 
995     if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
996         /* This would re-activate the GPU. Since it's already idle,
997          * there's no need to reset it */
998         atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
999         kbase_disjoint_state_down(kbdev);
1000         wake_up(&kbdev->hwaccess.backend.reset_wait);
1001         spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1002         kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
1003         spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1004         return;
1005     }
1006 
1007     KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
1008 
1009     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1010     spin_lock(&kbdev->mmu_mask_change);
1011     kbase_pm_reset_start_locked(kbdev);
1012 
1013     /* We're about to flush out the IRQs and their bottom halves */
1014     kbdev->irq_reset_flush = true;
1015 
1016     /* Disable IRQ to avoid IRQ handlers to kick in after releasing the
1017      * spinlock; this also clears any outstanding interrupts */
1018     kbase_pm_disable_interrupts_nolock(kbdev);
1019 
1020     spin_unlock(&kbdev->mmu_mask_change);
1021     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1022 
1023     /* Ensure that any IRQ handlers have finished
1024      * Must be done without holding any locks the IRQ handlers will take */
1025     kbase_synchronize_irqs(kbdev);
1026 
1027     /* Flush out any in-flight work items */
1028     kbase_flush_mmu_wqs(kbdev);
1029 
1030     /* The flush has completed so reset the active indicator */
1031     kbdev->irq_reset_flush = false;
1032 
1033     if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
1034         /* Ensure that L2 is not transitioning when we send the reset
1035          * command */
1036         while (--max_loops && kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
1037             ;
1038         }
1039 
1040         WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
1041     }
1042 
1043     mutex_lock(&kbdev->pm.lock);
1044     /* We hold the pm lock, so there ought to be a current policy */
1045     KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
1046 
1047     /* All slots have been soft-stopped and we've waited
1048      * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
1049      * assume that anything that is still left on the GPU is stuck there and
1050      * we'll kill it when we reset the GPU */
1051 
1052     if (!silent) {
1053         dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT);
1054     }
1055 
1056     /* Output the state of some interesting registers to help in the
1057      * debugging of GPU resets */
1058     if (!silent) {
1059         kbase_debug_dump_registers(kbdev);
1060     }
1061 
1062     /* Complete any jobs that were still on the GPU */
1063     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1064     kbdev->protected_mode = false;
1065     if (!kbdev->pm.backend.protected_entry_transition_override) {
1066         kbase_backend_reset(kbdev, &end_timestamp);
1067     }
1068     kbase_pm_metrics_update(kbdev, NULL);
1069     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1070 
1071     /* Reset the GPU */
1072     kbase_pm_init_hw(kbdev, 0);
1073 
1074     mutex_unlock(&kbdev->pm.lock);
1075 
1076     mutex_lock(&js_devdata->runpool_mutex);
1077 
1078     mutex_lock(&kbdev->mmu_hw_mutex);
1079     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1080     kbase_ctx_sched_restore_all_as(kbdev);
1081     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1082     mutex_unlock(&kbdev->mmu_hw_mutex);
1083 
1084     kbase_pm_enable_interrupts(kbdev);
1085 
1086     kbase_disjoint_state_down(kbdev);
1087 
1088     mutex_unlock(&js_devdata->runpool_mutex);
1089 
1090     mutex_lock(&kbdev->pm.lock);
1091 
1092     kbase_pm_reset_complete(kbdev);
1093 
1094     /* Find out what cores are required now */
1095     kbase_pm_update_cores_state(kbdev);
1096 
1097     /* Synchronously request and wait for those cores, because if
1098      * instrumentation is enabled it would need them immediately. */
1099     kbase_pm_wait_for_desired_state(kbdev);
1100 
1101     mutex_unlock(&kbdev->pm.lock);
1102 
1103     atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
1104 
1105     wake_up(&kbdev->hwaccess.backend.reset_wait);
1106     if (!silent) {
1107         dev_err(kbdev->dev, "Reset complete");
1108     }
1109 
1110     /* Try submitting some jobs to restart processing */
1111     KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
1112     kbase_js_sched_all(kbdev);
1113 
1114     /* Process any pending slot updates */
1115     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1116     kbase_backend_slot_update(kbdev);
1117     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1118 
1119     kbase_pm_context_idle(kbdev);
1120 
1121     /* Re-enable GPU hardware counters */
1122     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1123     kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
1124     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1125 
1126     KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
1127 }
1128 
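/**
 * kbasep_reset_timer_callback() - Timer callback for a committed GPU reset
 * @timer: The reset hrtimer embedded in the device's backend data
 *
 * Queues the reset worker if a reset is still committed when the soft-stop
 * timeout expires.
 *
 * Return: HRTIMER_NORESTART
 */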
1129 static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
1130 {
1131     struct kbase_device *kbdev = container_of(timer, struct kbase_device, hwaccess.backend.reset_timer);
1132 
1133     KBASE_DEBUG_ASSERT(kbdev);
1134 
1135     /* Reset still pending? */
1136     if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
1137         KBASE_RESET_GPU_COMMITTED) {
1138         queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work);
1139     }
1140 
1141     return HRTIMER_NORESTART;
1142 }
1143 
1144 /*
1145  * If all jobs are evicted from the GPU then we can reset the GPU
1146  * immediately instead of waiting for the timeout to elapse
1147  */
1148 
1149 static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
1150 {
1151     int i;
1152     int pending_jobs = 0;
1153 
1154     KBASE_DEBUG_ASSERT(kbdev);
1155 
1156     /* Count the number of jobs */
1157     for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
1158         pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
1159     }
1160 
1161     if (pending_jobs > 0) {
1162         /* There are still jobs on the GPU - wait */
1163         return;
1164     }
1165 
1166     /* To prevent getting incorrect registers when dumping a failed job,
1167      * skip early reset.
1168      */
1169     if (atomic_read(&kbdev->job_fault_debug) > 0) {
1170         return;
1171     }
1172 
1173     /* Check that the reset has been committed to (i.e. kbase_reset_gpu has
1174      * been called), and that no other thread beat this thread to starting
1175      * the reset */
1176     if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
1177         KBASE_RESET_GPU_COMMITTED) {
1178         /* Reset has already occurred */
1179         return;
1180     }
1181 
1182     queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work);
1183 }
1184 
1185 static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
1186 {
1187     unsigned long flags;
1188 
1189     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1190     kbasep_try_reset_gpu_early_locked(kbdev);
1191     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1192 }
1193 
1194 /**
1195  * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
1196  * @kbdev: kbase device
1197  *
1198  * This function just soft-stops all the slots to ensure that as many jobs as
1199  * possible are saved.
1200  *
1201  * Return:
1202  *   The function returns a boolean which should be interpreted as follows:
1203  *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
1204  *   false - Another thread is performing a reset, kbase_reset_gpu should
1205  *   not be called.
1206  */
1207 bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
1208 {
1209     int i;
1210 
1211     KBASE_DEBUG_ASSERT(kbdev);
1212 
1213 #ifdef CONFIG_MALI_ARBITER_SUPPORT
1214     if (kbase_pm_is_gpu_lost(kbdev)) {
1215         /* GPU access has been removed, reset will be done by
1216          * Arbiter instead
1217          */
1218         return false;
1219     }
1220 #endif
1221 
1222     if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) !=
1223         KBASE_RESET_GPU_NOT_PENDING) {
1224         /* Some other thread is already resetting the GPU */
1225         return false;
1226     }
1227 
1228     kbase_disjoint_state_up(kbdev);
1229 
1230     for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
1231         kbase_job_slot_softstop(kbdev, i, NULL);
1232     }
1233 
1234     return true;
1235 }
1236 
1237 bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
1238 {
1239     unsigned long flags;
1240     bool ret;
1241 
1242     spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1243     ret = kbase_prepare_to_reset_gpu_locked(kbdev);
1244     spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1245 
1246     return ret;
1247 }
1248 KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
1249 
1250 /*
1251  * This function should be called after kbase_prepare_to_reset_gpu if it
1252  * returns true. It should never be called without a corresponding call to
1253  * kbase_prepare_to_reset_gpu.
1254  *
1255  * After this function is called (or not called if kbase_prepare_to_reset_gpu
1256  * returned false), the caller should wait for
1257  * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset
1258  * has completed.
1259  */
1260 void kbase_reset_gpu(struct kbase_device *kbdev)
1261 {
1262     KBASE_DEBUG_ASSERT(kbdev);
1263 
1264     /* Note this is an assert/atomic_set because it is a software issue for
1265      * a race to be occurring here */
1266     KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_PREPARED);
1267     atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED);
1268 
1269     dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
1270             kbdev->reset_timeout_ms);
1271 
1272     hrtimer_start(&kbdev->hwaccess.backend.reset_timer, HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL);
1273 
1274     /* Try resetting early */
1275     kbasep_try_reset_gpu_early(kbdev);
1276 }
1277 KBASE_EXPORT_TEST_API(kbase_reset_gpu);
1278 
1279 void kbase_reset_gpu_locked(struct kbase_device *kbdev)
1280 {
1281     KBASE_DEBUG_ASSERT(kbdev);
1282 
1283     /* Note this is an assert/atomic_set because it is a software issue for
1284      * a race to be occurring here */
1285     KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_PREPARED);
1286     atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED);
1287 
1288     dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
1289             kbdev->reset_timeout_ms);
1290     hrtimer_start(&kbdev->hwaccess.backend.reset_timer, HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms), HRTIMER_MODE_REL);
1291 
1292     /* Try resetting early */
1293     kbasep_try_reset_gpu_early_locked(kbdev);
1294 }
1295 
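/**
 * kbase_reset_gpu_silent() - Reset the GPU without diagnostics or soft-stops
 * @kbdev: The kbase device
 *
 * Queues the reset worker directly, skipping the error messages and the
 * soft-stop/timeout sequence used by kbase_reset_gpu().
 *
 * Return: 0 if the reset was queued, -EAGAIN if another reset is already in
 * progress.
 */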
1296 int kbase_reset_gpu_silent(struct kbase_device *kbdev)
1297 {
1298     if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_SILENT) !=
1299         KBASE_RESET_GPU_NOT_PENDING) {
1300         /* Some other thread is already resetting the GPU */
1301         return -EAGAIN;
1302     }
1303 
1304     kbase_disjoint_state_up(kbdev);
1305 
1306     queue_work(kbdev->hwaccess.backend.reset_workq, &kbdev->hwaccess.backend.reset_work);
1307 
1308     return 0;
1309 }
1310 
1311 bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
1312 {
1313     if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING) {
1314         return false;
1315     }
1316 
1317     return true;
1318 }
1319 
1320 int kbase_reset_gpu_wait(struct kbase_device *kbdev)
1321 {
1322     wait_event(kbdev->hwaccess.backend.reset_wait,
1323                atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING);
1324 
1325     return 0;
1326 }
1327 KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
1328 
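/**
 * kbase_reset_gpu_init() - Initialise the GPU reset workqueue and timer
 * @kbdev: The kbase device
 *
 * Return: 0 on success, -ENOMEM if the reset workqueue cannot be allocated.
 */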
1329 int kbase_reset_gpu_init(struct kbase_device *kbdev)
1330 {
1331     kbdev->hwaccess.backend.reset_workq = alloc_workqueue("Mali reset workqueue", 0, 1);
1332     if (kbdev->hwaccess.backend.reset_workq == NULL) {
1333         return -ENOMEM;
1334     }
1335 
1336     INIT_WORK(&kbdev->hwaccess.backend.reset_work, kbasep_reset_timeout_worker);
1337 
1338     hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1339     kbdev->hwaccess.backend.reset_timer.function = kbasep_reset_timer_callback;
1340 
1341     return 0;
1342 }
1343 
1344 void kbase_reset_gpu_term(struct kbase_device *kbdev)
1345 {
1346     destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
1347 }
1348