1 /*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2018 Intel Corporation
5 */
6
7 #include <linux/sort.h>
8
9 #include "i915_drv.h"
10
11 #include "intel_gt_requests.h"
12 #include "i915_selftest.h"
13 #include "selftest_engine_heartbeat.h"
14
timeline_sync(struct intel_timeline * tl)15 static int timeline_sync(struct intel_timeline *tl)
16 {
17 struct dma_fence *fence;
18 long timeout;
19
20 fence = i915_active_fence_get(&tl->last_request);
21 if (!fence)
22 return 0;
23
24 timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
25 dma_fence_put(fence);
26 if (timeout < 0)
27 return timeout;
28
29 return 0;
30 }
31
engine_sync_barrier(struct intel_engine_cs * engine)32 static int engine_sync_barrier(struct intel_engine_cs *engine)
33 {
34 return timeline_sync(engine->kernel_context->timeline);
35 }
36
37 struct pulse {
38 struct i915_active active;
39 struct kref kref;
40 };
41
pulse_active(struct i915_active * active)42 static int pulse_active(struct i915_active *active)
43 {
44 kref_get(&container_of(active, struct pulse, active)->kref);
45 return 0;
46 }
47
pulse_free(struct kref * kref)48 static void pulse_free(struct kref *kref)
49 {
50 struct pulse *p = container_of(kref, typeof(*p), kref);
51
52 i915_active_fini(&p->active);
53 kfree(p);
54 }
55
pulse_put(struct pulse * p)56 static void pulse_put(struct pulse *p)
57 {
58 kref_put(&p->kref, pulse_free);
59 }
60
pulse_retire(struct i915_active * active)61 static void pulse_retire(struct i915_active *active)
62 {
63 pulse_put(container_of(active, struct pulse, active));
64 }
65
pulse_create(void)66 static struct pulse *pulse_create(void)
67 {
68 struct pulse *p;
69
70 p = kmalloc(sizeof(*p), GFP_KERNEL);
71 if (!p)
72 return p;
73
74 kref_init(&p->kref);
75 i915_active_init(&p->active, pulse_active, pulse_retire);
76
77 return p;
78 }
79
pulse_unlock_wait(struct pulse * p)80 static void pulse_unlock_wait(struct pulse *p)
81 {
82 i915_active_unlock_wait(&p->active);
83 }
84
__live_idle_pulse(struct intel_engine_cs * engine,int (* fn)(struct intel_engine_cs * cs))85 static int __live_idle_pulse(struct intel_engine_cs *engine,
86 int (*fn)(struct intel_engine_cs *cs))
87 {
88 struct pulse *p;
89 int err;
90
91 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
92
93 p = pulse_create();
94 if (!p)
95 return -ENOMEM;
96
97 err = i915_active_acquire(&p->active);
98 if (err)
99 goto out;
100
101 err = i915_active_acquire_preallocate_barrier(&p->active, engine);
102 if (err) {
103 i915_active_release(&p->active);
104 goto out;
105 }
106
107 i915_active_acquire_barrier(&p->active);
108 i915_active_release(&p->active);
109
110 GEM_BUG_ON(i915_active_is_idle(&p->active));
111 GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
112
113 err = fn(engine);
114 if (err)
115 goto out;
116
117 GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
118
119 if (engine_sync_barrier(engine)) {
120 struct drm_printer m = drm_err_printer("pulse");
121
122 pr_err("%s: no heartbeat pulse?\n", engine->name);
123 intel_engine_dump(engine, &m, "%s", engine->name);
124
125 err = -ETIME;
126 goto out;
127 }
128
129 GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
130
131 pulse_unlock_wait(p); /* synchronize with the retirement callback */
132
133 if (!i915_active_is_idle(&p->active)) {
134 struct drm_printer m = drm_err_printer("pulse");
135
136 pr_err("%s: heartbeat pulse did not flush idle tasks\n",
137 engine->name);
138 i915_active_print(&p->active, &m);
139
140 err = -EINVAL;
141 goto out;
142 }
143
144 out:
145 pulse_put(p);
146 return err;
147 }
148
live_idle_flush(void * arg)149 static int live_idle_flush(void *arg)
150 {
151 struct intel_gt *gt = arg;
152 struct intel_engine_cs *engine;
153 enum intel_engine_id id;
154 int err = 0;
155
156 /* Check that we can flush the idle barriers */
157
158 for_each_engine(engine, gt, id) {
159 st_engine_heartbeat_disable(engine);
160 err = __live_idle_pulse(engine, intel_engine_flush_barriers);
161 st_engine_heartbeat_enable(engine);
162 if (err)
163 break;
164 }
165
166 return err;
167 }
168
live_idle_pulse(void * arg)169 static int live_idle_pulse(void *arg)
170 {
171 struct intel_gt *gt = arg;
172 struct intel_engine_cs *engine;
173 enum intel_engine_id id;
174 int err = 0;
175
176 /* Check that heartbeat pulses flush the idle barriers */
177
178 for_each_engine(engine, gt, id) {
179 st_engine_heartbeat_disable(engine);
180 err = __live_idle_pulse(engine, intel_engine_pulse);
181 st_engine_heartbeat_enable(engine);
182 if (err && err != -ENODEV)
183 break;
184
185 err = 0;
186 }
187
188 return err;
189 }
190
/*
 * sort() comparator for arrays of u32, ascending order.
 *
 * Returns a negative value, zero or a positive value when *_a is less than,
 * equal to or greater than *_b. The values are compared rather than
 * subtracted: an unsigned difference converted to int has the wrong sign
 * whenever the operands are more than INT_MAX apart.
 */
static int cmp_u32(const void *_a, const void *_b)
{
	const u32 *a = _a, *b = _b;

	if (*a < *b)
		return -1;

	return *a > *b;
}
197
__live_heartbeat_fast(struct intel_engine_cs * engine)198 static int __live_heartbeat_fast(struct intel_engine_cs *engine)
199 {
200 struct intel_context *ce;
201 struct i915_request *rq;
202 ktime_t t0, t1;
203 u32 times[5];
204 int err;
205 int i;
206
207 ce = intel_context_create(engine);
208 if (IS_ERR(ce))
209 return PTR_ERR(ce);
210
211 intel_engine_pm_get(engine);
212
213 err = intel_engine_set_heartbeat(engine, 1);
214 if (err)
215 goto err_pm;
216
217 for (i = 0; i < ARRAY_SIZE(times); i++) {
218 /* Manufacture a tick */
219 do {
220 while (READ_ONCE(engine->heartbeat.systole))
221 flush_delayed_work(&engine->heartbeat.work);
222
223 engine->serial++; /* quick, pretend we are not idle! */
224 flush_delayed_work(&engine->heartbeat.work);
225 if (!delayed_work_pending(&engine->heartbeat.work)) {
226 pr_err("%s: heartbeat did not start\n",
227 engine->name);
228 err = -EINVAL;
229 goto err_pm;
230 }
231
232 rcu_read_lock();
233 rq = READ_ONCE(engine->heartbeat.systole);
234 if (rq)
235 rq = i915_request_get_rcu(rq);
236 rcu_read_unlock();
237 } while (!rq);
238
239 t0 = ktime_get();
240 while (rq == READ_ONCE(engine->heartbeat.systole))
241 yield(); /* work is on the local cpu! */
242 t1 = ktime_get();
243
244 i915_request_put(rq);
245 times[i] = ktime_us_delta(t1, t0);
246 }
247
248 sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
249
250 pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
251 engine->name,
252 times[ARRAY_SIZE(times) / 2],
253 times[0],
254 times[ARRAY_SIZE(times) - 1]);
255
256 /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
257 if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
258 pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
259 engine->name,
260 times[ARRAY_SIZE(times) / 2],
261 jiffies_to_usecs(6));
262 err = -EINVAL;
263 }
264
265 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
266 err_pm:
267 intel_engine_pm_put(engine);
268 intel_context_put(ce);
269 return err;
270 }
271
live_heartbeat_fast(void * arg)272 static int live_heartbeat_fast(void *arg)
273 {
274 struct intel_gt *gt = arg;
275 struct intel_engine_cs *engine;
276 enum intel_engine_id id;
277 int err = 0;
278
279 /* Check that the heartbeat ticks at the desired rate. */
280 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
281 return 0;
282
283 for_each_engine(engine, gt, id) {
284 err = __live_heartbeat_fast(engine);
285 if (err)
286 break;
287 }
288
289 return err;
290 }
291
__live_heartbeat_off(struct intel_engine_cs * engine)292 static int __live_heartbeat_off(struct intel_engine_cs *engine)
293 {
294 int err;
295
296 intel_engine_pm_get(engine);
297
298 engine->serial++;
299 flush_delayed_work(&engine->heartbeat.work);
300 if (!delayed_work_pending(&engine->heartbeat.work)) {
301 pr_err("%s: heartbeat not running\n",
302 engine->name);
303 err = -EINVAL;
304 goto err_pm;
305 }
306
307 err = intel_engine_set_heartbeat(engine, 0);
308 if (err)
309 goto err_pm;
310
311 engine->serial++;
312 flush_delayed_work(&engine->heartbeat.work);
313 if (delayed_work_pending(&engine->heartbeat.work)) {
314 pr_err("%s: heartbeat still running\n",
315 engine->name);
316 err = -EINVAL;
317 goto err_beat;
318 }
319
320 if (READ_ONCE(engine->heartbeat.systole)) {
321 pr_err("%s: heartbeat still allocated\n",
322 engine->name);
323 err = -EINVAL;
324 goto err_beat;
325 }
326
327 err_beat:
328 intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
329 err_pm:
330 intel_engine_pm_put(engine);
331 return err;
332 }
333
live_heartbeat_off(void * arg)334 static int live_heartbeat_off(void *arg)
335 {
336 struct intel_gt *gt = arg;
337 struct intel_engine_cs *engine;
338 enum intel_engine_id id;
339 int err = 0;
340
341 /* Check that we can turn off heartbeat and not interrupt VIP */
342 if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
343 return 0;
344
345 for_each_engine(engine, gt, id) {
346 if (!intel_engine_has_preemption(engine))
347 continue;
348
349 err = __live_heartbeat_off(engine);
350 if (err)
351 break;
352 }
353
354 return err;
355 }
356
intel_heartbeat_live_selftests(struct drm_i915_private * i915)357 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
358 {
359 static const struct i915_subtest tests[] = {
360 SUBTEST(live_idle_flush),
361 SUBTEST(live_idle_pulse),
362 SUBTEST(live_heartbeat_fast),
363 SUBTEST(live_heartbeat_off),
364 };
365 int saved_hangcheck;
366 int err;
367
368 if (intel_gt_is_wedged(&i915->gt))
369 return 0;
370
371 saved_hangcheck = i915->params.enable_hangcheck;
372 i915->params.enable_hangcheck = INT_MAX;
373
374 err = intel_gt_live_subtests(tests, &i915->gt);
375
376 i915->params.enable_hangcheck = saved_hangcheck;
377 return err;
378 }
379
st_engine_heartbeat_disable(struct intel_engine_cs * engine)380 void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
381 {
382 engine->props.heartbeat_interval_ms = 0;
383
384 intel_engine_pm_get(engine);
385 intel_engine_park_heartbeat(engine);
386 }
387
st_engine_heartbeat_enable(struct intel_engine_cs * engine)388 void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
389 {
390 intel_engine_pm_put(engine);
391
392 engine->props.heartbeat_interval_ms =
393 engine->defaults.heartbeat_interval_ms;
394 }
395