1 /*
2 * Copyright © 2018-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <sched.h>
25
26 #include "igt.h"
27 #include "igt_perf.h"
28 #include "i915/gem_ring.h"
29 #include "sw_sync.h"
30
31 IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
32
33 #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
34
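/* Sizes of the variable-length uAPI structs, up to the end of a count-element engines[] array. */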
static size_t sizeof_load_balance(int count)
36 {
37 return offsetof(struct i915_context_engines_load_balance,
38 engines[count]);
39 }
40
static size_t sizeof_param_engines(int count)
42 {
43 return offsetof(struct i915_context_param_engines,
44 engines[count]);
45 }
46
static size_t sizeof_engines_bond(int count)
48 {
49 return offsetof(struct i915_context_engines_bond,
50 engines[count]);
51 }
52
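/* Return sz bytes of zeroed stack memory; released automatically when the calling function returns. */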
53 #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
54
static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
56 {
57 int fd;
58
59 fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
60 if (fd != -1) {
61 close(fd);
62 return true;
63 }
64
65 return false;
66 }
67
static struct i915_engine_class_instance *
list_engines(int i915, uint32_t class_mask, unsigned int *out)
70 {
71 unsigned int count = 0, size = 64;
72 struct i915_engine_class_instance *engines;
73
74 engines = malloc(size * sizeof(*engines));
75 igt_assert(engines);
76
77 for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
78 class_mask;
79 class++, class_mask >>= 1) {
80 if (!(class_mask & 1))
81 continue;
82
83 for (unsigned int instance = 0;
84 instance < INSTANCE_COUNT;
85 instance++) {
86 if (!has_class_instance(i915, class, instance))
87 continue;
88
89 if (count == size) {
90 size *= 2;
91 engines = realloc(engines,
92 size * sizeof(*engines));
93 igt_assert(engines);
94 }
95
96 engines[count++] = (struct i915_engine_class_instance){
97 .engine_class = class,
98 .engine_instance = instance,
99 };
100 }
101 }
102
103 if (!count) {
104 free(engines);
105 engines = NULL;
106 }
107
108 *out = count;
109 return engines;
110 }
111
static int __set_engines(int i915, uint32_t ctx,
			 const struct i915_engine_class_instance *ci,
			 unsigned int count)
115 {
116 struct i915_context_param_engines *engines =
117 alloca0(sizeof_param_engines(count));
118 struct drm_i915_gem_context_param p = {
119 .ctx_id = ctx,
120 .param = I915_CONTEXT_PARAM_ENGINES,
121 .size = sizeof_param_engines(count),
122 .value = to_user_pointer(engines)
123 };
124
125 engines->extensions = 0;
126 memcpy(engines->engines, ci, count * sizeof(*ci));
127
128 return __gem_context_set_param(i915, &p);
129 }
130
static void set_engines(int i915, uint32_t ctx,
			const struct i915_engine_class_instance *ci,
			unsigned int count)
134 {
135 igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
136 }
137
static int __set_load_balancer(int i915, uint32_t ctx,
			       const struct i915_engine_class_instance *ci,
			       unsigned int count,
			       void *ext)
142 {
143 struct i915_context_engines_load_balance *balancer =
144 alloca0(sizeof_load_balance(count));
145 struct i915_context_param_engines *engines =
146 alloca0(sizeof_param_engines(count + 1));
147 struct drm_i915_gem_context_param p = {
148 .ctx_id = ctx,
149 .param = I915_CONTEXT_PARAM_ENGINES,
150 .size = sizeof_param_engines(count + 1),
151 .value = to_user_pointer(engines)
152 };
153
154 balancer->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
155 balancer->base.next_extension = to_user_pointer(ext);
156
157 igt_assert(count);
158 balancer->num_siblings = count;
159 memcpy(balancer->engines, ci, count * sizeof(*ci));
160
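	/*
	 * Slot 0 is left as an invalid class:instance placeholder; the
	 * load-balance extension (engine_index 0 by default) turns that slot
	 * into the virtual engine, while the real siblings occupy slots
	 * 1..count.
	 */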
161 engines->extensions = to_user_pointer(balancer);
162 engines->engines[0].engine_class =
163 I915_ENGINE_CLASS_INVALID;
164 engines->engines[0].engine_instance =
165 I915_ENGINE_CLASS_INVALID_NONE;
166 memcpy(engines->engines + 1, ci, count * sizeof(*ci));
167
168 return __gem_context_set_param(i915, &p);
169 }
170
static void set_load_balancer(int i915, uint32_t ctx,
			      const struct i915_engine_class_instance *ci,
			      unsigned int count,
			      void *ext)
175 {
176 igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
177 }
178
static uint32_t load_balancer_create(int i915,
				     const struct i915_engine_class_instance *ci,
				     unsigned int count)
182 {
183 uint32_t ctx;
184
185 ctx = gem_context_create(i915);
186 set_load_balancer(i915, ctx, ci, count, NULL);
187
188 return ctx;
189 }
190
static uint32_t __batch_create(int i915, uint32_t offset)
192 {
193 const uint32_t bbe = MI_BATCH_BUFFER_END;
194 uint32_t handle;
195
196 handle = gem_create(i915, ALIGN(offset + 4, 4096));
197 gem_write(i915, handle, offset, &bbe, sizeof(bbe));
198
199 return handle;
200 }
201
static uint32_t batch_create(int i915)
203 {
204 return __batch_create(i915, 0);
205 }
206
static void invalid_balancer(int i915)
208 {
209 I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
210 I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
211 struct drm_i915_gem_context_param p = {
212 .param = I915_CONTEXT_PARAM_ENGINES,
213 .value = to_user_pointer(&engines)
214 };
215 uint32_t handle;
216 void *ptr;
217
	/*
	 * Assume that I915_CONTEXT_PARAM_ENGINES validates the array of
	 * engines[]; our job is to determine whether the load_balancer
	 * extension explodes.
	 */
223
224 for (int class = 0; class < 32; class++) {
225 struct i915_engine_class_instance *ci;
226 unsigned int count;
227
228 ci = list_engines(i915, 1 << class, &count);
229 if (!ci)
230 continue;
231
232 igt_assert_lte(count, 64);
233
234 p.ctx_id = gem_context_create(i915);
235 p.size = (sizeof(struct i915_context_param_engines) +
236 (count + 1) * sizeof(*engines.engines));
237
238 memset(&engines, 0, sizeof(engines));
239 engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
240 engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
241 memcpy(engines.engines + 1, ci, count * sizeof(*ci));
242 gem_context_set_param(i915, &p);
243
244 engines.extensions = -1ull;
245 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
246
247 engines.extensions = 1ull;
248 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
249
250 memset(&balancer, 0, sizeof(balancer));
251 balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
252 balancer.num_siblings = count;
253 memcpy(balancer.engines, ci, count * sizeof(*ci));
254
255 engines.extensions = to_user_pointer(&balancer);
256 gem_context_set_param(i915, &p);
257
258 balancer.engine_index = 1;
259 igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
260
261 balancer.engine_index = count;
262 igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
263
264 balancer.engine_index = count + 1;
265 igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
266
267 balancer.engine_index = 0;
268 gem_context_set_param(i915, &p);
269
270 balancer.base.next_extension = to_user_pointer(&balancer);
271 igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
272
273 balancer.base.next_extension = -1ull;
274 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
275
276 handle = gem_create(i915, 4096 * 3);
277 ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
278 gem_close(i915, handle);
279
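		/*
		 * Copy the balancer so it straddles the page boundaries of a
		 * three-page mapping, then unmap pages underneath it: every
		 * lookup that would touch an unmapped page must fail with
		 * -EFAULT instead of being silently skipped.
		 */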
280 memset(&engines, 0, sizeof(engines));
281 engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
282 engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
283 engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
284 engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		memcpy(engines.engines + 2, ci, count * sizeof(*ci));
286 p.size = (sizeof(struct i915_context_param_engines) +
287 (count + 2) * sizeof(*engines.engines));
288 gem_context_set_param(i915, &p);
289
290 balancer.base.next_extension = 0;
291 balancer.engine_index = 1;
292 engines.extensions = to_user_pointer(&balancer);
293 gem_context_set_param(i915, &p);
294
295 memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
296 memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
297 balancer.engine_index = 0;
298
299 engines.extensions = to_user_pointer(ptr) + 4096 - 8;
300 gem_context_set_param(i915, &p);
301
302 balancer.base.next_extension = engines.extensions;
303 engines.extensions = to_user_pointer(&balancer);
304 gem_context_set_param(i915, &p);
305
306 munmap(ptr, 4096);
307 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
308 engines.extensions = to_user_pointer(ptr) + 4096 - 8;
309 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
310
311 engines.extensions = to_user_pointer(ptr) + 8192 - 8;
312 gem_context_set_param(i915, &p);
313
314 balancer.base.next_extension = engines.extensions;
315 engines.extensions = to_user_pointer(&balancer);
316 gem_context_set_param(i915, &p);
317
318 munmap(ptr + 8192, 4096);
319 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
320 engines.extensions = to_user_pointer(ptr) + 8192 - 8;
321 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
322
323 munmap(ptr + 4096, 4096);
324
325 gem_context_destroy(i915, p.ctx_id);
326 free(ci);
327 }
328 }
329
static void invalid_bonds(int i915)
331 {
332 I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
333 I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
334 struct drm_i915_gem_context_param p = {
335 .ctx_id = gem_context_create(i915),
336 .param = I915_CONTEXT_PARAM_ENGINES,
337 .value = to_user_pointer(&engines),
338 .size = sizeof(engines),
339 };
340 uint32_t handle;
341 void *ptr;
342
343 memset(&engines, 0, sizeof(engines));
344 gem_context_set_param(i915, &p);
345
346 memset(bonds, 0, sizeof(bonds));
347 for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
348 bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
349 bonds[n].base.next_extension =
350 n ? to_user_pointer(&bonds[n - 1]) : 0;
351 bonds[n].num_bonds = 1;
352 }
353 engines.extensions = to_user_pointer(&bonds);
354 gem_context_set_param(i915, &p);
355
356 bonds[0].base.next_extension = -1ull;
357 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
358
359 bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
360 igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
361
362 engines.extensions = to_user_pointer(&bonds[1]);
363 igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
364 bonds[0].base.next_extension = 0;
365 gem_context_set_param(i915, &p);
366
367 handle = gem_create(i915, 4096 * 3);
368 ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
369 gem_close(i915, handle);
370
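	/*
	 * Scatter the bond extensions across a three-page mapping and unmap
	 * pages beneath the chain, checking that both the extension pointer
	 * and its next_extension links fail with -EFAULT once unmapped.
	 */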
371 memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
372 engines.extensions = to_user_pointer(ptr) + 4096;
373 gem_context_set_param(i915, &p);
374
375 memcpy(ptr, &bonds[0], sizeof(bonds[0]));
376 bonds[0].base.next_extension = to_user_pointer(ptr);
377 memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
378 gem_context_set_param(i915, &p);
379
380 munmap(ptr, 4096);
381 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
382
383 bonds[0].base.next_extension = 0;
384 memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
385 bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
386 memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
387 gem_context_set_param(i915, &p);
388
389 munmap(ptr + 8192, 4096);
390 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
391
392 munmap(ptr + 4096, 4096);
393 igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
394
395 gem_context_destroy(i915, p.ctx_id);
396 }
397
static void kick_kthreads(void)
399 {
400 usleep(20 * 1000); /* 20ms should be enough for ksoftirqd! */
401 }
402
static double measure_load(int pmu, int period_us)
404 {
405 uint64_t data[2];
406 uint64_t d_t, d_v;
407
408 kick_kthreads();
409
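	/*
	 * The counter is expected to have been opened with
	 * PERF_FORMAT_TOTAL_TIME_ENABLED, so each read() returns
	 * { busy ns, time-enabled ns }; sampling twice and dividing the
	 * deltas yields the busy fraction over the period.
	 */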
410 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
411 d_v = -data[0];
412 d_t = -data[1];
413
414 usleep(period_us);
415
416 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
417 d_v += data[0];
418 d_t += data[1];
419
420 return d_v / (double)d_t;
421 }
422
static double measure_min_load(int pmu, unsigned int num, int period_us)
424 {
425 uint64_t data[2 + num];
426 uint64_t d_t, d_v[num];
427 uint64_t min = -1, max = 0;
428
429 kick_kthreads();
430
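	/*
	 * pmu is a perf group leader (PERF_FORMAT_GROUP plus
	 * PERF_FORMAT_TOTAL_TIME_ENABLED): data[0] is the number of
	 * counters, data[1] the time enabled, and data[2 + n] the busy time
	 * of the n'th sibling engine.
	 */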
431 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
432 for (unsigned int n = 0; n < num; n++)
433 d_v[n] = -data[2 + n];
434 d_t = -data[1];
435
436 usleep(period_us);
437
438 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
439
440 d_t += data[1];
441 for (unsigned int n = 0; n < num; n++) {
442 d_v[n] += data[2 + n];
443 igt_debug("engine[%d]: %.1f%%\n",
444 n, d_v[n] / (double)d_t * 100);
445 if (d_v[n] < min)
446 min = d_v[n];
447 if (d_v[n] > max)
448 max = d_v[n];
449 }
450
451 igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
452 d_t, min / (double)d_t * 100, max / (double)d_t * 100);
453
454 return min / (double)d_t;
455 }
456
static void measure_all_load(int pmu, double *v, unsigned int num, int period_us)
458 {
459 uint64_t data[2 + num];
460 uint64_t d_t, d_v[num];
461
462 kick_kthreads();
463
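	/* Same group read() layout as measure_min_load(): count, time enabled, then per-engine busy times. */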
464 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
465 for (unsigned int n = 0; n < num; n++)
466 d_v[n] = -data[2 + n];
467 d_t = -data[1];
468
469 usleep(period_us);
470
471 igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
472
473 d_t += data[1];
474 for (unsigned int n = 0; n < num; n++) {
475 d_v[n] += data[2 + n];
476 igt_debug("engine[%d]: %.1f%%\n",
477 n, d_v[n] / (double)d_t * 100);
478 v[n] = d_v[n] / (double)d_t;
479 }
480 }
481
static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
483 {
484 return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
485 ci->engine_instance),
486 pmu);
487 }
488
static const char *class_to_str(int class)
490 {
491 const char *str[] = {
492 [I915_ENGINE_CLASS_RENDER] = "rcs",
493 [I915_ENGINE_CLASS_COPY] = "bcs",
494 [I915_ENGINE_CLASS_VIDEO] = "vcs",
495 [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs",
496 };
497
498 if (class < ARRAY_SIZE(str))
499 return str[class];
500
501 return "unk";
502 }
503
static void check_individual_engine(int i915,
				    uint32_t ctx,
				    const struct i915_engine_class_instance *ci,
				    int idx)
508 {
509 igt_spin_t *spin;
510 double load;
511 int pmu;
512
513 pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
514 ci[idx].engine_instance));
515
516 spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
517 load = measure_load(pmu, 10000);
518 igt_spin_free(i915, spin);
519
520 close(pmu);
521
522 igt_assert_f(load > 0.90,
523 "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
524 idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
525 }
526
static void individual(int i915)
528 {
529 uint32_t ctx;
530
	/*
	 * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
	 * supplied array from gem_execbuf(). Our check is to build
	 * ctx->engine[] from various engine classes, feed in a spinner and
	 * then ask the pmu to confirm that the expected engine was busy.
	 */
538
539 ctx = gem_context_create(i915);
540
541 for (int class = 0; class < 32; class++) {
542 struct i915_engine_class_instance *ci;
543 unsigned int count;
544
545 ci = list_engines(i915, 1u << class, &count);
546 if (!ci)
547 continue;
548
549 for (int pass = 0; pass < count; pass++) { /* approx. count! */
550 igt_assert(sizeof(*ci) == sizeof(int));
551 igt_permute_array(ci, count, igt_exchange_int);
552 set_load_balancer(i915, ctx, ci, count, NULL);
553 for (unsigned int n = 0; n < count; n++)
554 check_individual_engine(i915, ctx, ci, n);
555 }
556
557 free(ci);
558 }
559
560 gem_context_destroy(i915, ctx);
561 gem_quiescent_gpu(i915);
562 }
563
static void bonded(int i915, unsigned int flags)
565 #define CORK 0x1
566 {
567 I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
568 struct i915_engine_class_instance *master_engines;
569 uint32_t master;
570
	/*
	 * I915_CONTEXT_PARAM_ENGINES provides an extension that allows us
	 * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
	 * request submitted to another engine.
	 */
576
577 master = gem_queue_create(i915);
578
579 memset(bonds, 0, sizeof(bonds));
580 for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
581 bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
582 bonds[n].base.next_extension =
583 n ? to_user_pointer(&bonds[n - 1]) : 0;
584 bonds[n].num_bonds = 1;
585 }
586
587 for (int class = 0; class < 32; class++) {
588 struct i915_engine_class_instance *siblings;
589 unsigned int count, limit, *order;
590 uint32_t ctx;
591 int n;
592
593 siblings = list_engines(i915, 1u << class, &count);
594 if (!siblings)
595 continue;
596
597 if (count < 2) {
598 free(siblings);
599 continue;
600 }
601
602 master_engines = list_engines(i915, ~(1u << class), &limit);
603 set_engines(i915, master, master_engines, limit);
604
605 limit = min(count, limit);
606 igt_assert(limit <= ARRAY_SIZE(bonds));
607 for (n = 0; n < limit; n++) {
608 bonds[n].master = master_engines[n];
609 bonds[n].engines[0] = siblings[n];
610 }
611
612 ctx = gem_context_clone(i915,
613 master, I915_CONTEXT_CLONE_VM,
614 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
615 set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
616
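		/*
		 * Build a schedule of 8 * limit submissions: the first two
		 * passes walk the bonds forwards then backwards, and the
		 * remaining six are a random permutation of the same.
		 */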
617 order = malloc(sizeof(*order) * 8 * limit);
618 igt_assert(order);
619 for (n = 0; n < limit; n++)
620 order[2 * limit - n - 1] = order[n] = n % limit;
621 memcpy(order + 2 * limit, order, 2 * limit * sizeof(*order));
622 memcpy(order + 4 * limit, order, 4 * limit * sizeof(*order));
623 igt_permute_array(order + 2 * limit, 6 * limit, igt_exchange_int);
624
625 for (n = 0; n < 8 * limit; n++) {
626 struct drm_i915_gem_execbuffer2 eb;
627 igt_spin_t *spin, *plug;
628 IGT_CORK_HANDLE(cork);
629 double v[limit];
630 int pmu[limit + 1];
631 int bond = order[n];
632
633 pmu[0] = -1;
634 for (int i = 0; i < limit; i++)
635 pmu[i] = add_pmu(pmu[0], &siblings[i]);
636 pmu[limit] = add_pmu(pmu[0], &master_engines[bond]);
637
638 igt_assert(siblings[bond].engine_class !=
639 master_engines[bond].engine_class);
640
641 plug = NULL;
642 if (flags & CORK) {
643 plug = __igt_spin_new(i915,
644 .ctx = master,
645 .engine = bond,
646 .dependency = igt_cork_plug(&cork, i915));
647 }
648
649 spin = __igt_spin_new(i915,
650 .ctx = master,
651 .engine = bond,
652 .flags = IGT_SPIN_FENCE_OUT);
653
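			/*
			 * Submit the paired request on the bonded context with
			 * I915_EXEC_FENCE_SUBMIT: it may only begin once the
			 * master spinner starts, and the bond extension steers
			 * it onto the sibling paired with master_engines[bond].
			 */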
654 eb = spin->execbuf;
655 eb.rsvd1 = ctx;
656 eb.rsvd2 = spin->out_fence;
657 eb.flags = I915_EXEC_FENCE_SUBMIT;
658 gem_execbuf(i915, &eb);
659
660 if (plug) {
661 igt_cork_unplug(&cork);
662 igt_spin_free(i915, plug);
663 }
664
665 measure_all_load(pmu[0], v, limit + 1, 10000);
666 igt_spin_free(i915, spin);
667
668 igt_assert_f(v[bond] > 0.90,
669 "engine %d (class:instance %s:%d) was found to be only %.1f%% busy\n",
670 bond,
671 class_to_str(siblings[bond].engine_class),
672 siblings[bond].engine_instance,
673 100 * v[bond]);
674 for (int other = 0; other < limit; other++) {
675 if (other == bond)
676 continue;
677
678 igt_assert_f(v[other] == 0,
679 "engine %d (class:instance %s:%d) was not idle, and actually %.1f%% busy\n",
680 other,
681 class_to_str(siblings[other].engine_class),
682 siblings[other].engine_instance,
683 100 * v[other]);
684 }
685 igt_assert_f(v[limit] > 0.90,
686 "master (class:instance %s:%d) was found to be only %.1f%% busy\n",
687 class_to_str(master_engines[bond].engine_class),
688 master_engines[bond].engine_instance,
689 100 * v[limit]);
690
691 close(pmu[0]);
692 }
693
694 free(order);
695 gem_context_destroy(i915, ctx);
696 free(master_engines);
697 free(siblings);
698 }
699
700 gem_context_destroy(i915, master);
701 }
702
static void indices(int i915)
704 {
705 I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
706 struct drm_i915_gem_context_param p = {
707 .ctx_id = gem_context_create(i915),
708 .param = I915_CONTEXT_PARAM_ENGINES,
709 .value = to_user_pointer(&engines)
710 };
711
712 struct drm_i915_gem_exec_object2 batch = {
713 .handle = batch_create(i915),
714 };
715
716 unsigned int nengines = 0;
717 void *balancers = NULL;
718
719 /*
720 * We can populate our engine map with multiple virtual engines.
721 * Do so.
722 */
723
724 for (int class = 0; class < 32; class++) {
725 struct i915_engine_class_instance *ci;
726 unsigned int count;
727
728 ci = list_engines(i915, 1u << class, &count);
729 if (!ci)
730 continue;
731
732 for (int n = 0; n < count; n++) {
733 struct i915_context_engines_load_balance *balancer;
734
735 engines.engines[nengines].engine_class =
736 I915_ENGINE_CLASS_INVALID;
737 engines.engines[nengines].engine_instance =
738 I915_ENGINE_CLASS_INVALID_NONE;
739
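			/*
			 * Chain one load-balance extension per map slot; each
			 * extension's engine_index names the slot it fills, so
			 * every index becomes a virtual engine over the same
			 * set of siblings.
			 */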
740 balancer = calloc(sizeof_load_balance(count), 1);
741 igt_assert(balancer);
742
743 balancer->base.name =
744 I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
745 balancer->base.next_extension =
746 to_user_pointer(balancers);
747 balancers = balancer;
748
749 balancer->engine_index = nengines++;
750 balancer->num_siblings = count;
751
752 memcpy(balancer->engines,
753 ci, count * sizeof(*ci));
754 }
755 free(ci);
756 }
757
758 igt_require(balancers);
759 engines.extensions = to_user_pointer(balancers);
760 p.size = (sizeof(struct i915_engine_class_instance) * nengines +
761 sizeof(struct i915_context_param_engines));
762 gem_context_set_param(i915, &p);
763
764 for (unsigned int n = 0; n < nengines; n++) {
765 struct drm_i915_gem_execbuffer2 eb = {
766 .buffers_ptr = to_user_pointer(&batch),
767 .buffer_count = 1,
768 .flags = n,
769 .rsvd1 = p.ctx_id,
770 };
771 igt_debug("Executing on index=%d\n", n);
772 gem_execbuf(i915, &eb);
773 }
774 gem_context_destroy(i915, p.ctx_id);
775
776 gem_sync(i915, batch.handle);
777 gem_close(i915, batch.handle);
778
779 while (balancers) {
780 struct i915_context_engines_load_balance *b, *n;
781
782 b = balancers;
783 n = from_user_pointer(b->base.next_extension);
784 free(b);
785
786 balancers = n;
787 }
788
789 gem_quiescent_gpu(i915);
790 }
791
static void busy(int i915)
793 {
794 uint32_t scratch = gem_create(i915, 4096);
795
796 /*
797 * Check that virtual engines are reported via GEM_BUSY.
798 *
799 * When running, the batch will be on the real engine and report
800 * the actual class.
801 *
802 * Prior to running, if the load-balancer is across multiple
803 * classes we don't know which engine the batch will
804 * execute on, so we report them all!
805 *
806 * However, as we only support (and test) creating a load-balancer
807 * from engines of only one class, that can be propagated accurately
808 * through to GEM_BUSY.
809 */
810
811 for (int class = 0; class < 16; class++) {
812 struct drm_i915_gem_busy busy;
813 struct i915_engine_class_instance *ci;
814 unsigned int count;
815 igt_spin_t *spin[2];
816 uint32_t ctx;
817
818 ci = list_engines(i915, 1u << class, &count);
819 if (!ci)
820 continue;
821
822 ctx = load_balancer_create(i915, ci, count);
823 free(ci);
824
825 spin[0] = __igt_spin_new(i915,
826 .ctx = ctx,
827 .flags = IGT_SPIN_POLL_RUN);
828 spin[1] = __igt_spin_new(i915,
829 .ctx = ctx,
830 .dependency = scratch);
831
832 igt_spin_busywait_until_started(spin[0]);
833
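		/*
		 * GEM_BUSY reports readers as a class bitmask in the upper 16
		 * bits and the single writer as (class + 1) in the lower 16
		 * bits, so even a virtual engine must resolve to its one
		 * backing class here.
		 */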
834 /* Running: actual class */
835 busy.handle = spin[0]->handle;
836 do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
837 igt_assert_eq_u32(busy.busy, 1u << (class + 16));
838
839 /* Queued(read): expected class */
840 busy.handle = spin[1]->handle;
841 do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
842 igt_assert_eq_u32(busy.busy, 1u << (class + 16));
843
844 /* Queued(write): expected class */
845 busy.handle = scratch;
846 do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
847 igt_assert_eq_u32(busy.busy,
848 (1u << (class + 16)) | (class + 1));
849
850 igt_spin_free(i915, spin[1]);
851 igt_spin_free(i915, spin[0]);
852
853 gem_context_destroy(i915, ctx);
854 }
855
856 gem_close(i915, scratch);
857 gem_quiescent_gpu(i915);
858 }
859
static void full(int i915, unsigned int flags)
861 #define PULSE 0x1
862 #define LATE 0x2
863 {
864 struct drm_i915_gem_exec_object2 batch = {
865 .handle = batch_create(i915),
866 };
867
868 if (flags & LATE)
869 igt_require_sw_sync();
870
	/*
	 * I915_CONTEXT_PARAM_ENGINES changes the meaning of the engine
	 * selector in execbuf to index into our own map, in which
	 * I915_EXEC_DEFAULT provides an automatic selection from the other
	 * ctx->engine[]. It employs load-balancing to evenly distribute the
	 * workload across the array. If we submit N spinners, we expect them
	 * to be running simultaneously across N engines and use the PMU to
	 * confirm that the entire set of engines is busy.
	 *
	 * We complicate matters by interspersing short-lived tasks to
	 * challenge the kernel to search for space in which to insert new
	 * batches.
	 */
884
885 for (int class = 0; class < 32; class++) {
886 struct i915_engine_class_instance *ci;
887 igt_spin_t *spin = NULL;
888 IGT_CORK_FENCE(cork);
889 unsigned int count;
890 double load;
891 int fence = -1;
892 int *pmu;
893
894 ci = list_engines(i915, 1u << class, &count);
895 if (!ci)
896 continue;
897
898 pmu = malloc(sizeof(*pmu) * count);
899 igt_assert(pmu);
900
901 if (flags & LATE)
902 fence = igt_cork_plug(&cork, i915);
903
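		/*
		 * The first add_pmu() call (pmu[0] == -1) creates the perf
		 * group leader; the rest join its group so that one read()
		 * samples every sibling engine atomically.
		 */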
904 pmu[0] = -1;
905 for (unsigned int n = 0; n < count; n++) {
906 uint32_t ctx;
907
908 pmu[n] = add_pmu(pmu[0], &ci[n]);
909
910 if (flags & PULSE) {
911 struct drm_i915_gem_execbuffer2 eb = {
912 .buffers_ptr = to_user_pointer(&batch),
913 .buffer_count = 1,
914 .rsvd2 = fence,
915 .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
916 };
917 gem_execbuf(i915, &eb);
918 }
919
			/*
			 * Each spinner needs to be on a new timeline,
			 * otherwise they will just sit in a single queue
			 * and not run concurrently.
			 */
925 ctx = load_balancer_create(i915, ci, count);
926
927 if (spin == NULL) {
928 spin = __igt_spin_new(i915, .ctx = ctx);
929 } else {
930 struct drm_i915_gem_execbuffer2 eb = {
931 .buffers_ptr = spin->execbuf.buffers_ptr,
932 .buffer_count = spin->execbuf.buffer_count,
933 .rsvd1 = ctx,
934 .rsvd2 = fence,
935 .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
936 };
937 gem_execbuf(i915, &eb);
938 }
939
940 gem_context_destroy(i915, ctx);
941 }
942
943 if (flags & LATE) {
944 igt_cork_unplug(&cork);
945 close(fence);
946 }
947
948 load = measure_min_load(pmu[0], count, 10000);
949 igt_spin_free(i915, spin);
950
951 close(pmu[0]);
952 free(pmu);
953
954 free(ci);
955
956 igt_assert_f(load > 0.90,
957 "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
958 count, class, load*100);
959 gem_quiescent_gpu(i915);
960 }
961
962 gem_close(i915, batch.handle);
963 gem_quiescent_gpu(i915);
964 }
965
static void nop(int i915)
967 {
968 struct drm_i915_gem_exec_object2 batch = {
969 .handle = batch_create(i915),
970 };
971
972 for (int class = 0; class < 32; class++) {
973 struct i915_engine_class_instance *ci;
974 unsigned int count;
975 uint32_t ctx;
976
977 ci = list_engines(i915, 1u << class, &count);
978 if (!ci)
979 continue;
980
981 ctx = load_balancer_create(i915, ci, count);
982
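		/*
		 * With the load-balancer installed, index 0 is the virtual
		 * engine and indices 1..count are the individual siblings:
		 * measure nop throughput on each sibling, then on the
		 * balancer itself, then again with concurrent clients.
		 */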
983 for (int n = 0; n < count; n++) {
984 struct drm_i915_gem_execbuffer2 execbuf = {
985 .buffers_ptr = to_user_pointer(&batch),
986 .buffer_count = 1,
987 .flags = n + 1,
988 .rsvd1 = ctx,
989 };
990 struct timespec tv = {};
991 unsigned long nops;
992 double t;
993
994 igt_nsec_elapsed(&tv);
995 nops = 0;
996 do {
997 for (int r = 0; r < 1024; r++)
998 gem_execbuf(i915, &execbuf);
999 nops += 1024;
1000 } while (igt_seconds_elapsed(&tv) < 2);
1001 gem_sync(i915, batch.handle);
1002
1003 t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
1004 igt_info("%s:%d %.3fus\n", class_to_str(class), n, t);
1005 }
1006
1007 {
1008 struct drm_i915_gem_execbuffer2 execbuf = {
1009 .buffers_ptr = to_user_pointer(&batch),
1010 .buffer_count = 1,
1011 .rsvd1 = ctx,
1012 };
1013 struct timespec tv = {};
1014 unsigned long nops;
1015 double t;
1016
1017 igt_nsec_elapsed(&tv);
1018 nops = 0;
1019 do {
1020 for (int r = 0; r < 1024; r++)
1021 gem_execbuf(i915, &execbuf);
1022 nops += 1024;
1023 } while (igt_seconds_elapsed(&tv) < 2);
1024 gem_sync(i915, batch.handle);
1025
1026 t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
1027 igt_info("%s:* %.3fus\n", class_to_str(class), t);
1028 }
1029
1030
1031 igt_fork(child, count) {
1032 struct drm_i915_gem_execbuffer2 execbuf = {
1033 .buffers_ptr = to_user_pointer(&batch),
1034 .buffer_count = 1,
1035 .flags = child + 1,
1036 .rsvd1 = gem_context_clone(i915, ctx,
1037 I915_CONTEXT_CLONE_ENGINES, 0),
1038 };
1039 struct timespec tv = {};
1040 unsigned long nops;
1041 double t;
1042
1043 igt_nsec_elapsed(&tv);
1044 nops = 0;
1045 do {
1046 for (int r = 0; r < 1024; r++)
1047 gem_execbuf(i915, &execbuf);
1048 nops += 1024;
1049 } while (igt_seconds_elapsed(&tv) < 2);
1050 gem_sync(i915, batch.handle);
1051
1052 t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
1053 igt_info("[%d] %s:%d %.3fus\n",
1054 child, class_to_str(class), child, t);
1055
1056 memset(&tv, 0, sizeof(tv));
1057 execbuf.flags = 0;
1058
1059 igt_nsec_elapsed(&tv);
1060 nops = 0;
1061 do {
1062 for (int r = 0; r < 1024; r++)
1063 gem_execbuf(i915, &execbuf);
1064 nops += 1024;
1065 } while (igt_seconds_elapsed(&tv) < 2);
1066 gem_sync(i915, batch.handle);
1067
1068 t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
1069 igt_info("[%d] %s:* %.3fus\n",
1070 child, class_to_str(class), t);
1071
1072 gem_context_destroy(i915, execbuf.rsvd1);
1073 }
1074
1075 igt_waitchildren();
1076
1077 gem_context_destroy(i915, ctx);
1078 free(ci);
1079 }
1080
1081 gem_close(i915, batch.handle);
1082 gem_quiescent_gpu(i915);
1083 }
1084
static void ping(int i915, uint32_t ctx, unsigned int engine)
1086 {
1087 struct drm_i915_gem_exec_object2 obj = {
1088 .handle = batch_create(i915),
1089 };
1090 struct drm_i915_gem_execbuffer2 execbuf = {
1091 .buffers_ptr = to_user_pointer(&obj),
1092 .buffer_count = 1,
1093 .flags = engine,
1094 .rsvd1 = ctx,
1095 };
1096 gem_execbuf(i915, &execbuf);
1097 gem_sync(i915, obj.handle);
1098 gem_close(i915, obj.handle);
1099 }
1100
static void semaphore(int i915)
1102 {
1103 uint32_t block[2], scratch;
1104 igt_spin_t *spin[3];
1105
	/*
	 * If we are using HW semaphores to launch serialised requests
	 * on different engines concurrently, we want to verify that real
	 * work is unimpeded.
	 */
1111 igt_require(gem_scheduler_has_preemption(i915));
1112
1113 block[0] = gem_context_create(i915);
1114 block[1] = gem_context_create(i915);
1115
1116 scratch = gem_create(i915, 4096);
1117 spin[2] = igt_spin_new(i915, .dependency = scratch);
1118 for (int class = 1; class < 32; class++) {
1119 struct i915_engine_class_instance *ci;
1120 unsigned int count;
1121 uint32_t vip;
1122
1123 ci = list_engines(i915, 1u << class, &count);
1124 if (!ci)
1125 continue;
1126
1127 if (count < ARRAY_SIZE(block))
1128 continue;
1129
1130 /* Ensure that we completely occupy all engines in this group */
1131 count = ARRAY_SIZE(block);
1132
1133 for (int i = 0; i < count; i++) {
1134 set_load_balancer(i915, block[i], ci, count, NULL);
1135 spin[i] = __igt_spin_new(i915,
1136 .ctx = block[i],
1137 .dependency = scratch);
1138 }
1139
1140 /*
1141 * Either we haven't blocked both engines with semaphores,
1142 * or we let the vip through. If not, we hang.
1143 */
1144 vip = gem_context_create(i915);
1145 set_load_balancer(i915, vip, ci, count, NULL);
1146 ping(i915, vip, 0);
1147 gem_context_destroy(i915, vip);
1148
1149 for (int i = 0; i < count; i++)
1150 igt_spin_free(i915, spin[i]);
1151
1152 free(ci);
1153 }
1154 igt_spin_free(i915, spin[2]);
1155 gem_close(i915, scratch);
1156
1157 gem_context_destroy(i915, block[1]);
1158 gem_context_destroy(i915, block[0]);
1159
1160 gem_quiescent_gpu(i915);
1161 }
1162
static void smoketest(int i915, int timeout)
1164 {
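	/*
	 * batch[0] is ~16k of zeroed MI_NOOPs terminated by an
	 * MI_BATCH_BUFFER_END at offset 16380; batch[1] is swapped between
	 * the per-context scratch handles below.
	 */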
1165 struct drm_i915_gem_exec_object2 batch[2] = {
1166 { .handle = __batch_create(i915, 16380) }
1167 };
1168 unsigned int ncontext = 0;
1169 uint32_t *contexts = NULL;
1170 uint32_t *handles = NULL;
1171
1172 igt_require_sw_sync();
1173
1174 for (int class = 0; class < 32; class++) {
1175 struct i915_engine_class_instance *ci;
1176 unsigned int count = 0;
1177
1178 ci = list_engines(i915, 1u << class, &count);
1179 if (!ci || count < 2) {
1180 free(ci);
1181 continue;
1182 }
1183
1184 ncontext += 128;
1185 contexts = realloc(contexts, sizeof(*contexts) * ncontext);
1186 igt_assert(contexts);
1187
1188 for (unsigned int n = ncontext - 128; n < ncontext; n++) {
1189 contexts[n] = load_balancer_create(i915, ci, count);
1190 igt_assert(contexts[n]);
1191 }
1192
1193 free(ci);
1194 }
1195 igt_debug("Created %d virtual engines (one per context)\n", ncontext);
1196 igt_require(ncontext);
1197
1198 contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
1199 igt_assert(contexts);
1200 memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
1201 ncontext *= 2;
1202 memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
1203 ncontext *= 2;
1204
1205 handles = malloc(sizeof(*handles) * ncontext);
1206 igt_assert(handles);
1207 for (unsigned int n = 0; n < ncontext; n++)
1208 handles[n] = gem_create(i915, 4096);
1209
1210 igt_until_timeout(timeout) {
1211 unsigned int count = 1 + (rand() % (ncontext - 1));
1212 IGT_CORK_FENCE(cork);
1213 int fence = igt_cork_plug(&cork, i915);
1214
1215 for (unsigned int n = 0; n < count; n++) {
1216 struct drm_i915_gem_execbuffer2 eb = {
1217 .buffers_ptr = to_user_pointer(batch),
1218 .buffer_count = ARRAY_SIZE(batch),
1219 .rsvd1 = contexts[n],
1220 .rsvd2 = fence,
1221 .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
1222 };
1223 batch[1].handle = handles[n];
1224 gem_execbuf(i915, &eb);
1225 }
1226 igt_permute_array(handles, count, igt_exchange_int);
1227
1228 igt_cork_unplug(&cork);
1229 for (unsigned int n = 0; n < count; n++)
1230 gem_sync(i915, handles[n]);
1231
1232 close(fence);
1233 }
1234
1235 for (unsigned int n = 0; n < ncontext; n++) {
1236 gem_close(i915, handles[n]);
1237 __gem_context_destroy(i915, contexts[n]);
1238 }
1239 free(handles);
1240 free(contexts);
1241 gem_close(i915, batch[0].handle);
1242 }
1243
static bool has_context_engines(int i915)
1245 {
1246 struct drm_i915_gem_context_param p = {
1247 .param = I915_CONTEXT_PARAM_ENGINES,
1248 };
1249
1250 return __gem_context_set_param(i915, &p) == 0;
1251 }
1252
static bool has_load_balancer(int i915)
1254 {
1255 struct i915_engine_class_instance ci = {};
1256 uint32_t ctx;
1257 int err;
1258
1259 ctx = gem_context_create(i915);
1260 err = __set_load_balancer(i915, ctx, &ci, 1, NULL);
1261 gem_context_destroy(i915, ctx);
1262
1263 return err == 0;
1264 }
1265
1266 igt_main
1267 {
1268 int i915 = -1;
1269
1270 igt_skip_on_simulation();
1271
1272 igt_fixture {
1273 i915 = drm_open_driver(DRIVER_INTEL);
1274 igt_require_gem(i915);
1275
1276 gem_require_contexts(i915);
1277 igt_require(has_context_engines(i915));
1278 igt_require(has_load_balancer(i915));
1279
1280 igt_fork_hang_detector(i915);
1281 }
1282
1283 igt_subtest("invalid-balancer")
1284 invalid_balancer(i915);
1285
1286 igt_subtest("invalid-bonds")
1287 invalid_bonds(i915);
1288
1289 igt_subtest("individual")
1290 individual(i915);
1291
1292 igt_subtest("indices")
1293 indices(i915);
1294
1295 igt_subtest("busy")
1296 busy(i915);
1297
1298 igt_subtest_group {
1299 static const struct {
1300 const char *name;
1301 unsigned int flags;
1302 } phases[] = {
1303 { "", 0 },
1304 { "-pulse", PULSE },
1305 { "-late", LATE },
1306 { "-late-pulse", PULSE | LATE },
1307 { }
1308 };
1309 for (typeof(*phases) *p = phases; p->name; p++)
1310 igt_subtest_f("full%s", p->name)
1311 full(i915, p->flags);
1312 }
1313
1314 igt_subtest("nop")
1315 nop(i915);
1316
1317 igt_subtest("semaphore")
1318 semaphore(i915);
1319
1320 igt_subtest("smoke")
1321 smoketest(i915, 20);
1322
1323 igt_subtest("bonded-imm")
1324 bonded(i915, 0);
1325
1326 igt_subtest("bonded-cork")
1327 bonded(i915, CORK);
1328
1329 igt_fixture {
1330 igt_stop_hang_detector();
1331 }
1332 }
1333