/*
 * Copyright © 2018-2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sched.h>

#include "igt.h"
#include "igt_perf.h"
#include "i915/gem_ring.h"
#include "sw_sync.h"

IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");

#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)

static size_t sizeof_load_balance(int count)
{
	return offsetof(struct i915_context_engines_load_balance,
			engines[count]);
}

static size_t sizeof_param_engines(int count)
{
	return offsetof(struct i915_context_param_engines,
			engines[count]);
}

static size_t sizeof_engines_bond(int count)
{
	return offsetof(struct i915_context_engines_bond,
			engines[count]);
}

#define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })

static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
{
	int fd;

	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
	if (fd != -1) {
		close(fd);
		return true;
	}

	return false;
}

static struct i915_engine_class_instance *
list_engines(int i915, uint32_t class_mask, unsigned int *out)
{
	unsigned int count = 0, size = 64;
	struct i915_engine_class_instance *engines;

	engines = malloc(size * sizeof(*engines));
	igt_assert(engines);

	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
	     class_mask;
	     class++, class_mask >>= 1) {
		if (!(class_mask & 1))
			continue;

		for (unsigned int instance = 0;
		     instance < INSTANCE_COUNT;
		     instance++) {
			if (!has_class_instance(i915, class, instance))
				continue;

			if (count == size) {
				size *= 2;
				engines = realloc(engines,
						  size * sizeof(*engines));
				igt_assert(engines);
			}

			engines[count++] = (struct i915_engine_class_instance){
				.engine_class = class,
				.engine_instance = instance,
			};
		}
	}

	if (!count) {
		free(engines);
		engines = NULL;
	}

	*out = count;
	return engines;
}

static int __set_engines(int i915, uint32_t ctx,
			 const struct i915_engine_class_instance *ci,
			 unsigned int count)
{
	struct i915_context_param_engines *engines =
		alloca0(sizeof_param_engines(count));
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof_param_engines(count),
		.value = to_user_pointer(engines)
	};

	engines->extensions = 0;
	memcpy(engines->engines, ci, count * sizeof(*ci));

	return __gem_context_set_param(i915, &p);
}

static void set_engines(int i915, uint32_t ctx,
			const struct i915_engine_class_instance *ci,
			unsigned int count)
{
	igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
}
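/*
 * Layout note (derived from the helpers below): __set_load_balancer() builds
 * an engine map of count + 1 slots. Slot 0 is left as the invalid
 * class:instance placeholder, which the LOAD_BALANCE extension (engine_index
 * 0 by default) turns into a virtual engine spanning all count siblings;
 * slots 1..count address the physical siblings directly. Hence execbuf with
 * flags = 0 runs on the balanced virtual engine, while flags = n
 * (1 <= n <= count) pins the work to sibling n - 1.
 */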
static int __set_load_balancer(int i915, uint32_t ctx,
			       const struct i915_engine_class_instance *ci,
			       unsigned int count,
			       void *ext)
{
	struct i915_context_engines_load_balance *balancer =
		alloca0(sizeof_load_balance(count));
	struct i915_context_param_engines *engines =
		alloca0(sizeof_param_engines(count + 1));
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof_param_engines(count + 1),
		.value = to_user_pointer(engines)
	};

	balancer->base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
	balancer->base.next_extension = to_user_pointer(ext);

	igt_assert(count);
	balancer->num_siblings = count;
	memcpy(balancer->engines, ci, count * sizeof(*ci));

	engines->extensions = to_user_pointer(balancer);
	engines->engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	memcpy(engines->engines + 1, ci, count * sizeof(*ci));

	return __gem_context_set_param(i915, &p);
}

static void set_load_balancer(int i915, uint32_t ctx,
			      const struct i915_engine_class_instance *ci,
			      unsigned int count,
			      void *ext)
{
	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
}

static uint32_t load_balancer_create(int i915,
				     const struct i915_engine_class_instance *ci,
				     unsigned int count)
{
	uint32_t ctx;

	ctx = gem_context_create(i915);
	set_load_balancer(i915, ctx, ci, count, NULL);

	return ctx;
}

static uint32_t __batch_create(int i915, uint32_t offset)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	uint32_t handle;

	handle = gem_create(i915, ALIGN(offset + 4, 4096));
	gem_write(i915, handle, offset, &bbe, sizeof(bbe));

	return handle;
}

static uint32_t batch_create(int i915)
{
	return __batch_create(i915, 0);
}
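/*
 * Usage sketch (illustrative only, assuming a device with two or more
 * engines of one class): build a context whose engine[0] is a virtual
 * engine balanced over every video engine, then submit to it with
 * execbuf flags = 0, i.e. index 0 of the engine map:
 *
 *	ci = list_engines(i915, 1u << I915_ENGINE_CLASS_VIDEO, &count);
 *	ctx = load_balancer_create(i915, ci, count);
 *	execbuf.rsvd1 = ctx;
 *	execbuf.flags = 0;
 *	gem_execbuf(i915, &execbuf);
 */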
static void invalid_balancer(int i915)
{
	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
	struct drm_i915_gem_context_param p = {
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines)
	};
	uint32_t handle;
	void *ptr;

	/*
	 * Assume that I915_CONTEXT_PARAM_ENGINES validates the array
	 * of engines[]; our job is to determine whether the load_balancer
	 * extension explodes.
	 */

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;

		ci = list_engines(i915, 1 << class, &count);
		if (!ci)
			continue;

		igt_assert_lte(count, 64);

		p.ctx_id = gem_context_create(i915);
		p.size = (sizeof(struct i915_context_param_engines) +
			  (count + 1) * sizeof(*engines.engines));

		memset(&engines, 0, sizeof(engines));
		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		memcpy(engines.engines + 1, ci, count * sizeof(*ci));
		gem_context_set_param(i915, &p);

		engines.extensions = -1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		engines.extensions = 1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		memset(&balancer, 0, sizeof(balancer));
		balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
		balancer.num_siblings = count;
		memcpy(balancer.engines, ci, count * sizeof(*ci));

		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		balancer.engine_index = 1;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		balancer.engine_index = count;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		balancer.engine_index = count + 1;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);

		balancer.engine_index = 0;
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = to_user_pointer(&balancer);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);

		balancer.base.next_extension = -1ull;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		handle = gem_create(i915, 4096 * 3);
		ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
		gem_close(i915, handle);

		memset(&engines, 0, sizeof(engines));
		engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
		engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
		memcpy(engines.engines + 2, ci, count * sizeof(*ci));
		p.size = (sizeof(struct i915_context_param_engines) +
			  (count + 2) * sizeof(*engines.engines));
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = 0;
		balancer.engine_index = 1;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
		memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));

		balancer.engine_index = 0;
		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = engines.extensions;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		munmap(ptr, 4096);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
		engines.extensions = to_user_pointer(ptr) + 4096 - 8;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
		gem_context_set_param(i915, &p);

		balancer.base.next_extension = engines.extensions;
		engines.extensions = to_user_pointer(&balancer);
		gem_context_set_param(i915, &p);

		munmap(ptr + 8192, 4096);
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
		engines.extensions = to_user_pointer(ptr) + 8192 - 8;
		igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

		munmap(ptr + 4096, 4096);

		gem_context_destroy(i915, p.ctx_id);
		free(ci);
	}
}
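/*
 * Note on the chained-extension checks below: each bond links to the
 * previous one via base.next_extension, so pointing an extension at itself
 * creates an unterminated chain. The expected -E2BIG suggests the kernel
 * bounds the number of chained extensions it will walk rather than looping
 * forever (an assumption inferred from the asserted errno).
 */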
static void invalid_bonds(int i915)
{
	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
	struct drm_i915_gem_context_param p = {
		.ctx_id = gem_context_create(i915),
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines),
		.size = sizeof(engines),
	};
	uint32_t handle;
	void *ptr;

	memset(&engines, 0, sizeof(engines));
	gem_context_set_param(i915, &p);

	memset(bonds, 0, sizeof(bonds));
	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
		bonds[n].base.next_extension =
			n ? to_user_pointer(&bonds[n - 1]) : 0;
		bonds[n].num_bonds = 1;
	}
	engines.extensions = to_user_pointer(&bonds);
	gem_context_set_param(i915, &p);

	bonds[0].base.next_extension = -1ull;
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);

	engines.extensions = to_user_pointer(&bonds[1]);
	igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
	bonds[0].base.next_extension = 0;
	gem_context_set_param(i915, &p);

	handle = gem_create(i915, 4096 * 3);
	ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
	gem_close(i915, handle);

	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	engines.extensions = to_user_pointer(ptr) + 4096;
	gem_context_set_param(i915, &p);

	memcpy(ptr, &bonds[0], sizeof(bonds[0]));
	bonds[0].base.next_extension = to_user_pointer(ptr);
	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	gem_context_set_param(i915, &p);

	munmap(ptr, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	bonds[0].base.next_extension = 0;
	memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
	bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
	memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
	gem_context_set_param(i915, &p);

	munmap(ptr + 8192, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	munmap(ptr + 4096, 4096);
	igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);

	gem_context_destroy(i915, p.ctx_id);
}

static void kick_kthreads(void)
{
	usleep(20 * 1000); /* 20ms should be enough for ksoftirqd! */
}
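/*
 * The measure_*() helpers below rely on the perf read() layout set up by
 * igt_perf (assumed here to include PERF_FORMAT_TOTAL_TIME_ENABLED, plus
 * PERF_FORMAT_GROUP for grouped counters): a single counter reads back
 * { value, time_enabled }, while a group reads back
 * { nr, time_enabled, value[0], ... value[nr - 1] }, hence the data[2 + n]
 * indexing. Busyness is reported as value / time_enabled over the sample.
 */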
static double measure_load(int pmu, int period_us)
{
	uint64_t data[2];
	uint64_t d_t, d_v;

	kick_kthreads();

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	d_v = -data[0];
	d_t = -data[1];

	usleep(period_us);

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	d_v += data[0];
	d_t += data[1];

	return d_v / (double)d_t;
}

static double measure_min_load(int pmu, unsigned int num, int period_us)
{
	uint64_t data[2 + num];
	uint64_t d_t, d_v[num];
	uint64_t min = -1, max = 0;

	kick_kthreads();

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	for (unsigned int n = 0; n < num; n++)
		d_v[n] = -data[2 + n];
	d_t = -data[1];

	usleep(period_us);

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));

	d_t += data[1];
	for (unsigned int n = 0; n < num; n++) {
		d_v[n] += data[2 + n];
		igt_debug("engine[%d]: %.1f%%\n",
			  n, d_v[n] / (double)d_t * 100);
		if (d_v[n] < min)
			min = d_v[n];
		if (d_v[n] > max)
			max = d_v[n];
	}

	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
		  d_t, min / (double)d_t * 100, max / (double)d_t * 100);

	return min / (double)d_t;
}

static void measure_all_load(int pmu, double *v, unsigned int num, int period_us)
{
	uint64_t data[2 + num];
	uint64_t d_t, d_v[num];

	kick_kthreads();

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
	for (unsigned int n = 0; n < num; n++)
		d_v[n] = -data[2 + n];
	d_t = -data[1];

	usleep(period_us);

	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));

	d_t += data[1];
	for (unsigned int n = 0; n < num; n++) {
		d_v[n] += data[2 + n];
		igt_debug("engine[%d]: %.1f%%\n",
			  n, d_v[n] / (double)d_t * 100);
		v[n] = d_v[n] / (double)d_t;
	}
}

static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
{
	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
							 ci->engine_instance),
				    pmu);
}

static const char *class_to_str(int class)
{
	const char *str[] = {
		[I915_ENGINE_CLASS_RENDER] = "rcs",
		[I915_ENGINE_CLASS_COPY] = "bcs",
		[I915_ENGINE_CLASS_VIDEO] = "vcs",
		[I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs",
	};

	if (class < ARRAY_SIZE(str))
		return str[class];

	return "unk";
}

static void check_individual_engine(int i915,
				    uint32_t ctx,
				    const struct i915_engine_class_instance *ci,
				    int idx)
{
	igt_spin_t *spin;
	double load;
	int pmu;

	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
						  ci[idx].engine_instance));

	spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
	load = measure_load(pmu, 10000);
	igt_spin_free(i915, spin);

	close(pmu);

	igt_assert_f(load > 0.90,
		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
		     idx, ci[idx].engine_class, ci[idx].engine_instance,
		     load * 100);
}
static void individual(int i915)
{
	uint32_t ctx;

	/*
	 * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
	 * supplied array from gem_execbuf(). Our check is to build the
	 * ctx->engine[] with various different engine classes, feed in
	 * a spinner and then ask the PMU to confirm that the expected
	 * engine was busy.
	 */

	ctx = gem_context_create(i915);

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		for (int pass = 0; pass < count; pass++) { /* approx. count! */
			igt_assert(sizeof(*ci) == sizeof(int));
			igt_permute_array(ci, count, igt_exchange_int);
			set_load_balancer(i915, ctx, ci, count, NULL);
			for (unsigned int n = 0; n < count; n++)
				check_individual_engine(i915, ctx, ci, n);
		}

		free(ci);
	}

	gem_context_destroy(i915, ctx);
	gem_quiescent_gpu(i915);
}
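/*
 * Worked example for the bond array built below: with bonds[n].master =
 * master_engines[n] and bonds[n].engines[0] = siblings[n], a secondary
 * submitted with I915_EXEC_FENCE_SUBMIT against a master request running on
 * master_engines[bond] should steer the virtual engine onto siblings[bond],
 * leaving every other sibling idle; that is exactly what the PMU assertions
 * at the end of the inner loop check.
 */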
static void bonded(int i915, unsigned int flags)
#define CORK 0x1
{
	I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
	struct i915_engine_class_instance *master_engines;
	uint32_t master;

	/*
	 * I915_CONTEXT_PARAM_ENGINES provides an extension that allows us
	 * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
	 * request submitted to another engine.
	 */

	master = gem_queue_create(i915);

	memset(bonds, 0, sizeof(bonds));
	for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
		bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
		bonds[n].base.next_extension =
			n ? to_user_pointer(&bonds[n - 1]) : 0;
		bonds[n].num_bonds = 1;
	}

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *siblings;
		unsigned int count, limit, *order;
		uint32_t ctx;
		int n;

		siblings = list_engines(i915, 1u << class, &count);
		if (!siblings)
			continue;

		if (count < 2) {
			free(siblings);
			continue;
		}

		master_engines = list_engines(i915, ~(1u << class), &limit);
		set_engines(i915, master, master_engines, limit);

		limit = min(count, limit);
		igt_assert(limit <= ARRAY_SIZE(bonds));
		for (n = 0; n < limit; n++) {
			bonds[n].master = master_engines[n];
			bonds[n].engines[0] = siblings[n];
		}

		ctx = gem_context_clone(i915,
					master, I915_CONTEXT_CLONE_VM,
					I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
		set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);

		order = malloc(sizeof(*order) * 8 * limit);
		igt_assert(order);
		for (n = 0; n < limit; n++)
			order[2 * limit - n - 1] = order[n] = n % limit;
		memcpy(order + 2 * limit, order, 2 * limit * sizeof(*order));
		memcpy(order + 4 * limit, order, 4 * limit * sizeof(*order));
		igt_permute_array(order + 2 * limit, 6 * limit, igt_exchange_int);

		for (n = 0; n < 8 * limit; n++) {
			struct drm_i915_gem_execbuffer2 eb;
			igt_spin_t *spin, *plug;
			IGT_CORK_HANDLE(cork);
			double v[limit];
			int pmu[limit + 1];
			int bond = order[n];

			pmu[0] = -1;
			for (int i = 0; i < limit; i++)
				pmu[i] = add_pmu(pmu[0], &siblings[i]);
			pmu[limit] = add_pmu(pmu[0], &master_engines[bond]);

			igt_assert(siblings[bond].engine_class !=
				   master_engines[bond].engine_class);

			plug = NULL;
			if (flags & CORK) {
				plug = __igt_spin_new(i915,
						      .ctx = master,
						      .engine = bond,
						      .dependency = igt_cork_plug(&cork, i915));
			}

			spin = __igt_spin_new(i915,
					      .ctx = master,
					      .engine = bond,
					      .flags = IGT_SPIN_FENCE_OUT);

			eb = spin->execbuf;
			eb.rsvd1 = ctx;
			eb.rsvd2 = spin->out_fence;
			eb.flags = I915_EXEC_FENCE_SUBMIT;
			gem_execbuf(i915, &eb);

			if (plug) {
				igt_cork_unplug(&cork);
				igt_spin_free(i915, plug);
			}

			measure_all_load(pmu[0], v, limit + 1, 10000);
			igt_spin_free(i915, spin);

			igt_assert_f(v[bond] > 0.90,
				     "engine %d (class:instance %s:%d) was found to be only %.1f%% busy\n",
				     bond,
				     class_to_str(siblings[bond].engine_class),
				     siblings[bond].engine_instance,
				     100 * v[bond]);
			for (int other = 0; other < limit; other++) {
				if (other == bond)
					continue;

				igt_assert_f(v[other] == 0,
					     "engine %d (class:instance %s:%d) was not idle, and actually %.1f%% busy\n",
					     other,
					     class_to_str(siblings[other].engine_class),
					     siblings[other].engine_instance,
					     100 * v[other]);
			}
			igt_assert_f(v[limit] > 0.90,
				     "master (class:instance %s:%d) was found to be only %.1f%% busy\n",
				     class_to_str(master_engines[bond].engine_class),
				     master_engines[bond].engine_instance,
				     100 * v[limit]);

			close(pmu[0]);
		}

		free(order);
		gem_context_destroy(i915, ctx);
		free(master_engines);
		free(siblings);
	}

	gem_context_destroy(i915, master);
}

static void indices(int i915)
{
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
	struct drm_i915_gem_context_param p = {
		.ctx_id = gem_context_create(i915),
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = to_user_pointer(&engines)
	};
	struct drm_i915_gem_exec_object2 batch = {
		.handle = batch_create(i915),
	};
	unsigned int nengines = 0;
	void *balancers = NULL;

	/*
	 * We can populate our engine map with multiple virtual engines.
	 * Do so.
	 */

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		for (int n = 0; n < count; n++) {
			struct i915_context_engines_load_balance *balancer;

			engines.engines[nengines].engine_class =
				I915_ENGINE_CLASS_INVALID;
			engines.engines[nengines].engine_instance =
				I915_ENGINE_CLASS_INVALID_NONE;

			balancer = calloc(sizeof_load_balance(count), 1);
			igt_assert(balancer);

			balancer->base.name =
				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
			balancer->base.next_extension =
				to_user_pointer(balancers);
			balancers = balancer;

			balancer->engine_index = nengines++;
			balancer->num_siblings = count;

			memcpy(balancer->engines, ci, count * sizeof(*ci));
		}

		free(ci);
	}
	igt_require(balancers);

	engines.extensions = to_user_pointer(balancers);
	p.size = (sizeof(struct i915_engine_class_instance) * nengines +
		  sizeof(struct i915_context_param_engines));
	gem_context_set_param(i915, &p);

	for (unsigned int n = 0; n < nengines; n++) {
		struct drm_i915_gem_execbuffer2 eb = {
			.buffers_ptr = to_user_pointer(&batch),
			.buffer_count = 1,
			.flags = n,
			.rsvd1 = p.ctx_id,
		};
		igt_debug("Executing on index=%d\n", n);
		gem_execbuf(i915, &eb);
	}
	gem_context_destroy(i915, p.ctx_id);

	gem_sync(i915, batch.handle);
	gem_close(i915, batch.handle);

	while (balancers) {
		struct i915_context_engines_load_balance *b, *n;

		b = balancers;
		n = from_user_pointer(b->base.next_extension);
		free(b);

		balancers = n;
	}

	gem_quiescent_gpu(i915);
}
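/*
 * Reading the GEM_BUSY assertions below: busy.busy encodes readers as a
 * bitmask of engine classes in the upper 16 bits and the writing class as
 * class + 1 in the low bits. For example, with class = I915_ENGINE_CLASS_VIDEO
 * (2), a batch spinning on the virtual engine reports 1u << 18, and the
 * scratch buffer it writes reports (1u << 18) | 3.
 */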
static void busy(int i915)
{
	uint32_t scratch = gem_create(i915, 4096);

	/*
	 * Check that virtual engines are reported via GEM_BUSY.
	 *
	 * When running, the batch will be on the real engine and report
	 * the actual class.
	 *
	 * Prior to running, if the load-balancer is across multiple
	 * classes we don't know which engine the batch will
	 * execute on, so we report them all!
	 *
	 * However, as we only support (and test) creating a load-balancer
	 * from engines of only one class, that can be propagated accurately
	 * through to GEM_BUSY.
	 */

	for (int class = 0; class < 16; class++) {
		struct drm_i915_gem_busy busy;
		struct i915_engine_class_instance *ci;
		unsigned int count;
		igt_spin_t *spin[2];
		uint32_t ctx;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		ctx = load_balancer_create(i915, ci, count);
		free(ci);

		spin[0] = __igt_spin_new(i915,
					 .ctx = ctx,
					 .flags = IGT_SPIN_POLL_RUN);
		spin[1] = __igt_spin_new(i915,
					 .ctx = ctx,
					 .dependency = scratch);

		igt_spin_busywait_until_started(spin[0]);

		/* Running: actual class */
		busy.handle = spin[0]->handle;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy, 1u << (class + 16));

		/* Queued(read): expected class */
		busy.handle = spin[1]->handle;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy, 1u << (class + 16));

		/* Queued(write): expected class */
		busy.handle = scratch;
		do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
		igt_assert_eq_u32(busy.busy,
				  (1u << (class + 16)) | (class + 1));

		igt_spin_free(i915, spin[1]);
		igt_spin_free(i915, spin[0]);

		gem_context_destroy(i915, ctx);
	}

	gem_close(i915, scratch);
	gem_quiescent_gpu(i915);
}
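/*
 * Expectation for full(): with count spinners queued on count single-class
 * virtual contexts, the balancer should spread them one per physical engine,
 * so even the least-busy engine measured over the sampling window must still
 * be above 90% busy (the measure_min_load() assertion below).
 */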
static void full(int i915, unsigned int flags)
#define PULSE 0x1
#define LATE 0x2
{
	struct drm_i915_gem_exec_object2 batch = {
		.handle = batch_create(i915),
	};

	if (flags & LATE)
		igt_require_sw_sync();

	/*
	 * I915_CONTEXT_PARAM_ENGINES changes the meaning of the engine
	 * selector in execbuf to utilize our own map, into which we replace
	 * I915_EXEC_DEFAULT to provide an automatic selection from the other
	 * ctx->engine[]. It employs load-balancing to evenly distribute the
	 * workload across the array. If we submit N spinners, we expect them
	 * to be simultaneously running across N engines and use the PMU to
	 * confirm that the entire set of engines are busy.
	 *
	 * We complicate matters by interspersing short-lived tasks to
	 * challenge the kernel to search for space in which to insert new
	 * batches.
	 */

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		igt_spin_t *spin = NULL;
		IGT_CORK_FENCE(cork);
		unsigned int count;
		double load;
		int fence = -1;
		int *pmu;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		pmu = malloc(sizeof(*pmu) * count);
		igt_assert(pmu);

		if (flags & LATE)
			fence = igt_cork_plug(&cork, i915);

		pmu[0] = -1;
		for (unsigned int n = 0; n < count; n++) {
			uint32_t ctx;

			pmu[n] = add_pmu(pmu[0], &ci[n]);

			if (flags & PULSE) {
				struct drm_i915_gem_execbuffer2 eb = {
					.buffers_ptr = to_user_pointer(&batch),
					.buffer_count = 1,
					.rsvd2 = fence,
					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
				};
				gem_execbuf(i915, &eb);
			}

			/*
			 * Each spinner needs to be on a new timeline,
			 * otherwise they will just sit in the single queue
			 * and not run concurrently.
			 */
			ctx = load_balancer_create(i915, ci, count);

			if (spin == NULL) {
				spin = __igt_spin_new(i915, .ctx = ctx);
			} else {
				struct drm_i915_gem_execbuffer2 eb = {
					.buffers_ptr = spin->execbuf.buffers_ptr,
					.buffer_count = spin->execbuf.buffer_count,
					.rsvd1 = ctx,
					.rsvd2 = fence,
					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
				};
				gem_execbuf(i915, &eb);
			}

			gem_context_destroy(i915, ctx);
		}

		if (flags & LATE) {
			igt_cork_unplug(&cork);
			close(fence);
		}

		load = measure_min_load(pmu[0], count, 10000);
		igt_spin_free(i915, spin);

		close(pmu[0]);
		free(pmu);
		free(ci);

		igt_assert_f(load > 0.90,
			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
			     count, class, load * 100);
		gem_quiescent_gpu(i915);
	}

	gem_close(i915, batch.handle);
	gem_quiescent_gpu(i915);
}

static void nop(int i915)
{
	struct drm_i915_gem_exec_object2 batch = {
		.handle = batch_create(i915),
	};

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;
		uint32_t ctx;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		ctx = load_balancer_create(i915, ci, count);

		for (int n = 0; n < count; n++) {
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.flags = n + 1,
				.rsvd1 = ctx,
			};
			struct timespec tv = {};
			unsigned long nops;
			double t;

			igt_nsec_elapsed(&tv);
			nops = 0;
			do {
				for (int r = 0; r < 1024; r++)
					gem_execbuf(i915, &execbuf);
				nops += 1024;
			} while (igt_seconds_elapsed(&tv) < 2);
			gem_sync(i915, batch.handle);

			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
			igt_info("%s:%d %.3fus\n", class_to_str(class), n, t);
		}

		{
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.rsvd1 = ctx,
			};
			struct timespec tv = {};
			unsigned long nops;
			double t;

			igt_nsec_elapsed(&tv);
			nops = 0;
			do {
				for (int r = 0; r < 1024; r++)
					gem_execbuf(i915, &execbuf);
				nops += 1024;
			} while (igt_seconds_elapsed(&tv) < 2);
			gem_sync(i915, batch.handle);

			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
			igt_info("%s:* %.3fus\n", class_to_str(class), t);
		}

		igt_fork(child, count) {
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&batch),
				.buffer_count = 1,
				.flags = child + 1,
				.rsvd1 = gem_context_clone(i915, ctx,
							   I915_CONTEXT_CLONE_ENGINES,
							   0),
			};
			struct timespec tv = {};
			unsigned long nops;
			double t;

			igt_nsec_elapsed(&tv);
			nops = 0;
			do {
				for (int r = 0; r < 1024; r++)
					gem_execbuf(i915, &execbuf);
				nops += 1024;
			} while (igt_seconds_elapsed(&tv) < 2);
			gem_sync(i915, batch.handle);

			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
			igt_info("[%d] %s:%d %.3fus\n",
				 child, class_to_str(class), child, t);

			memset(&tv, 0, sizeof(tv));
			execbuf.flags = 0;

			igt_nsec_elapsed(&tv);
			nops = 0;
			do {
				for (int r = 0; r < 1024; r++)
					gem_execbuf(i915, &execbuf);
				nops += 1024;
			} while (igt_seconds_elapsed(&tv) < 2);
			gem_sync(i915, batch.handle);

			t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
			igt_info("[%d] %s:* %.3fus\n",
				 child, class_to_str(class), t);

			gem_context_destroy(i915, execbuf.rsvd1);
		}
		igt_waitchildren();

		gem_context_destroy(i915, ctx);
		free(ci);
	}

	gem_close(i915, batch.handle);
	gem_quiescent_gpu(i915);
}

static void ping(int i915, uint32_t ctx, unsigned int engine)
{
	struct drm_i915_gem_exec_object2 obj = {
		.handle = batch_create(i915),
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = to_user_pointer(&obj),
		.buffer_count = 1,
		.flags = engine,
		.rsvd1 = ctx,
	};

	gem_execbuf(i915, &execbuf);
	gem_sync(i915, obj.handle);
	gem_close(i915, obj.handle);
}
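/*
 * Note on the setup below: spin[2] keeps the scratch object busy, so the
 * spinners submitted on block[0]/block[1] (which also depend on scratch) are
 * launched but must wait, presumably occupying all engines of the class with
 * semaphore waits when the scheduler supports preemption. The vip context's
 * ping() must still complete on one of those fully occupied engines; if it
 * cannot, the test hangs and the hang detector fires.
 */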
static void semaphore(int i915)
{
	uint32_t block[2], scratch;
	igt_spin_t *spin[3];

	/*
	 * If we are using HW semaphores to launch serialised requests
	 * on different engines concurrently, we want to verify that real
	 * work is unimpeded.
	 */
	igt_require(gem_scheduler_has_preemption(i915));

	block[0] = gem_context_create(i915);
	block[1] = gem_context_create(i915);

	scratch = gem_create(i915, 4096);
	spin[2] = igt_spin_new(i915, .dependency = scratch);
	for (int class = 1; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count;
		uint32_t vip;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci)
			continue;

		if (count < ARRAY_SIZE(block)) {
			free(ci);
			continue;
		}

		/* Ensure that we completely occupy all engines in this group */
		count = ARRAY_SIZE(block);

		for (int i = 0; i < count; i++) {
			set_load_balancer(i915, block[i], ci, count, NULL);
			spin[i] = __igt_spin_new(i915,
						 .ctx = block[i],
						 .dependency = scratch);
		}

		/*
		 * Either we haven't blocked both engines with semaphores,
		 * or we let the vip through. If not, we hang.
		 */
		vip = gem_context_create(i915);
		set_load_balancer(i915, vip, ci, count, NULL);
		ping(i915, vip, 0);
		gem_context_destroy(i915, vip);

		for (int i = 0; i < count; i++)
			igt_spin_free(i915, spin[i]);

		free(ci);
	}
	igt_spin_free(i915, spin[2]);

	gem_close(i915, scratch);
	gem_context_destroy(i915, block[1]);
	gem_context_destroy(i915, block[0]);

	gem_quiescent_gpu(i915);
}

static void smoketest(int i915, int timeout)
{
	struct drm_i915_gem_exec_object2 batch[2] = {
		{ .handle = __batch_create(i915, 16380) }
	};
	unsigned int ncontext = 0;
	uint32_t *contexts = NULL;
	uint32_t *handles = NULL;

	igt_require_sw_sync();

	for (int class = 0; class < 32; class++) {
		struct i915_engine_class_instance *ci;
		unsigned int count = 0;

		ci = list_engines(i915, 1u << class, &count);
		if (!ci || count < 2) {
			free(ci);
			continue;
		}

		ncontext += 128;
		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
		igt_assert(contexts);

		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
			contexts[n] = load_balancer_create(i915, ci, count);
			igt_assert(contexts[n]);
		}

		free(ci);
	}
	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
	igt_require(ncontext);

	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
	igt_assert(contexts);
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;

	handles = malloc(sizeof(*handles) * ncontext);
	igt_assert(handles);
	for (unsigned int n = 0; n < ncontext; n++)
		handles[n] = gem_create(i915, 4096);

	igt_until_timeout(timeout) {
		unsigned int count = 1 + (rand() % (ncontext - 1));
		IGT_CORK_FENCE(cork);
		int fence = igt_cork_plug(&cork, i915);

		for (unsigned int n = 0; n < count; n++) {
			struct drm_i915_gem_execbuffer2 eb = {
				.buffers_ptr = to_user_pointer(batch),
				.buffer_count = ARRAY_SIZE(batch),
				.rsvd1 = contexts[n],
				.rsvd2 = fence,
				.flags = I915_EXEC_BATCH_FIRST |
					 I915_EXEC_FENCE_IN,
			};
			batch[1].handle = handles[n];
			gem_execbuf(i915, &eb);
		}
		igt_permute_array(handles, count, igt_exchange_int);

		igt_cork_unplug(&cork);
		for (unsigned int n = 0; n < count; n++)
			gem_sync(i915, handles[n]);

		close(fence);
	}

	for (unsigned int n = 0; n < ncontext; n++) {
		gem_close(i915, handles[n]);
		__gem_context_destroy(i915, contexts[n]);
	}
	free(handles);
	free(contexts);
	gem_close(i915, batch[0].handle);
}

static bool has_context_engines(int i915)
{
	struct drm_i915_gem_context_param p = {
		.param = I915_CONTEXT_PARAM_ENGINES,
	};

	return __gem_context_set_param(i915, &p) == 0;
}

static bool has_load_balancer(int i915)
{
	struct i915_engine_class_instance ci = {};
	uint32_t ctx;
	int err;

	ctx = gem_context_create(i915);
	err = __set_load_balancer(i915, ctx, &ci, 1, NULL);
	gem_context_destroy(i915, ctx);

	return err == 0;
}
igt_main
{
	int i915 = -1;

	igt_skip_on_simulation();

	igt_fixture {
		i915 = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(i915);

		gem_require_contexts(i915);
		igt_require(has_context_engines(i915));
		igt_require(has_load_balancer(i915));

		igt_fork_hang_detector(i915);
	}

	igt_subtest("invalid-balancer")
		invalid_balancer(i915);

	igt_subtest("invalid-bonds")
		invalid_bonds(i915);

	igt_subtest("individual")
		individual(i915);

	igt_subtest("indices")
		indices(i915);

	igt_subtest("busy")
		busy(i915);

	igt_subtest_group {
		static const struct {
			const char *name;
			unsigned int flags;
		} phases[] = {
			{ "", 0 },
			{ "-pulse", PULSE },
			{ "-late", LATE },
			{ "-late-pulse", PULSE | LATE },
			{ }
		};
		for (typeof(*phases) *p = phases; p->name; p++)
			igt_subtest_f("full%s", p->name)
				full(i915, p->flags);
	}

	igt_subtest("nop")
		nop(i915);

	igt_subtest("semaphore")
		semaphore(i915);

	igt_subtest("smoke")
		smoketest(i915, 20);

	igt_subtest("bonded-imm")
		bonded(i915, 0);

	igt_subtest("bonded-cork")
		bonded(i915, CORK);

	igt_fixture {
		igt_stop_hang_detector();
	}
}