/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include "../i915_selftest.h"
#include "i915_random.h"

#include "mock_gem_device.h"
#include "mock_timeline.h"

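/*
 * A single step of the igt_sync() state table: the seqno to query, the
 * result we expect from __i915_timeline_sync_is_later(), and whether to
 * record the seqno with __i915_timeline_sync_set() afterwards.
 */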
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

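/*
 * Check that a (context, seqno) lookup matches the expected result and,
 * if requested, mark that seqno as completed on the timeline.
 */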
static int __igt_sync(struct i915_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

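/*
 * Drive the pass[] table of seqno transitions (including u32 wraparound)
 * against context ids clustered around every power of two: first applying
 * each step across all contexts, then replaying the whole table per context.
 */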
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct i915_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

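/* Pick a uniformly distributed engine id in [0, I915_NUM_ENGINES). */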
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

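/*
 * Rough throughput measurements for the timeline sync (seqno tracking)
 * code: random and in-order insertions and lookups, plus mixed and cyclic
 * workloads, with the prng overhead measured up front so that it can be
 * subtracted from the timed loops that use it.
 */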
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct i915_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We try to compensate the results by measuring the overhead
	 * of the prng and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
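	/* Cost of 2^20 prng calls in ns; scaled up so it can later be
	 * subtracted as (calls * prng32_1M) >> 20 without losing precision.
	 */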
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__i915_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__i915_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__i915_timeline_sync_is_later(&tl, id, seqno))
			__i915_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
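	/* Step the id-space order through a Fibonacci-like sequence
	 * (1, 2, 3, 5, 8, ...) to sample a spread of working-set sizes.
	 */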
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__i915_timeline_sync_is_later(&tl, id, 0);
			__i915_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

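/* Entry point for the mock (no hardware required) timeline selftests. */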
int i915_gem_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}