/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include "../i915_selftest.h"
#include "i915_random.h"

#include "mock_gem_device.h"
#include "mock_timeline.h"

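/* A single step of a sync-point test sequence: query the timeline with
 * @seqno and compare the result against @expected, then record the
 * seqno afterwards if @set is true.
 */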
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

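/* Apply one test step: check that __i915_timeline_sync_is_later() for
 * (ctx, seqno) returns the expected result, then advance the timeline
 * with __i915_timeline_sync_set() if the step requests it.
 */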
static int __igt_sync(struct i915_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

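/* Walk a canned sequence of sync-point queries and updates, verifying
 * the seqno ordering rules, including u32 wraparound (seqno 0 comes
 * after UINT_MAX), across a range of context ids.
 */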
static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct i915_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
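	/* First pass: for every test step, probe context ids just below,
	 * at and just above each power of two, exercising the boundaries
	 * of the underlying sync map.
	 */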
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
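	/* Second pass: invert the nesting so that the full test sequence
	 * runs to completion on one context id before moving to the next.
	 */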
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

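/* Pick a uniformly distributed engine index in [0, I915_NUM_ENGINES). */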
static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

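/* Measure the cost of timeline sync-map insertions and lookups under
 * random, in-order and cyclic access patterns, compensating for the
 * prng overhead wherever it forms part of the measured loop.
 */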
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct i915_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from the cache are very fast, so the random number
	 * generation and the loop itself become significant factors in the
	 * per-iteration timings. We compensate by measuring the overhead
	 * of the prng and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
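	/* Cost of a single prandom_u32_state() call in nanoseconds,
	 * scaled up by 2^20 (the "1M" suffix) so that sub-nanosecond
	 * precision survives the integer arithmetic below.
	 */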
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__i915_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
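	/* Each iteration drew one u64, i.e. two u32 prng calls; subtract
	 * that overhead (shifting away the 2^20 scale) before reporting.
	 */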
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__i915_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__i915_timeline_sync_is_later(&tl, id, seqno))
			__i915_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
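	/* Step the id stride through the Fibonacci sequence (1, 2, 3, 5,
	 * 8, ...) to sample a spread of orders without testing every one.
	 */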
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__i915_timeline_sync_is_later(&tl, id, 0);
			__i915_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

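/* Entry point for the mock selftests; runs each subtest in turn. */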
int i915_gem_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}