/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <ftw.h>
#include <inttypes.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <time.h>
#include <limits.h>
#include "drm.h"

#include <linux/unistd.h>

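/* glibc does not expose sigev_notify_thread_id by name; reach into the union. */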
#define sigev_notify_thread_id _sigev_un._tid

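/* Set non-zero by the main thread to tell all worker threads to stop. */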
static volatile int done;

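/*
 * Per-thread state for the GPU load generator: the nop batch size, a
 * running count of submissions, and the optional leak/interrupt modes.
 */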
struct gem_busyspin {
	pthread_t thread;
	unsigned long sz;
	unsigned long count;
	bool leak;
	bool interrupts;
};

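/* Per-thread latency sampler; wakeup delays accumulate in the igt_mean. */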
struct sys_wait {
	pthread_t thread;
	struct igt_mean mean;
};

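/*
 * Writing a target of 0us to /dev/cpu_dma_latency asks the PM QoS layer
 * to keep CPUs out of deep idle states, so measured wakeup latency is not
 * dominated by C-state exit time.
 */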
static void force_low_latency(void)
{
	int32_t target = 0;
	int fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0 || write(fd, &target, sizeof(target)) < 0)
		fprintf(stderr,
			"Unable to prevent CPU sleeps and force low latency using /dev/cpu_dma_latency: %s\n",
			strerror(errno));
}

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

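/*
 * Skip the default engine (0) and, when two BSD rings are present, the
 * plain I915_EXEC_BSD alias that would duplicate one of them.
 */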
static bool ignore_engine(int fd, unsigned engine)
{
	if (engine == 0)
		return true;

	if (gem_has_bsd2(fd) && engine == I915_EXEC_BSD)
		return true;

	return false;
}

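/*
 * GPU load generator: submit a nop batch to every engine in a freshly
 * shuffled order, forever. With bs->leak, the old batch is marked
 * DONTNEED and replaced each round to add allocation/reclaim pressure.
 */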
static void *gem_busyspin(void *arg)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct gem_busyspin *bs = arg;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	const unsigned sz =
		bs->sz ? bs->sz + sizeof(bbe) : bs->leak ? 16 << 20 : 4 << 10;
	unsigned engines[16];
	unsigned nengine;
	unsigned engine;
	int fd;

	fd = drm_open_driver(DRIVER_INTEL);

	nengine = 0;
	for_each_engine(fd, engine)
		if (!ignore_engine(fd, engine))
			engines[nengine++] = engine;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	obj[1].handle = gem_create(fd, sz);
	gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	if (bs->interrupts) {
		execbuf.buffers_ptr = (uintptr_t)&obj[0];
		execbuf.buffer_count = 2;
	} else {
		execbuf.buffers_ptr = (uintptr_t)&obj[1];
		execbuf.buffer_count = 1;
	}
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}

	while (!done) {
		for (int n = 0; n < nengine; n++) {
			const int m = rand() % nengine;
			unsigned int tmp = engines[n];
			engines[n] = engines[m];
			engines[m] = tmp;
		}
		for (int n = 0; n < nengine; n++) {
			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= engines[n];
			gem_execbuf(fd, &execbuf);
		}
		bs->count += nengine;
		if (bs->leak) {
			gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
			obj[1].handle = gem_create(fd, sz);
			gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
		}
	}

	close(fd);
	return NULL;
}

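/* Difference between two CLOCK_MONOTONIC samples, in nanoseconds. */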
static double elapsed(const struct timespec *a, const struct timespec *b)
{
	return 1e9*(b->tv_sec - a->tv_sec) + (b->tv_nsec - a->tv_nsec);
}

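/*
 * Latency probe: arm a one-shot absolute timer 100us plus up to 1ms of
 * random slack into the future, sleep in sigwait(), then record how late
 * the wakeup was relative to the requested expiry.
 */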
static void *sys_wait(void *arg)
{
	struct sys_wait *w = arg;
	struct sigevent sev;
	timer_t timer;
	sigset_t mask;
	struct timespec now;
#define SIG SIGRTMIN

	sigemptyset(&mask);
	sigaddset(&mask, SIG);
	sigprocmask(SIG_SETMASK, &mask, NULL);

	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
	sev.sigev_notify_thread_id = gettid();
	sev.sigev_signo = SIG;
	timer_create(CLOCK_MONOTONIC, &sev, &timer);

	clock_gettime(CLOCK_MONOTONIC, &now);
	while (!done) {
		struct itimerspec its;
		int sigs;

		its.it_value = now;
		its.it_value.tv_nsec += 100 * 1000;
		its.it_value.tv_nsec += rand() % (NSEC_PER_SEC / 1000);
		if (its.it_value.tv_nsec >= NSEC_PER_SEC) {
			its.it_value.tv_nsec -= NSEC_PER_SEC;
			its.it_value.tv_sec += 1;
		}
		its.it_interval.tv_sec = its.it_interval.tv_nsec = 0;
		timer_settime(timer, TIMER_ABSTIME, &its, NULL);

		sigwait(&mask, &sigs);
		clock_gettime(CLOCK_MONOTONIC, &now);
		igt_mean_add(&w->mean, elapsed(&its.it_value, &now));
	}

	sigprocmask(SIG_UNBLOCK, &mask, NULL);
	timer_delete(timer);

	return NULL;
}

#define PAGE_SIZE 4096
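/*
 * Alternative probe (-m): time how long it takes to mmap, fault in and
 * release a 2MiB anonymous region hinted with MADV_HUGEPAGE.
 */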
static void *sys_thp_alloc(void *arg)
{
	struct sys_wait *w = arg;
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	while (!done) {
		const size_t sz = 2 << 20;
		const struct timespec start = now;
		void *ptr;

		ptr = mmap(NULL, sz,
			   PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
			   -1, 0);
		assert(ptr != MAP_FAILED);
		madvise(ptr, sz, MADV_HUGEPAGE);
		for (size_t page = 0; page < sz; page += PAGE_SIZE)
			*(volatile uint32_t *)((unsigned char *)ptr + page) = 0;
		munmap(ptr, sz);

		clock_gettime(CLOCK_MONOTONIC, &now);
		igt_mean_add(&w->mean, elapsed(&start, &now));
	}

	return NULL;
}

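/* Pin the next thread created with this attr to a single CPU. */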
static void bind_cpu(pthread_attr_t *attr, int cpu)
{
#ifdef __USE_GNU
	cpu_set_t mask;

	if (cpu == -1)
		return;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	pthread_attr_setaffinity_np(attr, sizeof(mask), &mask);
#endif
}

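/* Request explicit SCHED_FIFO scheduling so the samplers preempt ordinary tasks. */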
static void rtprio(pthread_attr_t *attr, int prio)
{
#ifdef PTHREAD_EXPLICIT_SCHED
	struct sched_param param = { .sched_priority = prio };
	pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(attr, SCHED_FIFO);
	pthread_attr_setschedparam(attr, &param);
#endif
}

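/*
 * Robust location estimate: use the trimean given enough samples, else
 * fall back to the median and finally the raw mean.
 */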
static double l_estimate(igt_stats_t *stats)
{
	if (stats->n_values > 9)
		return igt_stats_get_trimean(stats);
	else if (stats->n_values > 5)
		return igt_stats_get_median(stats);
	else
		return igt_stats_get_mean(stats);
}

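/* Estimate the overhead of clock_gettime() itself; subtracted from results. */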
static double min_measurement_error(void)
{
	struct timespec start, end;
	int n;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (n = 0; n < 1024; n++)
		clock_gettime(CLOCK_MONOTONIC, &end);

	return elapsed(&start, &end) / n;
}

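/* nftw() callback: fault each file into the page cache via a read-only mmap. */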
static int print_entry(const char *filepath, const struct stat *info,
		       const int typeflag, struct FTW *pathinfo)
{
	int fd;

	fd = open(filepath, O_RDONLY);
	if (fd != -1) {
		void *ptr;

		ptr = mmap(NULL, info->st_size,
			   PROT_READ, MAP_SHARED | MAP_POPULATE,
			   fd, 0);
		if (ptr != MAP_FAILED)
			munmap(ptr, info->st_size);

		close(fd);
	}

	return 0;
}

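/* Endless filesystem walk (-b) to generate background page-cache pressure. */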
static void *background_fs(void *path)
{
	while (1)
		nftw(path, print_entry, 20, FTW_PHYS | FTW_MOUNT);
	return NULL;
}

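/*
 * Find the nop batch size whose execution time approximates target_us,
 * iterating until successive estimates agree within tolerance_pct.
 */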
static unsigned long calibrate_nop(unsigned int target_us,
				   unsigned int tolerance_pct)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	const unsigned int loops = 100;
	struct drm_i915_gem_exec_object2 obj = {};
	struct drm_i915_gem_execbuffer2 eb =
		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj };
	struct timespec t_0, t_end;
	long sz, prev;
	int fd;

	fd = drm_open_driver(DRIVER_INTEL);

	clock_gettime(CLOCK_MONOTONIC, &t_0);

	sz = 256 * 1024;
	do {
		struct timespec t_start;

		obj.handle = gem_create(fd, sz + sizeof(bbe));
		gem_write(fd, obj.handle, sz, &bbe, sizeof(bbe));
		gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);

		clock_gettime(CLOCK_MONOTONIC, &t_start);
		for (int loop = 0; loop < loops; loop++)
			gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);
		clock_gettime(CLOCK_MONOTONIC, &t_end);

		gem_close(fd, obj.handle);

		prev = sz;
		sz = loops * sz / elapsed(&t_start, &t_end) * 1e3 * target_us;
		sz = ALIGN(sz, sizeof(uint32_t));
	} while (elapsed(&t_0, &t_end) < 5 ||
		 labs(sz - prev) > (sz * tolerance_pct / 100));

	close(fd);

	return sz;
}

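/*
 * Usage: gem_syslatency [-t seconds] [-r batch-us] [-f field] [-bmni1]
 *   -t  benchmark duration in seconds (default 10; negative runs forever)
 *   -r  per-batch duration in us (negative values are a raw batch size)
 *   -f  print a single field: 0 cycles, 1 mean latency, 2 max latency
 *   -b  walk the filesystem from "/" in the background
 *   -m  measure THP allocation latency instead of timer latency
 *   -n  dry run: no GPU load, baseline system latency only
 *   -i  attach a write object to each batch (interrupts ahoy!)
 *   -1  use a single CPU instead of all of them
 */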
int main(int argc, char **argv)
{
	struct gem_busyspin *busy;
	struct sys_wait *wait;
	void *(*sys_fn)(void *) = sys_wait;
	pthread_attr_t attr;
	pthread_t bg_fs = 0;
	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	igt_stats_t cycles, mean, max;
	double min;
	int time = 10;
	int field = -1;
	int enable_gem_sysbusy = 1;
	bool leak = false;
	bool interrupts = false;
	long batch = 0;
	int n, c;

	while ((c = getopt(argc, argv, "r:t:f:bmni1")) != -1) {
		switch (c) {
		case '1':
			ncpus = 1;
			break;
		case 'n': /* dry run, measure baseline system latency */
			enable_gem_sysbusy = 0;
			break;
		case 'i': /* interrupts ahoy! */
			interrupts = true;
			break;
		case 't':
			/* How long to run the benchmark for (seconds) */
			time = atoi(optarg);
			if (time < 0)
				time = INT_MAX;
			break;
		case 'r':
			/* Duration of each batch (microseconds) */
			batch = atoi(optarg);
			break;
		case 'f':
			/* Select an output field */
			field = atoi(optarg);
			break;
		case 'b':
			pthread_create(&bg_fs, NULL,
				       background_fs, (void *)"/");
			sleep(5);
			break;
		case 'm':
			sys_fn = sys_thp_alloc;
			leak = true;
			break;
		default:
			break;
		}
	}

	/* Prevent CPU sleeps so that busy and idle loads are consistent. */
	force_low_latency();
	min = min_measurement_error();

	if (batch > 0)
		batch = calibrate_nop(batch, 2);
	else
		batch = -batch;

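	/* One GPU submission thread per CPU, each pinned to its own core. */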
	busy = calloc(ncpus, sizeof(*busy));
	pthread_attr_init(&attr);
	if (enable_gem_sysbusy) {
		for (n = 0; n < ncpus; n++) {
			bind_cpu(&attr, n);
			busy[n].sz = batch;
			busy[n].leak = leak;
			busy[n].interrupts = interrupts;
			pthread_create(&busy[n].thread, &attr,
				       gem_busyspin, &busy[n]);
		}
	}

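	/* One real-time latency sampler per CPU, pinned alongside the busyspinners. */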
	wait = calloc(ncpus, sizeof(*wait));
	pthread_attr_init(&attr);
	rtprio(&attr, 99);
	for (n = 0; n < ncpus; n++) {
		igt_mean_init(&wait[n].mean);
		bind_cpu(&attr, n);
		pthread_create(&wait[n].thread, &attr, sys_fn, &wait[n]);
	}

	sleep(time);
	done = 1;

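	/* Collect per-thread submission counts and latency statistics. */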
	igt_stats_init_with_size(&cycles, ncpus);
	if (enable_gem_sysbusy) {
		for (n = 0; n < ncpus; n++) {
			pthread_join(busy[n].thread, NULL);
			igt_stats_push(&cycles, busy[n].count);
		}
	}

	igt_stats_init_with_size(&mean, ncpus);
	igt_stats_init_with_size(&max, ncpus);
	for (n = 0; n < ncpus; n++) {
		pthread_join(wait[n].thread, NULL);
		igt_stats_push_float(&mean, wait[n].mean.mean);
		igt_stats_push_float(&max, wait[n].mean.max);
	}
	if (bg_fs) {
		pthread_cancel(bg_fs);
		pthread_join(bg_fs, NULL);
	}

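	/* Report in microseconds, corrected for the clock_gettime() overhead. */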
	switch (field) {
	default:
		printf("gem_syslatency: cycles=%.0f, latency mean=%.3fus max=%.0fus\n",
		       igt_stats_get_mean(&cycles),
		       (igt_stats_get_mean(&mean) - min) / 1000,
		       (l_estimate(&max) - min) / 1000);
		break;
	case 0:
		printf("%.0f\n", igt_stats_get_mean(&cycles));
		break;
	case 1:
		printf("%.3f\n", (igt_stats_get_mean(&mean) - min) / 1000);
		break;
	case 2:
		printf("%.0f\n", (l_estimate(&max) - min) / 1000);
		break;
	}

	return 0;
}