/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <pthread.h>

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/resource.h>
#include "drm.h"

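/* Execbuf fence flags, defined locally so the benchmark can still be built
 * against older kernel headers that presumably lack I915_EXEC_FENCE_IN/OUT.
 */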
#define LOCAL_I915_EXEC_FENCE_IN              (1<<16)
#define LOCAL_I915_EXEC_FENCE_OUT             (1<<17)

#define CONTEXT		0x1
#define REALTIME	0x2
#define CMDPARSER	0x4
#define FENCE_OUT	0x8

static int done;
static int fd;
static volatile uint32_t *timestamp_reg;

#define REG(x) (volatile uint32_t *)((volatile char *)igt_global_mmio + x)
#define REG_OFFSET(x) ((volatile char *)(x) - (volatile char *)igt_global_mmio)

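/* The locked variant below serialises CPU reads of the timestamp register
 * behind a spinlock, presumably to avoid hazards with concurrent MMIO
 * access on older (pre-gen8) parts; it is only compiled in when the
 * toolchain provides pthread spinlocks and gen7_safe_mmio is defined.
 */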
#if defined(__USE_XOPEN2K) && defined(gen7_safe_mmio)
static pthread_spinlock_t timestamp_lock;

/* Forward declaration so setup_timestamp_locked() can retarget the hook
 * before its definition below.
 */
static uint32_t (*read_timestamp)(void);

static uint32_t read_timestamp_locked(void)
{
	uint32_t t;

	pthread_spin_lock(&timestamp_lock);
	t = *timestamp_reg;
	pthread_spin_unlock(&timestamp_lock);

	return t;
}

static int setup_timestamp_locked(void)
{
	if (pthread_spin_init(&timestamp_lock, 0))
		return 0;

	read_timestamp = read_timestamp_locked;
	return 1;
}

static uint32_t read_timestamp_unlocked(void)
{
	return *timestamp_reg;
}

static uint32_t (*read_timestamp)(void) = read_timestamp_unlocked;

#else
static int setup_timestamp_locked(void)
{
	return 1;
}

static inline uint32_t read_timestamp(void)
{
	return *timestamp_reg;
}
#endif

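/* Each producer thread repeatedly submits a workload batch, a number of
 * nop batches and a timestamp-reading latency batch, then waits for the
 * result. Each producer also owns a set of consumer threads that wait on
 * the same latency batch, adding extra waiters (and hence extra wakeups)
 * to every request.
 */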
struct consumer {
	pthread_t thread;

	int go;

	struct igt_mean latency;
	struct producer *producer;
};

struct producer {
	pthread_t thread;
	uint32_t ctx;
	struct {
		struct drm_i915_gem_exec_object2 exec[1];
		struct drm_i915_gem_execbuffer2 execbuf;
	} nop_dispatch;
	struct {
		struct drm_i915_gem_exec_object2 exec[2];
		struct drm_i915_gem_execbuffer2 execbuf;
	} workload_dispatch;
	struct {
		struct drm_i915_gem_exec_object2 exec[1];
		struct drm_i915_gem_relocation_entry reloc[1];
		struct drm_i915_gem_execbuffer2 execbuf;
	} latency_dispatch;

	pthread_mutex_t lock;
	pthread_cond_t p_cond, c_cond;
	uint32_t *last_timestamp;
	int wait;
	int complete;
	int done;
	struct igt_mean latency, dispatch;

	int nop;
	int nconsumers;
	struct consumer *consumers;
};

#define LOCAL_EXEC_NO_RELOC (1<<11)
#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
#define BLT_WRITE_ALPHA		(1<<21)
#define BLT_WRITE_RGB		(1<<20)

#define WIDTH 1024
#define HEIGHT 1024

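/* Per-engine TIMESTAMP registers (engine mmio base + 0x358). The
 * conversion macros below assume an 80ns tick for the command-streamer
 * timestamp counter.
 */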
#define RCS_TIMESTAMP (0x2000 + 0x358)
#define BCS_TIMESTAMP (0x22000 + 0x358)
#define CYCLES_TO_NS(x) (80.*(x))
#define CYCLES_TO_US(x) (CYCLES_TO_NS(x)/1000.)

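/* Build a batch of 'factor' XY_SRC_COPY_BLT copies (1024x1024, 32bpp),
 * used to give the GPU a configurable amount of real work per cycle.
 */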
static uint32_t create_workload(int gen, int factor)
{
	const int has_64bit_reloc = gen >= 8;
	uint32_t handle = gem_create(fd, 4096);
	uint32_t *map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_WRITE);
	int i = 0;

	while (factor--) {
		/* XY_SRC_COPY */
		map[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (has_64bit_reloc)
			map[i-1] += 2;
		map[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*WIDTH);
		map[i++] = 0;
		map[i++] = HEIGHT << 16 | WIDTH;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;
		map[i++] = 0;
		map[i++] = 4096;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;
	}
	map[i++] = MI_BATCH_BUFFER_END;
	munmap(map, 4096);

	return handle;
}

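/* Point both the source and the destination of every copy at the scratch
 * buffer: two relocations per XY_SRC_COPY_BLT, one for the dst-address
 * dword and one for the src-address dword written by create_workload().
 */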
static void setup_workload(struct producer *p, int gen,
			   uint32_t scratch,
			   uint32_t batch,
			   int factor,
			   unsigned flags)
{
	struct drm_i915_gem_execbuffer2 *eb;
	const int has_64bit_reloc = gen >= 8;
	struct drm_i915_gem_relocation_entry *reloc;
	int offset;

	reloc = calloc(2*factor, sizeof(*reloc));

	p->workload_dispatch.exec[0].handle = scratch;
	p->workload_dispatch.exec[1].relocation_count = 2*factor;
	p->workload_dispatch.exec[1].relocs_ptr = (uintptr_t)reloc;
	p->workload_dispatch.exec[1].handle = batch;

	offset = 0;
	while (factor--) {
		reloc->offset = (offset+4) * sizeof(uint32_t);
		reloc->target_handle = scratch;
		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
		reloc++;

		reloc->offset = (offset+7) * sizeof(uint32_t);
		if (has_64bit_reloc)
			reloc->offset += sizeof(uint32_t);
		reloc->target_handle = scratch;
		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
		reloc++;

		offset += 8;
		if (has_64bit_reloc)
			offset += 2;
	}

	eb = memset(&p->workload_dispatch.execbuf, 0, sizeof(*eb));
	eb->buffers_ptr = (uintptr_t)p->workload_dispatch.exec;
	eb->buffer_count = 2;
	if (flags & CMDPARSER)
		eb->batch_len = 4096;
	eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC;
	eb->rsvd1 = p->ctx;
}

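/* Build the latency probe: an MI_STORE_REGISTER_MEM that snapshots the
 * engine TIMESTAMP into the batch buffer itself at byte offset 4000
 * (i.e. map[1000], kept mapped as last_timestamp so the CPU can read the
 * value back after waiting for the batch to complete).
 */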
static void setup_latency(struct producer *p, int gen, unsigned flags)
{
	struct drm_i915_gem_execbuffer2 *eb;
	const int has_64bit_reloc = gen >= 8;
	uint32_t handle;
	uint32_t *map;
	int i = 0;

	handle = gem_create(fd, 4096);
	if (gem_has_llc(fd))
		map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_WRITE);
	else
		map = gem_mmap__gtt(fd, handle, 4096, PROT_WRITE);

	p->latency_dispatch.exec[0].relocation_count = 1;
	p->latency_dispatch.exec[0].relocs_ptr =
		(uintptr_t)p->latency_dispatch.reloc;
	p->latency_dispatch.exec[0].handle = handle;

	/* MI_STORE_REGISTER_MEM */
	map[i++] = 0x24 << 23 | 1;
	if (has_64bit_reloc)
		map[i-1]++;
	map[i++] = REG_OFFSET(timestamp_reg);
	p->latency_dispatch.reloc[0].offset = i * sizeof(uint32_t);
	p->latency_dispatch.reloc[0].delta = 4000;
	p->latency_dispatch.reloc[0].target_handle = handle;
	p->latency_dispatch.reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
	p->latency_dispatch.reloc[0].write_domain = 0; /* We lie! */
	p->latency_dispatch.reloc[0].presumed_offset = 0;
	p->last_timestamp = &map[1000];
	map[i++] = 4000;
	if (has_64bit_reloc)
		map[i++] = 0;

	map[i++] = MI_BATCH_BUFFER_END;

	eb = memset(&p->latency_dispatch.execbuf, 0, sizeof(*eb));
	eb->buffers_ptr = (uintptr_t)p->latency_dispatch.exec;
	eb->buffer_count = 1;
	if (flags & CMDPARSER)
		eb->batch_len = sizeof(*map) * ((i + 1) & ~1);
	eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC;
	if (flags & FENCE_OUT)
		eb->flags |= LOCAL_I915_EXEC_FENCE_OUT;
	eb->rsvd1 = p->ctx;
}

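/* The smallest valid batch: just MI_BATCH_BUFFER_END. */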
static uint32_t create_nop(void)
{
	uint32_t buf = MI_BATCH_BUFFER_END;
	uint32_t handle;

	handle = gem_create(fd, 4096);
	gem_write(fd, handle, 0, &buf, sizeof(buf));

	return handle;
}

static void setup_nop(struct producer *p, uint32_t batch, unsigned flags)
{
	struct drm_i915_gem_execbuffer2 *eb;

	p->nop_dispatch.exec[0].handle = batch;

	eb = memset(&p->nop_dispatch.execbuf, 0, sizeof(*eb));
	eb->buffers_ptr = (uintptr_t)p->nop_dispatch.exec;
	eb->buffer_count = 1;
	if (flags & CMDPARSER)
		eb->batch_len = 8;
	eb->flags = I915_EXEC_BLT | LOCAL_EXEC_NO_RELOC;
	eb->rsvd1 = p->ctx;
}

static void fence_wait(int fence)
{
	struct pollfd pfd = { .fd = fence, .events = POLLIN };
	poll(&pfd, 1, -1);
}

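/* Wait for the latency batch to complete (either via gem_sync on its
 * handle or by polling its output fence), then accumulate the delta
 * between the current TIMESTAMP and the value the GPU wrote.
 */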
static void measure_latency(struct producer *p, struct igt_mean *mean)
{
	if (!(p->latency_dispatch.execbuf.flags & LOCAL_I915_EXEC_FENCE_OUT))
		gem_sync(fd, p->latency_dispatch.exec[0].handle);
	else
		fence_wait(p->latency_dispatch.execbuf.rsvd2 >> 32);
	igt_mean_add(mean, read_timestamp() - *p->last_timestamp);
}

static void *producer(void *arg)
{
	struct producer *p = arg;
	int n;

	while (!done) {
		uint32_t start = read_timestamp();
		int batches;

		/* Control the amount of work we do, similar to submitting
		 * empty buffers below, except this time we will load the
		 * GPU with a small amount of real work - so there is a small
		 * period between execution and interrupts.
		 */
		gem_execbuf(fd, &p->workload_dispatch.execbuf);

		/* Submitting a set of empty batches has a two-fold effect:
		 * - increases contention on execbuffer, i.e. measures how
		 *   dispatch latency scales with the number of clients.
		 * - generates lots of spurious interrupts (if someone is
		 *   waiting).
		 */
		batches = p->nop;
		while (batches--)
			gem_execbuf(fd, &p->nop_dispatch.execbuf);

		/* Finally, execute a batch that just reads the current
		 * TIMESTAMP so we can measure the latency.
		 */
		if (p->latency_dispatch.execbuf.flags & LOCAL_I915_EXEC_FENCE_OUT)
			gem_execbuf_wr(fd, &p->latency_dispatch.execbuf);
		else
			gem_execbuf(fd, &p->latency_dispatch.execbuf);

		/* Wake all the associated clients to wait upon our batch.
		 * Hold the lock while flagging them, so that no consumer can
		 * be between checking its go flag and blocking on the condvar
		 * when we broadcast (which would lose the wakeup).
		 */
		pthread_mutex_lock(&p->lock);
		p->wait = p->nconsumers;
		for (n = 0; n < p->nconsumers; n++)
			p->consumers[n].go = 1;
		pthread_cond_broadcast(&p->c_cond);
		pthread_mutex_unlock(&p->lock);

		/* Wait for this batch to finish and record how long we waited,
		 * and how long it took for the batch to be submitted
		 * (including the nop delays).
		 */
		measure_latency(p, &p->latency);
		igt_mean_add(&p->dispatch, *p->last_timestamp - start);

		/* Tidy up all the extra threads before we submit again. */
		pthread_mutex_lock(&p->lock);
		while (p->wait)
			pthread_cond_wait(&p->p_cond, &p->lock);
		pthread_mutex_unlock(&p->lock);

		p->complete++;

		if (p->latency_dispatch.execbuf.flags & LOCAL_I915_EXEC_FENCE_OUT)
			close(p->latency_dispatch.execbuf.rsvd2 >> 32);
	}

	pthread_mutex_lock(&p->lock);
	p->wait = p->nconsumers;
	p->done = true;
	for (n = 0; n < p->nconsumers; n++)
		p->consumers[n].go = 1;
	pthread_cond_broadcast(&p->c_cond);
	pthread_mutex_unlock(&p->lock);

	return NULL;
}

static void *consumer(void *arg)
{
	struct consumer *c = arg;
	struct producer *p = c->producer;

	/* Sit around waiting for the "go" signal from the producer, then
	 * wait upon the batch to finish. This is to add extra waiters to
	 * the same request - increasing wakeup contention.
	 */
	do {
		pthread_mutex_lock(&p->lock);
		if (--p->wait == 0)
			pthread_cond_signal(&p->p_cond);
		while (!c->go)
			pthread_cond_wait(&p->c_cond, &p->lock);
		c->go = 0;
		pthread_mutex_unlock(&p->lock);
		if (p->done)
			return NULL;

		measure_latency(p, &c->latency);
	} while (1);
}

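/* Pick a location estimator appropriate to the sample count: the trimean
 * needs a reasonable number of samples to be meaningful, the median a
 * few; otherwise fall back to the plain mean.
 */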
static double l_estimate(igt_stats_t *stats)
{
	if (stats->n_values > 9)
		return igt_stats_get_trimean(stats);
	else if (stats->n_values > 5)
		return igt_stats_get_median(stats);
	else
		return igt_stats_get_mean(stats);
}

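/* Combined user+system CPU time consumed by the process, in microseconds. */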
static double cpu_time(const struct rusage *r)
{
	return 1e6*(r->ru_utime.tv_sec + r->ru_stime.tv_sec) +
		(r->ru_utime.tv_usec + r->ru_stime.tv_usec);
}

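/* Set up the producers (and their consumer threads), let them run for the
 * requested number of seconds, then collect the dispatch and wakeup
 * latency statistics and print the field selected by -f.
 */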
static int run(int seconds,
	       int nproducers,
	       int nconsumers,
	       int nop,
	       int workload,
	       unsigned flags)
{
	pthread_attr_t attr;
	struct producer *p;
	igt_stats_t platency, latency, dispatch;
	struct rusage rused;
	uint32_t nop_batch;
	uint32_t workload_batch;
	uint32_t scratch;
	int gen, n, m;
	int complete;
	int nrun;

#if 0
	printf("producers=%d, consumers=%d, nop=%d, workload=%d, flags=%x\n",
	       nproducers, nconsumers, nop, workload, flags);
#endif

	fd = drm_open_driver(DRIVER_INTEL);
	gen = intel_gen(intel_get_drm_devid(fd));
	if (gen < 6)
		return IGT_EXIT_SKIP; /* Needs BCS timestamp */

	intel_register_access_init(intel_get_pci_device(), false, fd);

	if (gen == 6)
		timestamp_reg = REG(RCS_TIMESTAMP);
	else
		timestamp_reg = REG(BCS_TIMESTAMP);

	if (gen < 8 && !setup_timestamp_locked())
		return IGT_EXIT_SKIP;

	nrun = read_timestamp();
	usleep(1);
	if (read_timestamp() == nrun)
		return IGT_EXIT_SKIP;

	scratch = gem_create(fd, 4*WIDTH*HEIGHT);
	nop_batch = create_nop();
	workload_batch = create_workload(gen, workload);

	p = calloc(nproducers, sizeof(*p));
	for (n = 0; n < nproducers; n++) {
		if (flags & CONTEXT)
			p[n].ctx = gem_context_create(fd);

		setup_nop(&p[n], nop_batch, flags);
		setup_workload(&p[n], gen, scratch, workload_batch, workload, flags);
		setup_latency(&p[n], gen, flags);

		pthread_mutex_init(&p[n].lock, NULL);
		pthread_cond_init(&p[n].p_cond, NULL);
		pthread_cond_init(&p[n].c_cond, NULL);

		igt_mean_init(&p[n].latency);
		igt_mean_init(&p[n].dispatch);
		p[n].wait = nconsumers;
		p[n].nop = nop;
		p[n].nconsumers = nconsumers;
		p[n].consumers = calloc(nconsumers, sizeof(struct consumer));
		for (m = 0; m < nconsumers; m++) {
			p[n].consumers[m].producer = &p[n];
			igt_mean_init(&p[n].consumers[m].latency);
			pthread_create(&p[n].consumers[m].thread, NULL,
				       consumer, &p[n].consumers[m]);
		}
		pthread_mutex_lock(&p[n].lock);
		while (p[n].wait)
			pthread_cond_wait(&p[n].p_cond, &p[n].lock);
		pthread_mutex_unlock(&p[n].lock);
	}

	pthread_attr_init(&attr);
	if (flags & REALTIME) {
#ifdef PTHREAD_EXPLICIT_SCHED
		struct sched_param param = { .sched_priority = 99 };
		pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
		pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
		pthread_attr_setschedparam(&attr, &param);
#else
		return IGT_EXIT_SKIP;
#endif
	}
	for (n = 0; n < nproducers; n++)
		pthread_create(&p[n].thread, &attr, producer, &p[n]);

	sleep(seconds);
	done = true;

	nrun = complete = 0;
	igt_stats_init_with_size(&dispatch, nproducers);
	igt_stats_init_with_size(&platency, nproducers);
	igt_stats_init_with_size(&latency, nconsumers*nproducers);
	for (n = 0; n < nproducers; n++) {
		pthread_join(p[n].thread, NULL);

		if (!p[n].complete)
			continue;

		nrun++;
		complete += p[n].complete;
		igt_stats_push_float(&latency, p[n].latency.mean);
		igt_stats_push_float(&platency, p[n].latency.mean);
		igt_stats_push_float(&dispatch, p[n].dispatch.mean);

		for (m = 0; m < nconsumers; m++) {
			pthread_join(p[n].consumers[m].thread, NULL);
			igt_stats_push_float(&latency,
					     p[n].consumers[m].latency.mean);
		}
	}

	getrusage(RUSAGE_SELF, &rused);

	switch ((flags >> 8) & 0xf) {
	default:
		printf("%d/%d: %7.3fus %7.3fus %7.3fus %7.3fus\n",
		       complete, nrun,
		       CYCLES_TO_US(l_estimate(&dispatch)),
		       CYCLES_TO_US(l_estimate(&latency)),
		       CYCLES_TO_US(l_estimate(&platency)),
		       cpu_time(&rused) / complete);
		break;
	case 1:
		printf("%f\n", CYCLES_TO_US(l_estimate(&dispatch)));
		break;
	case 2:
		printf("%f\n", CYCLES_TO_US(l_estimate(&latency)));
		break;
	case 3:
		printf("%f\n", CYCLES_TO_US(l_estimate(&platency)));
		break;
	case 4:
		printf("%f\n", cpu_time(&rused) / complete);
		break;
	case 5:
		printf("%d\n", complete);
		break;
	}

	return 0;
}

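/*
 * Options:
 *   -p N  number of producer threads
 *   -c N  number of consumers waiting on each batch
 *   -n N  number of extra nop batches per cycle
 *   -w N  amount of copy work per cycle (0-100)
 *   -t N  run time in seconds
 *   -f N  select a single output field
 *   -s    one context per producer
 *   -R    realtime priority for producers
 *   -C    expose batches to the command parser
 *   -F    use an output fence instead of gem_sync
 */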
int main(int argc, char **argv)
{
	int time = 10;
	int producers = 1;
	int consumers = 0;
	int nop = 0;
	int workload = 0;
	unsigned flags = 0;
	int c;

	while ((c = getopt(argc, argv, "Cp:c:n:w:t:f:sRF")) != -1) {
		switch (c) {
		case 'p':
			/* How many threads generate work? */
			producers = atoi(optarg);
			if (producers < 1)
				producers = 1;
			break;

		case 'c':
			/* How many threads wait upon each piece of work? */
			consumers = atoi(optarg);
			if (consumers < 0)
				consumers = 0;
			break;

		case 'n':
			/* Extra dispatch contention + interrupts */
			nop = atoi(optarg);
			if (nop < 0)
				nop = 0;
			break;

		case 'w':
			/* Control the amount of real work done */
			workload = atoi(optarg);
			if (workload < 0)
				workload = 0;
			if (workload > 100)
				workload = 100;
			break;

		case 't':
			/* How long to run the benchmark for (seconds) */
			time = atoi(optarg);
			if (time < 0)
				time = INT_MAX;
			break;

		case 'f':
			/* Select an output field */
			flags |= atoi(optarg) << 8;
			break;

		case 's':
			/* Assign each producer to its own context, adding
			 * context switching into the mix (e.g. execlists
			 * can amalgamate requests from one context, so
			 * having each producer submit in different contexts
			 * should force more execlist interrupts).
			 */
			flags |= CONTEXT;
			break;

		case 'R':
			/* Run the producers at RealTime priority */
			flags |= REALTIME;
			break;

		case 'C':
			/* Don't hide from the command parser (gen7) */
			flags |= CMDPARSER;
			break;

		case 'F':
			flags |= FENCE_OUT;
			break;

		default:
			break;
		}
	}

	return run(time, producers, consumers, nop, workload, flags);
}