• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
/* Measure the time it takes to bind/unbind objects from the ppGTT */
29 
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
42 
43 #include "drm.h"
44 #include "ioctl_wrappers.h"
45 #include "drmtest.h"
46 #include "intel_io.h"
47 #include "intel_reg.h"
48 #include "igt_stats.h"
49 
/*
 * Local copies of execbuffer flag bits, OR'ed into execbuf.flags below.
 * NOTE(review): presumably mirrored here so the benchmark builds against
 * kernel headers that predate these flags - confirm against i915_drm.h.
 */
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

/* Two-bit field starting at bit 13. */
#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

/*
 * Every bit that takes part in engine selection; cleared from execbuf.flags
 * before a new engine id is OR'ed in.
 * NOTE(review): the constants are deliberately left as signed int so that
 * ~ENGINE_FLAGS sign-extends if execbuf.flags is wider than int - do not
 * add a 'u' suffix without checking the type of that field.
 */
#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
57 
/*
 * Return the interval from @start to @end in seconds.
 *
 * The whole-second and nanosecond fields are subtracted separately, so a
 * negative nanosecond difference (when @end has the smaller tv_nsec) is
 * absorbed by the seconds term.
 */
static double elapsed(const struct timespec *start,
		      const struct timespec *end)
{
	return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec - start->tv_nsec);
}
63 
batch(int fd,uint64_t size)64 static uint32_t batch(int fd, uint64_t size)
65 {
66 	const uint32_t bbe = MI_BATCH_BUFFER_END;
67 	uint32_t handle = gem_create(fd, size);
68 	gem_write(fd, handle, 0, &bbe, sizeof(bbe));
69 	return handle;
70 }
71 
loop(uint64_t size,unsigned ring,int reps,int ncpus,unsigned flags)72 static int loop(uint64_t size, unsigned ring, int reps, int ncpus, unsigned flags)
73 {
74 	struct drm_i915_gem_execbuffer2 execbuf;
75 	struct drm_i915_gem_exec_object2 obj;
76 	unsigned engines[16];
77 	unsigned nengine;
78 	double *shared;
79 	int fd;
80 
81 	shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
82 
83 	fd = drm_open_driver(DRIVER_INTEL);
84 
85 	memset(&obj, 0, sizeof(obj));
86 	obj.handle = batch(fd, 4096);
87 
88 	memset(&execbuf, 0, sizeof(execbuf));
89 	execbuf.buffers_ptr = (uintptr_t)&obj;
90 	execbuf.buffer_count = 1;
91 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
92 	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
93 	if (__gem_execbuf(fd, &execbuf)) {
94 		execbuf.flags = 0;
95 		if (__gem_execbuf(fd, &execbuf))
96 			return 77;
97 	}
98 	/* let the small object leak; ideally blocking the low address */
99 
100 	nengine = 0;
101 	if (ring == -1) {
102 		for (ring = 1; ring < 16; ring++) {
103 			execbuf.flags &= ~ENGINE_FLAGS;
104 			execbuf.flags |= ring;
105 			if (__gem_execbuf(fd, &execbuf) == 0)
106 				engines[nengine++] = ring;
107 		}
108 	} else
109 		engines[nengine++] = ring;
110 
111 	if (size > 1ul << 31)
112 		obj.flags |= 1 << 3;
113 
114 	while (reps--) {
115 		memset(shared, 0, 4096);
116 
117 		igt_fork(child, ncpus) {
118 			struct timespec start, end;
119 			unsigned count = 0;
120 
121 			obj.handle = batch(fd, size);
122 			obj.offset = -1;
123 
124 			clock_gettime(CLOCK_MONOTONIC, &start);
125 			do {
126 				for (int inner = 0; inner < 1024; inner++) {
127 					execbuf.flags &= ~ENGINE_FLAGS;
128 					execbuf.flags |= engines[count++ % nengine];
129 					/* fault in */
130 					obj.alignment = 0;
131 					gem_execbuf(fd, &execbuf);
132 
133 					/* fault out */
134 					obj.alignment = 1ull << 63;
135 					__gem_execbuf(fd, &execbuf);
136 				}
137 
138 				clock_gettime(CLOCK_MONOTONIC, &end);
139 			} while (elapsed(&start, &end) < 2.);
140 
141 			gem_sync(fd, obj.handle);
142 			clock_gettime(CLOCK_MONOTONIC, &end);
143 			shared[child] = 1e6*elapsed(&start, &end) / count / 2;
144 
145 			gem_close(fd, obj.handle);
146 		}
147 		igt_waitchildren();
148 
149 		for (int child = 0; child < ncpus; child++)
150 			shared[ncpus] += shared[child];
151 		printf("%7.3f\n", shared[ncpus] / ncpus);
152 	}
153 	return 0;
154 }
155 
main(int argc,char ** argv)156 int main(int argc, char **argv)
157 {
158 	unsigned ring = I915_EXEC_RENDER;
159 	unsigned flags = 0;
160 	uint64_t size = 4096;
161 	int reps = 1;
162 	int ncpus = 1;
163 	int c;
164 
165 	while ((c = getopt (argc, argv, "e:r:s:f")) != -1) {
166 		switch (c) {
167 		case 'e':
168 			if (strcmp(optarg, "rcs") == 0)
169 				ring = I915_EXEC_RENDER;
170 			else if (strcmp(optarg, "vcs") == 0)
171 				ring = I915_EXEC_BSD;
172 			else if (strcmp(optarg, "bcs") == 0)
173 				ring = I915_EXEC_BLT;
174 			else if (strcmp(optarg, "vecs") == 0)
175 				ring = I915_EXEC_VEBOX;
176 			else if (strcmp(optarg, "all") == 0)
177 				ring = -1;
178 			else
179 				ring = atoi(optarg);
180 			break;
181 
182 		case 'r':
183 			reps = atoi(optarg);
184 			if (reps < 1)
185 				reps = 1;
186 			break;
187 
188 		case 'f':
189 			ncpus = sysconf(_SC_NPROCESSORS_ONLN);
190 			break;
191 
192 		case 's':
193 			size = strtoull(optarg, NULL, 0);
194 			if (size < 4096)
195 				size = 4096;
196 			break;
197 
198 		default:
199 			break;
200 		}
201 	}
202 
203 	return loop(size, ring, reps, ncpus, flags);
204 }
205