• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */
27 
28 #include "igt.h"
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <fcntl.h>
35 #include <inttypes.h>
36 #include <errno.h>
37 #include <math.h>
38 #include <sys/stat.h>
39 #include <sys/ioctl.h>
40 #include <sys/time.h>
41 #include <time.h>
42 
43 #include "drm.h"
44 
/*
 * Execbuffer flag bits OR-ed into execbuf.flags by run().
 * NOTE(review): presumably local copies of the kernel's I915_EXEC_NO_RELOC
 * and I915_EXEC_HANDLE_LUT, kept for builds against older headers - confirm
 * against i915_drm.h.
 */
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

/*
 * First dword of the copy command emitted by gem_linear_blt(), plus the
 * modifier bits that may be OR-ed into it.  The low bits of COPY_BLT_CMD
 * hold the command length; gem_linear_blt() adds 2 to it when it emits the
 * two extra address dwords for 64-bit relocations.
 */
#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
#define BLT_WRITE_ALPHA		(1<<21)
#define BLT_WRITE_RGB		(1<<20)
#define BLT_SRC_TILED		(1<<15)
#define BLT_DST_TILED		(1<<11)

/* Non-zero when each relocated address takes two dwords; run() sets it for gen >= 8. */
static int has_64bit_reloc;
55 
/*
 * Return the time from @start to @end in seconds.
 *
 * The whole-second and nanosecond parts are differenced separately, so the
 * result is negative when @end precedes @start.
 */
static double
elapsed(const struct timespec *start, const struct timespec *end)
{
	return (end->tv_sec - start->tv_sec) + 1e-9*(end->tv_nsec - start->tv_nsec);
}
61 
baseline(uint64_t bytes,int milliseconds)62 static int baseline(uint64_t bytes, int milliseconds)
63 {
64 	struct timespec start, end;
65 	const int size = 64*1024*1024;
66 	int count = 0;
67 	void *mem;
68 
69 	mem = malloc(size);
70 	if (mem == NULL)
71 		return 1;
72 
73 	clock_gettime(CLOCK_MONOTONIC, &start);
74 	do {
75 		memset(mem, count, size);
76 		count++;
77 		clock_gettime(CLOCK_MONOTONIC, &end);
78 		if (elapsed(&start, &end) > 0.1)
79 			break;
80 	} while (1);
81 
82 	free(mem);
83 
84 	return ceil(1e-3*milliseconds/elapsed(&start, &end) * count * size / bytes);
85 }
86 
gem_linear_blt(int fd,uint32_t * batch,int offset,uint32_t src,uint32_t dst,uint32_t length,struct drm_i915_gem_relocation_entry * reloc)87 static int gem_linear_blt(int fd,
88 			  uint32_t *batch,
89 			  int offset,
90 			  uint32_t src,
91 			  uint32_t dst,
92 			  uint32_t length,
93 			  struct drm_i915_gem_relocation_entry *reloc)
94 {
95 	uint32_t *b = batch + offset/4;
96 	int height = length / (16 * 1024);
97 
98 	igt_assert_lte(height, 1 << 16);
99 
100 	if (height) {
101 		int i = 0;
102 		b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
103 		if (has_64bit_reloc)
104 			b[i-1]+=2;
105 		b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
106 		b[i++] = 0;
107 		b[i++] = height << 16 | (4*1024);
108 		b[i++] = 0;
109 		reloc->offset = (b-batch+4) * sizeof(uint32_t);
110 		reloc->delta = 0;
111 		reloc->target_handle = dst;
112 		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
113 		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
114 		reloc->presumed_offset = 0;
115 		reloc++;
116 		if (has_64bit_reloc)
117 			b[i++] = 0; /* FIXME */
118 
119 		b[i++] = 0;
120 		b[i++] = 16*1024;
121 		b[i++] = 0;
122 		reloc->offset = (b-batch+7) * sizeof(uint32_t);
123 		if (has_64bit_reloc)
124 			reloc->offset += sizeof(uint32_t);
125 		reloc->delta = 0;
126 		reloc->target_handle = src;
127 		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
128 		reloc->write_domain = 0;
129 		reloc->presumed_offset = 0;
130 		reloc++;
131 		if (has_64bit_reloc)
132 			b[i++] = 0; /* FIXME */
133 
134 		b += i;
135 		length -= height * 16*1024;
136 	}
137 
138 	if (length) {
139 		int i = 0;
140 		b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
141 		if (has_64bit_reloc)
142 			b[i-1]+=2;
143 		b[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (16*1024);
144 		b[i++] = height << 16;
145 		b[i++] = (1+height) << 16 | (length / 4);
146 		b[i++] = 0;
147 		reloc->offset = (b-batch+4) * sizeof(uint32_t);
148 		reloc->delta = 0;
149 		reloc->target_handle = dst;
150 		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
151 		reloc->write_domain = I915_GEM_DOMAIN_RENDER;
152 		reloc->presumed_offset = 0;
153 		reloc++;
154 		if (has_64bit_reloc)
155 			b[i++] = 0; /* FIXME */
156 
157 		b[i++] = height << 16;
158 		b[i++] = 16*1024;
159 		b[i++] = 0;
160 		reloc->offset = (b-batch+7) * sizeof(uint32_t);
161 		if (has_64bit_reloc)
162 			reloc->offset += sizeof(uint32_t);
163 		reloc->delta = 0;
164 		reloc->target_handle = src;
165 		reloc->read_domains = I915_GEM_DOMAIN_RENDER;
166 		reloc->write_domain = 0;
167 		reloc->presumed_offset = 0;
168 		reloc++;
169 		if (has_64bit_reloc)
170 			b[i++] = 0; /* FIXME */
171 
172 		b += i;
173 	}
174 
175 	b[0] = MI_BATCH_BUFFER_END;
176 	b[1] = 0;
177 
178 	return (b+2 - batch) * sizeof(uint32_t);
179 }
180 
/* Bits of the flags argument of run(), set by the -S and -C options in main(). */
#define SYNC 0x1	/* one execbuf per timed sample, with more samples taken */
#define NOCMD 0x2	/* submit with batch_len 0; needs command parser version >= 1 */
183 
run(int object,int batch,int time,int reps,int ncpus,unsigned flags)184 static int run(int object, int batch, int time, int reps, int ncpus, unsigned flags)
185 {
186 	struct drm_i915_gem_execbuffer2 execbuf;
187 	struct drm_i915_gem_exec_object2 exec[3];
188 	struct drm_i915_gem_relocation_entry *reloc;
189 	uint32_t *buf, handle, src, dst;
190 	int fd, len, gen, size, nreloc;
191 	int ring, count;
192 	double *shared;
193 
194 	shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
195 
196 	size = ALIGN(batch * 64, 4096);
197 	reloc = malloc(sizeof(*reloc)*size/32*2);
198 
199 	fd = drm_open_driver(DRIVER_INTEL);
200 	handle = gem_create(fd, size);
201 	buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);
202 
203 	gen = intel_gen(intel_get_drm_devid(fd));
204 	has_64bit_reloc = gen >= 8;
205 
206 	src = gem_create(fd, ALIGN(object, 4096));
207 	dst = gem_create(fd, ALIGN(object, 4096));
208 
209 	len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc);
210 	if (has_64bit_reloc)
211 		nreloc = len > 56 ? 4 : 2;
212 	else
213 		nreloc = len > 40 ? 4 : 2;
214 
215 	memset(exec, 0, sizeof(exec));
216 	exec[0].handle = src;
217 	exec[1].handle = dst;
218 
219 	exec[2].handle = handle;
220 	exec[2].relocs_ptr = (uintptr_t)reloc;
221 	exec[2].relocation_count = nreloc;
222 
223 	ring = 0;
224 	if (gen >= 6)
225 		ring = I915_EXEC_BLT;
226 
227 	memset(&execbuf, 0, sizeof(execbuf));
228 	execbuf.buffers_ptr = (uintptr_t)exec;
229 	execbuf.buffer_count = 3;
230 	execbuf.batch_len = len;
231 	execbuf.flags = ring;
232 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
233 
234 	if (__gem_execbuf(fd, &execbuf)) {
235 		gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
236 		len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc);
237 		igt_assert(len == execbuf.batch_len);
238 		execbuf.flags = ring;
239 		gem_execbuf(fd, &execbuf);
240 	}
241 	gem_sync(fd, handle);
242 
243 	if (batch > 1) {
244 		if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) {
245 			src = 0;
246 			dst = 1;
247 		}
248 
249 		gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
250 		for (int i = 1; i < batch; i++) {
251 			len = gem_linear_blt(fd, buf, len - 8,
252 					     src, dst, object,
253 					     reloc + nreloc * i);
254 		}
255 		exec[2].relocation_count = nreloc * batch;
256 		execbuf.batch_len = len;
257 
258 		gem_execbuf(fd, &execbuf);
259 		gem_sync(fd, handle);
260 	}
261 	if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT)
262 		execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
263 
264 	/* Guess how many loops we need for 0.1s */
265 	count = baseline((uint64_t)object * batch, 100) / ncpus;
266 	if (flags & SYNC) {
267 		time *= count / 2;
268 		count = 1;
269 	}
270 	if (flags & NOCMD) {
271 		drm_i915_getparam_t gp;
272 		int v;
273 
274 		gp.param = I915_PARAM_CMD_PARSER_VERSION;
275 		gp.value = &v;
276 		drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
277 		if (v < 1)
278 			return 77;
279 
280 		execbuf.batch_len = 0;
281 	}
282 
283 	while (reps--) {
284 		memset(shared, 0, 4096);
285 
286 		igt_fork(child, ncpus) {
287 			double min = HUGE_VAL;
288 
289 			for (int s = 0; s <= time / 100; s++) {
290 				struct timespec start, end;
291 				double t;
292 
293 				clock_gettime(CLOCK_MONOTONIC, &start);
294 				for (int loop = 0; loop < count; loop++)
295 					gem_execbuf(fd, &execbuf);
296 				gem_sync(fd, handle);
297 				clock_gettime(CLOCK_MONOTONIC, &end);
298 
299 				t = elapsed(&start, &end);
300 				if (t < min)
301 					min = t;
302 			}
303 
304 			shared[child] = object/(1024*1024.)*batch*count/min;
305 		}
306 		igt_waitchildren();
307 
308 		for (int child = 0; child < ncpus; child++)
309 			shared[ncpus] += shared[child];
310 		printf("%7.3f\n", shared[ncpus] / ncpus);
311 	}
312 
313 	close(fd);
314 	return 0;
315 }
316 
main(int argc,char ** argv)317 int main(int argc, char **argv)
318 {
319 	int size = 1024*1024;
320 	int reps = 13;
321 	int time = 2000;
322 	int ncpus = 1;
323 	int batch = 1;
324 	unsigned flags = 0;
325 	int c;
326 
327 	while ((c = getopt (argc, argv, "CSs:b:r:t:f")) != -1) {
328 		switch (c) {
329 		case 's':
330 			size = atoi(optarg);
331 			size = ALIGN(size, 4);
332 			if (size < 4)
333 				size = 4;
334 			break;
335 
336 		case 'S':
337 			flags |= SYNC;
338 			break;
339 
340 		case 'C':
341 			flags |= NOCMD;
342 			break;
343 
344 		case 't':
345 			time = atoi(optarg);
346 			if (time < 1)
347 				time = 1;
348 			break;
349 
350 		case 'r':
351 			reps = atoi(optarg);
352 			if (reps < 1)
353 				reps = 1;
354 			break;
355 
356 		case 'b':
357 			batch = atoi(optarg);
358 			if (batch < 1)
359 				batch = 1;
360 			break;
361 
362 		case 'f':
363 			ncpus = sysconf(_SC_NPROCESSORS_ONLN);
364 			break;
365 
366 		default:
367 			break;
368 		}
369 	}
370 
371 	return run(size, batch, time, reps, ncpus, flags);
372 }
373