/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include "config.h"

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/time.h>
#include <pthread.h>
#include "drm.h"
#include "i915_drm.h"

#define OBJECT_SIZE (1024*1024) /* restricted to 1MiB alignment on i915 fences */

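/* Wall-clock time in seconds between two gettimeofday() samples. */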
static double elapsed(const struct timeval *start,
		      const struct timeval *end)
{
	return (end->tv_sec - start->tv_sec) + 1e-6*(end->tv_usec - start->tv_usec);
}

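/*
 * Single-threaded GTT mmap write bandwidth: memset a growing set of objects
 * (up to 4x the number of available fence registers), first linear and then
 * X-tiled, and assert that the rate with many surfaces stays above 75% of
 * the two-surface rate.
 */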
static void performance(void)
{
	int n, loop, count;
	int fd, num_fences;
	double linear[2], tiled[2];

	fd = drm_open_driver(DRIVER_INTEL);

	num_fences = gem_available_fences(fd);
	igt_require(num_fences > 0);

	for (count = 2; count < 4*num_fences; count *= 2) {
		struct timeval start, end;
		uint32_t handle[count];
		void *ptr[count];

		for (n = 0; n < count; n++) {
			handle[n] = gem_create(fd, OBJECT_SIZE);
			ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);
		}

		gettimeofday(&start, NULL);
		for (loop = 0; loop < 1024; loop++) {
			for (n = 0; n < count; n++)
				memset(ptr[n], 0, OBJECT_SIZE);
		}
		gettimeofday(&end, NULL);

		linear[count != 2] = count * loop / elapsed(&start, &end);
		igt_info("Upload rate for %d linear surfaces: %7.3fMiB/s\n", count, linear[count != 2]);

		for (n = 0; n < count; n++)
			gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);

		gettimeofday(&start, NULL);
		for (loop = 0; loop < 1024; loop++) {
			for (n = 0; n < count; n++)
				memset(ptr[n], 0, OBJECT_SIZE);
		}
		gettimeofday(&end, NULL);

		tiled[count != 2] = count * loop / elapsed(&start, &end);
		igt_info("Upload rate for %d tiled surfaces: %7.3fMiB/s\n", count, tiled[count != 2]);

		for (n = 0; n < count; n++) {
			munmap(ptr[n], OBJECT_SIZE);
			gem_close(fd, handle[n]);
		}
	}

	errno = 0;
	igt_assert(linear[1] > 0.75 * linear[0]);
	igt_assert(tiled[1] > 0.75 * tiled[0]);
}

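/* Per-thread state shared by the read/write bandwidth workers below. */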
struct thread_performance {
	pthread_t thread;
	int id, count, direction, loops;
	void **ptr;
};

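/* Reader worker: sum a random 4KiB page of a random surface on each loop. */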
static void *read_thread_performance(void *closure)
{
	struct thread_performance *t = closure;
	uint32_t x = 0;
	int n, m;

	for (n = 0; n < t->loops; n++) {
		uint32_t *src = t->ptr[rand() % t->count];
		src += (rand() % 256) * 4096 / 4;
		for (m = 0; m < 4096/4; m++)
			x += src[m];
	}

	return (void *)(uintptr_t)x;
}

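/* Writer worker: clear a random 4KiB page of a random surface on each loop. */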
static void *write_thread_performance(void *closure)
{
	struct thread_performance *t = closure;
	int n;

	for (n = 0; n < t->loops; n++) {
		uint32_t *dst = t->ptr[rand() % t->count];
		dst += (rand() % 256) * 4096 / 4;
		memset(dst, 0, 4096);
	}

	return NULL;
}

#define READ (1<<0)
#define WRITE (1<<1)
static const char *direction_string(unsigned mask)
{
	switch (mask) {
	case READ: return "Download";
	case WRITE: return "Upload";
	case READ | WRITE: return "Combined";
	default: return "Unknown";
	}
}
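/*
 * Run `count' reader and/or writer threads (per the mask) hammering random
 * pages of a shared set of GTT mmaps, for linear and then X-tiled objects,
 * and assert that the aggregate rate with many surfaces stays above 75% of
 * the two-surface rate.
 */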
static void thread_performance(unsigned mask)
{
	const int loops = 4096;
	int n, count;
	int fd, num_fences;
	double linear[2], tiled[2];

	fd = drm_open_driver(DRIVER_INTEL);

	num_fences = gem_available_fences(fd);
	igt_require(num_fences > 0);

	for (count = 2; count < 4*num_fences; count *= 2) {
		const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0);
		struct timeval start, end;
		struct thread_performance readers[count];
		struct thread_performance writers[count];
		uint32_t handle[count];
		void *ptr[count];

		for (n = 0; n < count; n++) {
			handle[n] = gem_create(fd, OBJECT_SIZE);
			ptr[n] = gem_mmap__gtt(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE);

			if (mask & READ) {
				readers[n].id = n;
				readers[n].direction = READ;
				readers[n].ptr = ptr;
				readers[n].count = count;
				readers[n].loops = loops;
			}

			if (mask & WRITE) {
				writers[n].id = count - n - 1;
				writers[n].direction = WRITE;
				writers[n].ptr = ptr;
				writers[n].count = count;
				writers[n].loops = loops;
			}
		}

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++) {
			if (mask & READ)
				pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
			if (mask & WRITE)
				pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
		}
		for (n = 0; n < count; n++) {
			if (mask & READ)
				pthread_join(readers[n].thread, NULL);
			if (mask & WRITE)
				pthread_join(writers[n].thread, NULL);
		}
		gettimeofday(&end, NULL);

		linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("%s rate for %d linear surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]);

		for (n = 0; n < count; n++)
			gem_set_tiling(fd, handle[n], I915_TILING_X, 1024);

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++) {
			if (mask & READ)
				pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]);
			if (mask & WRITE)
				pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]);
		}
		for (n = 0; n < count; n++) {
			if (mask & READ)
				pthread_join(readers[n].thread, NULL);
			if (mask & WRITE)
				pthread_join(writers[n].thread, NULL);
		}
		gettimeofday(&end, NULL);

		tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("%s rate for %d tiled surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]);

		for (n = 0; n < count; n++) {
			munmap(ptr[n], OBJECT_SIZE);
			gem_close(fd, handle[n]);
		}
	}

	errno = 0;
	igt_assert(linear[1] > 0.75 * linear[0]);
	igt_assert(tiled[1] > 0.75 * tiled[0]);
}

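/* Per-thread state for the mmap contention tests below. */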
struct thread_contention {
	pthread_t thread;
	uint32_t handle;
	int loops, fd;
};
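/* On each loop: take a fresh GTT mmap of the object, clear one random 4KiB page, unmap. */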
static void *no_contention(void *closure)
{
	struct thread_contention *t = closure;
	int n;

	for (n = 0; n < t->loops; n++) {
		uint32_t *ptr = gem_mmap__gtt(t->fd, t->handle, OBJECT_SIZE, PROT_READ | PROT_WRITE);
		memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
		munmap(ptr, OBJECT_SIZE);
	}

	return NULL;
}

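/* As above, but through a fresh WC mmap instead of the GTT. */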
static void *wc_mmap(void *closure)
{
	struct thread_contention *t = closure;
	int n;

	for (n = 0; n < t->loops; n++) {
		uint32_t *ptr = gem_mmap__wc(t->fd, t->handle, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);
		memset(ptr + (rand() % 256) * 4096 / 4, 0, 4096);
		munmap(ptr, OBJECT_SIZE);
	}

	return NULL;
}

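/*
 * One thread per object, each repeatedly remapping and writing its own
 * object through the GTT, for linear and then X-tiled objects; asserts that
 * the rate with many threads stays above 75% of the two-thread rate.
 */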
static void thread_contention(void)
{
	const int loops = 4096;
	int n, count;
	int fd, num_fences;
	double linear[2], tiled[2];

	fd = drm_open_driver(DRIVER_INTEL);

	num_fences = gem_available_fences(fd);
	igt_require(num_fences > 0);

	for (count = 1; count < 4*num_fences; count *= 2) {
		struct timeval start, end;
		struct thread_contention threads[count];

		for (n = 0; n < count; n++) {
			threads[n].handle = gem_create(fd, OBJECT_SIZE);
			threads[n].loops = loops;
			threads[n].fd = fd;
		}

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++)
			pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
		for (n = 0; n < count; n++)
			pthread_join(threads[n].thread, NULL);
		gettimeofday(&end, NULL);

		linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("Contended upload rate for %d linear threads: %7.3fMiB/s\n", count, linear[count != 2]);

		for (n = 0; n < count; n++)
			gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++)
			pthread_create(&threads[n].thread, NULL, no_contention, &threads[n]);
		for (n = 0; n < count; n++)
			pthread_join(threads[n].thread, NULL);
		gettimeofday(&end, NULL);

		tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("Contended upload rate for %d tiled threads: %7.3fMiB/s\n", count, tiled[count != 2]);

		for (n = 0; n < count; n++)
			gem_close(fd, threads[n].handle);
	}

	errno = 0;
	igt_assert(linear[1] > 0.75 * linear[0]);
	igt_assert(tiled[1] > 0.75 * tiled[0]);
}

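/* Same as thread_contention(), but using WC mmaps instead of the GTT. */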
static void wc_contention(void)
{
	const int loops = 4096;
	int n, count;
	int fd, num_fences;
	double linear[2], tiled[2];

	fd = drm_open_driver(DRIVER_INTEL);
	gem_require_mmap_wc(fd);

	num_fences = gem_available_fences(fd);
	igt_require(num_fences > 0);

	for (count = 1; count < 4*num_fences; count *= 2) {
		struct timeval start, end;
		struct thread_contention threads[count];

		for (n = 0; n < count; n++) {
			threads[n].handle = gem_create(fd, OBJECT_SIZE);
			threads[n].loops = loops;
			threads[n].fd = fd;
		}

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++)
			pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
		for (n = 0; n < count; n++)
			pthread_join(threads[n].thread, NULL);
		gettimeofday(&end, NULL);

		linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("Contended upload rate for %d linear threads/wc: %7.3fMiB/s\n", count, linear[count != 2]);

		for (n = 0; n < count; n++)
			gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024);

		gettimeofday(&start, NULL);
		for (n = 0; n < count; n++)
			pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]);
		for (n = 0; n < count; n++)
			pthread_join(threads[n].thread, NULL);
		gettimeofday(&end, NULL);

		tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096);
		igt_info("Contended upload rate for %d tiled threads/wc: %7.3fMiB/s\n", count, tiled[count != 2]);

		for (n = 0; n < count; n++)
			gem_close(fd, threads[n].handle);
	}

	errno = 0;
	igt_assert(linear[1] > 0.75 * linear[0]);
	igt_assert(tiled[1] > 0.75 * tiled[0]);
}

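/* Subtests: single-threaded upload, mmap contention, and threaded read/write bandwidth. */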
igt_main
{
	igt_skip_on_simulation();

	igt_subtest("performance")
		performance();
	igt_subtest("thread-contention")
		thread_contention();
	igt_subtest("wc-contention")
		wc_contention();
	igt_subtest("thread-performance-read")
		thread_performance(READ);
	igt_subtest("thread-performance-write")
		thread_performance(WRITE);
	igt_subtest("thread-performance-both")
		thread_performance(READ | WRITE);
}