1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28 /* Measure the time it takes to bind/unbind objects from the ppGTT */
29
30 #include <unistd.h>
31 #include <stdlib.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <fcntl.h>
36 #include <inttypes.h>
37 #include <errno.h>
38 #include <sys/stat.h>
39 #include <sys/ioctl.h>
40 #include <sys/time.h>
41 #include <time.h>
42
43 #include "drm.h"
44 #include "ioctl_wrappers.h"
45 #include "drmtest.h"
46 #include "intel_io.h"
47 #include "intel_reg.h"
48 #include "igt_stats.h"
49
50 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
51 #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
52
53 #define LOCAL_I915_EXEC_BSD_SHIFT (13)
54 #define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
55
56 #define ENGINE_FLAGS (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
57
/*
 * Return the interval from *start to *end in seconds, as a double.
 *
 * The whole-second and nanosecond differences are taken separately; the
 * nanosecond difference may be negative and is simply scaled and added in.
 */
static double elapsed(const struct timespec *start,
		      const struct timespec *end)
{
	const double whole = end->tv_sec - start->tv_sec;
	const double frac = 1e-9 * (end->tv_nsec - start->tv_nsec);

	return whole + frac;
}
63
batch(int fd,uint64_t size)64 static uint32_t batch(int fd, uint64_t size)
65 {
66 const uint32_t bbe = MI_BATCH_BUFFER_END;
67 uint32_t handle = gem_create(fd, size);
68 gem_write(fd, handle, 0, &bbe, sizeof(bbe));
69 return handle;
70 }
71
loop(uint64_t size,unsigned ring,int reps,int ncpus,unsigned flags)72 static int loop(uint64_t size, unsigned ring, int reps, int ncpus, unsigned flags)
73 {
74 struct drm_i915_gem_execbuffer2 execbuf;
75 struct drm_i915_gem_exec_object2 obj;
76 unsigned engines[16];
77 unsigned nengine;
78 double *shared;
79 int fd;
80
81 shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
82
83 fd = drm_open_driver(DRIVER_INTEL);
84
85 memset(&obj, 0, sizeof(obj));
86 obj.handle = batch(fd, 4096);
87
88 memset(&execbuf, 0, sizeof(execbuf));
89 execbuf.buffers_ptr = (uintptr_t)&obj;
90 execbuf.buffer_count = 1;
91 execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
92 execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
93 if (__gem_execbuf(fd, &execbuf)) {
94 execbuf.flags = 0;
95 if (__gem_execbuf(fd, &execbuf))
96 return 77;
97 }
98 /* let the small object leak; ideally blocking the low address */
99
100 nengine = 0;
101 if (ring == -1) {
102 for (ring = 1; ring < 16; ring++) {
103 execbuf.flags &= ~ENGINE_FLAGS;
104 execbuf.flags |= ring;
105 if (__gem_execbuf(fd, &execbuf) == 0)
106 engines[nengine++] = ring;
107 }
108 } else
109 engines[nengine++] = ring;
110
111 if (size > 1ul << 31)
112 obj.flags |= 1 << 3;
113
114 while (reps--) {
115 memset(shared, 0, 4096);
116
117 igt_fork(child, ncpus) {
118 struct timespec start, end;
119 unsigned count = 0;
120
121 obj.handle = batch(fd, size);
122 obj.offset = -1;
123
124 clock_gettime(CLOCK_MONOTONIC, &start);
125 do {
126 for (int inner = 0; inner < 1024; inner++) {
127 execbuf.flags &= ~ENGINE_FLAGS;
128 execbuf.flags |= engines[count++ % nengine];
129 /* fault in */
130 obj.alignment = 0;
131 gem_execbuf(fd, &execbuf);
132
133 /* fault out */
134 obj.alignment = 1ull << 63;
135 __gem_execbuf(fd, &execbuf);
136 }
137
138 clock_gettime(CLOCK_MONOTONIC, &end);
139 } while (elapsed(&start, &end) < 2.);
140
141 gem_sync(fd, obj.handle);
142 clock_gettime(CLOCK_MONOTONIC, &end);
143 shared[child] = 1e6*elapsed(&start, &end) / count / 2;
144
145 gem_close(fd, obj.handle);
146 }
147 igt_waitchildren();
148
149 for (int child = 0; child < ncpus; child++)
150 shared[ncpus] += shared[child];
151 printf("%7.3f\n", shared[ncpus] / ncpus);
152 }
153 return 0;
154 }
155
main(int argc,char ** argv)156 int main(int argc, char **argv)
157 {
158 unsigned ring = I915_EXEC_RENDER;
159 unsigned flags = 0;
160 uint64_t size = 4096;
161 int reps = 1;
162 int ncpus = 1;
163 int c;
164
165 while ((c = getopt (argc, argv, "e:r:s:f")) != -1) {
166 switch (c) {
167 case 'e':
168 if (strcmp(optarg, "rcs") == 0)
169 ring = I915_EXEC_RENDER;
170 else if (strcmp(optarg, "vcs") == 0)
171 ring = I915_EXEC_BSD;
172 else if (strcmp(optarg, "bcs") == 0)
173 ring = I915_EXEC_BLT;
174 else if (strcmp(optarg, "vecs") == 0)
175 ring = I915_EXEC_VEBOX;
176 else if (strcmp(optarg, "all") == 0)
177 ring = -1;
178 else
179 ring = atoi(optarg);
180 break;
181
182 case 'r':
183 reps = atoi(optarg);
184 if (reps < 1)
185 reps = 1;
186 break;
187
188 case 'f':
189 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
190 break;
191
192 case 's':
193 size = strtoull(optarg, NULL, 0);
194 if (size < 4096)
195 size = 4096;
196 break;
197
198 default:
199 break;
200 }
201 }
202
203 return loop(size, ring, reps, ncpus, flags);
204 }
205