/*
 * Copyright © 2009,2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/** @file gem_tiled_fence_blits.c
 *
 * This is a test of doing many tiled blits, with a working set
 * larger than the aperture size.
 *
 * The goal is to catch a couple of types of failure:
 * - Fence management problems on pre-965.
 * - A17 or L-shaped memory tiling workaround problems in acceleration.
 *
 * The model is to fill a collection of 1MB objects in a way that can't trip
 * over A6 swizzling -- upload data to a non-tiled object, blit to the tiled
 * object.  Then, copy the 1MB objects randomly between each other for a while.
 * Finally, download their data through linear objects again and see what
 * resulted.
 */
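
/*
 * Example invocation (assuming the standard subtest handling provided by
 * igt_main; the subtest names are defined at the bottom of this file):
 *
 *   ./gem_tiled_fence_blits --run-subtest basic
 *   ./gem_tiled_fence_blits --run-subtest normal
 */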

#include "igt.h"
#include "igt_x86.h"

enum { width = 512, height = 512 };
static uint32_t linear[width * height];
static const int bo_size = sizeof(linear);

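/*
 * Create an X-tiled buffer and fill it with consecutive dwords starting at
 * start_val.  The writes go through a GTT mapping, so the fence detiles them
 * and the logical contents are simply a linear counting pattern.
 */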
static uint32_t create_bo(int fd, uint32_t start_val)
{
	uint32_t handle;
	uint32_t *ptr;

	handle = gem_create(fd, bo_size);
	gem_set_tiling(fd, handle, I915_TILING_X, width * 4);

	/* Fill the BO with dwords starting at start_val */
	ptr = gem_mmap__gtt(fd, handle, bo_size, PROT_WRITE);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
	for (int i = 0; i < width * height; i++)
		ptr[i] = start_val++;
	munmap(ptr, bo_size);

	return handle;
}

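/*
 * Read the buffer back through a (write-combined) GTT mapping and verify
 * that every dword still matches the expected counting pattern.
 */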
static void check_bo(int fd, uint32_t handle, uint32_t start_val)
{
	uint32_t *ptr;

	ptr = gem_mmap__gtt(fd, handle, bo_size, PROT_READ);
	gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, 0);
	igt_memcpy_from_wc(linear, ptr, bo_size);
	munmap(ptr, bo_size);

	for (int i = 0; i < width * height; i++) {
		igt_assert_f(linear[i] == start_val,
			     "Expected 0x%08x, found 0x%08x "
			     "at offset 0x%08x\n",
			     start_val, linear[i], i * 4);
		start_val++;
	}
}

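/*
 * Build a batch buffer containing a single XY_SRC_COPY_BLT that copies one
 * full width x height surface.  The destination and source addresses are
 * left as zero here and patched at execbuf time through the two relocation
 * entries this function fills in (reloc[0] = dst, reloc[1] = src).
 */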
static uint32_t
create_batch(int fd, struct drm_i915_gem_relocation_entry *reloc)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const bool has_64b_reloc = gen >= 8;
	uint32_t *batch;
	uint32_t handle;
	uint32_t pitch;
	int i = 0;

	handle = gem_create(fd, 4096);
	batch = gem_mmap__cpu(fd, handle, 0, 4096, PROT_WRITE);

	batch[i] = (XY_SRC_COPY_BLT_CMD |
		    XY_SRC_COPY_BLT_WRITE_ALPHA |
		    XY_SRC_COPY_BLT_WRITE_RGB);
	if (gen >= 4) {
		batch[i] |= (XY_SRC_COPY_BLT_SRC_TILED |
			     XY_SRC_COPY_BLT_DST_TILED);
		pitch = width; /* tiled pitch is specified in dwords on gen4+ */
	} else {
		pitch = 4 * width;
	}
	batch[i++] |= 6 + 2 * has_64b_reloc; /* length field; 64b addresses need two extra dwords */

	batch[i++] = 3 << 24 | 0xcc << 16 | pitch; /* 32bpp, ROP_SRCCOPY, dst pitch */
	batch[i++] = 0; /* dst (x1, y1) */
	batch[i++] = height << 16 | width; /* dst (x2 y2) */
	reloc[0].offset = sizeof(*batch) * i;
	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
	batch[i++] = 0;
	if (has_64b_reloc)
		batch[i++] = 0;

	batch[i++] = 0; /* src (x1, y1) */
	batch[i++] = pitch; /* src pitch */
	reloc[1].offset = sizeof(*batch) * i;
	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
	batch[i++] = 0;
	if (has_64b_reloc)
		batch[i++] = 0;

	batch[i++] = MI_BATCH_BUFFER_END;
	munmap(batch, 4096);

	return handle;
}

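/*
 * Create 'count' tiled buffers, each filled with a distinct counting
 * pattern, blit their contents between each other (first a deterministic
 * reverse pass, then random pairs), and finally verify that every buffer
 * still holds the pattern it should have ended up with.
 */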
static void run_test(int fd, int count)
{
	struct drm_i915_gem_relocation_entry reloc[2];
	struct drm_i915_gem_exec_object2 obj[3];
	struct drm_i915_gem_execbuffer2 eb;
	uint32_t *bo, *bo_start_val;
	uint32_t start = 0;

	/* obj[0] = dst, obj[1] = src, obj[2] = the batch carrying the relocations */
	memset(reloc, 0, sizeof(reloc));
	memset(obj, 0, sizeof(obj));
	obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
	obj[2].handle = create_batch(fd, reloc);
	obj[2].relocs_ptr = to_user_pointer(reloc);
	obj[2].relocation_count = ARRAY_SIZE(reloc);

	memset(&eb, 0, sizeof(eb));
	eb.buffers_ptr = to_user_pointer(obj);
	eb.buffer_count = ARRAY_SIZE(obj);
	if (intel_gen(intel_get_drm_devid(fd)) >= 6)
		eb.flags = I915_EXEC_BLT;

	count |= 1; /* force an odd number of buffers */
	igt_info("Using %d 1MiB buffers\n", count);

	bo = malloc(count * (sizeof(*bo) + sizeof(*bo_start_val)));
	igt_assert(bo);
	bo_start_val = bo + count;

	for (int i = 0; i < count; i++) {
		bo[i] = create_bo(fd, start);
		bo_start_val[i] = start;
		start += width * height;
	}

	/* Deterministic pass: copy each buffer from its mirror-image partner */
	for (int dst = 0; dst < count; dst++) {
		int src = count - dst - 1;

		if (src == dst)
			continue;

		reloc[0].target_handle = obj[0].handle = bo[dst];
		reloc[1].target_handle = obj[1].handle = bo[src];

		gem_execbuf(fd, &eb);
		bo_start_val[dst] = bo_start_val[src];
	}

	/* Random pass: shuffle contents between arbitrary pairs of buffers */
	for (int i = 0; i < count * 4; i++) {
		int src = random() % count;
		int dst = random() % count;

		if (src == dst)
			continue;

		reloc[0].target_handle = obj[0].handle = bo[dst];
		reloc[1].target_handle = obj[1].handle = bo[src];

		gem_execbuf(fd, &eb);
		bo_start_val[dst] = bo_start_val[src];
	}

	/* Every buffer must still contain the pattern it last received */
	for (int i = 0; i < count; i++) {
		check_bo(fd, bo[i], bo_start_val[i]);
		gem_close(fd, bo[i]);
	}
	free(bo);

	gem_close(fd, obj[2].handle);
}

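/*
 * Upper bound used when sizing the working set: treat anything the kernel
 * reports above 4GiB as a 4GiB (minus one page) aperture.
 */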
#define MAX_32b ((1ull << 32) - 4096)

igt_main
{
	int fd;

	igt_fixture {
		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);
	}

	igt_subtest("basic")
		run_test(fd, 2);

	/* the rest of the tests are too long for simulation */
	igt_skip_on_simulation();

	igt_subtest("normal") {
		uint64_t count;

		/* Use 1.5x the (capped) aperture size worth of 1MiB buffers */
		count = gem_aperture_size(fd);
		if (count >> 32)
			count = MAX_32b;
		count = 3 * count / bo_size / 2;
		intel_require_memory(count, bo_size, CHECK_RAM);
		run_test(fd, count);
	}

	igt_fixture
		close(fd);
}