/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <sys/mman.h>

#include <gtest/gtest.h>

#include "c99_compat.h"
#include "common/xe/intel_engine.h"
#include "common/intel_gem.h"
#include "dev/intel_debug.h"
#include "dev/intel_device_info.h"
#include "dev/intel_kmd.h"
#include "intel_gem.h"
#include "isl/isl.h"
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/xe_drm.h"
#include "genxml/gen_macros.h"
#include "util/macros.h"

class mi_builder_test;

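/* A fake GPU address for the builder: a GEM handle plus an offset into
 * that BO.  The callbacks below resolve it into either a softpinned
 * 64-bit address (gfx8+) or an i915 relocation entry (gfx7.x).
 */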
struct address {
   uint32_t gem_handle;
   uint32_t offset;
};

#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);
bool *__gen_get_write_fencing_status(mi_builder_test *test);

struct address
__gen_address_offset(address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

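/* A scratch register the tests may clobber freely: MI_BUILDER_NUM_ALLOC_GPRS
 * below keeps mi_builder away from the last MI_MATH GPR on HSW+, and on gfx7
 * we borrow an otherwise-unused 3DPRIM register instead.
 */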
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

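/* Emit a full GENX command: reserve its dwords in the batch, let the loop
 * body fill out the template struct, then pack it into the batch when the
 * scope closes (the for-loop runs exactly once).
 */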
#define emit_cmd(cmd, name)                                             \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                              \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)

#include <vector>

class mi_builder_test : public ::testing::Test {
public:
   void SetUp() override;
   void TearDown() override;

   void *emit_dwords(int num_dwords);
   void submit_batch();

   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd = -1;
   intel_device_info devinfo;

   uint32_t batch_bo_handle = 0;
   uint64_t batch_bo_addr;
   uint32_t batch_offset;
   void *batch_map = NULL;

   struct {
      uint32_t vm_id = 0;
      uint32_t queue_id = 0;
   } xe;

   struct {
      uint32_t ctx_id = 0;
#if GFX_VER < 8
      std::vector<drm_i915_gem_relocation_entry> relocs;
#endif
   } i915;

   uint32_t data_bo_handle = 0;
   uint64_t data_bo_addr;
   void *data_map = NULL;

   char *input;
   char *output;
   uint64_t canary;

   bool write_fence_status;

   mi_builder b;
};

// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
#define DATA_BO_SIZE 4096

void
mi_builder_test::SetUp()
{
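   // Scan the DRM devices for an Intel render node whose hardware
   // generation matches the GFX_VER this test binary was compiled for.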
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);
   ASSERT_GT(max_devices, 0);

   int i;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         if (intel_get_kmd_type(fd) == INTEL_KMD_TYPE_I915) {
            /* We don't really need to do this when running on hardware
             * because we can just pull it from the drmDevice.  However,
             * without doing this, intel_dump_gpu gets a bit of heartburn
             * and we can't use the --device option with it.
             */
            int device_id;
            ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
               << strerror(errno);
         }

         ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
         if (devinfo.ver != GFX_VER ||
             (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
            close(fd);
            fd = -1;
            continue;
         }

         /* Found a device! */
         break;
      }
   }
   drmFreeDevices(devices, max_devices);
   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      ASSERT_TRUE(intel_gem_create_context(fd, &i915.ctx_id)) << strerror(errno);

      if (GFX_VER >= 8) {
         /* On gfx8+, we require softpin */
         int has_softpin;
         ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
            << strerror(errno);
         ASSERT_TRUE(has_softpin);
      }

      // Create the batch buffer
      drm_i915_gem_create gem_create = drm_i915_gem_create();
      gem_create.size = BATCH_BO_SIZE;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                         (void *)&gem_create), 0) << strerror(errno);
      batch_bo_handle = gem_create.handle;
#if GFX_VER >= 8
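      /* Arbitrary addresses in the upper canonical range; gfx8+ softpins
       * every BO at a fixed GPU virtual address (EXEC_OBJECT_PINNED).
       */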
      batch_bo_addr = 0xffffffffdff70000ULL;
#endif

      if (devinfo.has_caching_uapi) {
         drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
         gem_caching.handle = batch_bo_handle;
         gem_caching.caching = I915_CACHING_CACHED;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                            (void *)&gem_caching), 0) << strerror(errno);
      }

      if (devinfo.has_mmap_offset) {
         drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
         gem_mmap_offset.handle = batch_bo_handle;
         gem_mmap_offset.flags = devinfo.has_local_mem ?
                                 I915_MMAP_OFFSET_FIXED :
                                 I915_MMAP_OFFSET_WC;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                            &gem_mmap_offset), 0) << strerror(errno);

         batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, gem_mmap_offset.offset);
         ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
      } else {
         drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
         gem_mmap.handle = batch_bo_handle;
         gem_mmap.offset = 0;
         gem_mmap.size = BATCH_BO_SIZE;
         gem_mmap.flags = 0;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                            (void *)&gem_mmap), 0) << strerror(errno);
         batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
      }

      // Create the data buffer
      gem_create = drm_i915_gem_create();
      gem_create.size = DATA_BO_SIZE;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                         (void *)&gem_create), 0) << strerror(errno);
      data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
      data_bo_addr = 0xffffffffefff0000ULL;
#endif

      if (devinfo.has_caching_uapi) {
         drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
         gem_caching.handle = data_bo_handle;
         gem_caching.caching = I915_CACHING_CACHED;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                            (void *)&gem_caching), 0) << strerror(errno);
      }

      if (devinfo.has_mmap_offset) {
         drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
         gem_mmap_offset.handle = data_bo_handle;
         gem_mmap_offset.flags = devinfo.has_local_mem ?
                                 I915_MMAP_OFFSET_FIXED :
                                 I915_MMAP_OFFSET_WC;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                            &gem_mmap_offset), 0) << strerror(errno);

         data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, gem_mmap_offset.offset);
         ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
      } else {
         drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
         gem_mmap.handle = data_bo_handle;
         gem_mmap.offset = 0;
         gem_mmap.size = DATA_BO_SIZE;
         gem_mmap.flags = 0;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                            (void *)&gem_mmap), 0) << strerror(errno);
         data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
      }
   } else {
      assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);

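      /* Xe KMD: create a VM and an exec queue on a render engine up front;
       * the BOs are bound into the VM at submit time in submit_batch().
       */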
      int err;

      struct drm_xe_vm_create create = {
         .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
      ASSERT_EQ(err, 0) << strerror(errno);
      xe.vm_id = create.vm_id;

      struct drm_xe_engine_class_instance instance = {};

      struct intel_query_engine_info *engines_info = xe_engine_get_info(fd);
      assert(engines_info);

      bool found_engine = false;
      for (uint32_t i = 0; i < engines_info->num_engines; i++) {
         struct intel_engine_class_instance *e = &engines_info->engines[i];
         if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
            instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
            instance.engine_instance = e->engine_instance;
            instance.gt_id = e->gt_id;
            found_engine = true;
            break;
         }
      }
      free(engines_info);
      ASSERT_TRUE(found_engine);

      struct drm_xe_exec_queue_create queue_create = {
         .width = 1,
         .num_placements = 1,
         .vm_id = xe.vm_id,
         .instances = (uintptr_t)&instance,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
      ASSERT_EQ(err, 0) << strerror(errno);
      xe.queue_id = queue_create.exec_queue_id;

      // Create the batch buffer.
      {
         struct drm_xe_gem_create gem_create = {
            .size = BATCH_BO_SIZE,
            .placement = 1u << devinfo.mem.sram.mem.instance,
            .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
         ASSERT_EQ(err, 0) << strerror(errno);
         batch_bo_handle = gem_create.handle;
         batch_bo_addr = 0x10000000;

         struct drm_xe_gem_mmap_offset mm = {
            .handle = batch_bo_handle,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
         ASSERT_EQ(err, 0) << strerror(errno);
         batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, mm.offset);
         ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
      }

      // Create the data buffer.
      {
         struct drm_xe_gem_create gem_create = {
            .size = DATA_BO_SIZE,
            .placement = 1u << devinfo.mem.sram.mem.instance,
            .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
         ASSERT_EQ(err, 0) << strerror(errno);
         data_bo_handle = gem_create.handle;
         data_bo_addr = 0x20000000;

         struct drm_xe_gem_mmap_offset mm = {
            .handle = data_bo_handle,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
         ASSERT_EQ(err, 0) << strerror(errno);
         data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, mm.offset);
         ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
      }
   }

   // Start the batch at zero
   batch_offset = 0;

   input = (char *)data_map + INPUT_DATA_OFFSET;
   output = (char *)data_map + OUTPUT_DATA_OFFSET;

   // Fill the test data with garbage
   memset(data_map, 139, DATA_BO_SIZE);
   memset(&canary, 139, sizeof(canary));
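   // Any output location still equal to the canary after a run was never
   // written by the GPU.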

   write_fence_status = false;

   struct isl_device isl_dev;
   isl_device_init(&isl_dev, &devinfo);
   mi_builder_init(&b, &devinfo, this);
   const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
   mi_builder_set_mocs(&b, mocs);
}

void
mi_builder_test::TearDown()
{
   int err;

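   /* SetUp() can bail part-way through, so only tear down the pieces that
    * were actually created.
    */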
   if (data_map) {
      err = munmap(data_map, DATA_BO_SIZE);
      EXPECT_EQ(err, 0) << "munmap data bo failed";
   }

   if (data_bo_handle) {
      struct drm_gem_close gem_close = { .handle = data_bo_handle };
      err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
      EXPECT_EQ(err, 0) << "close data bo failed";
   }

   if (batch_map) {
      err = munmap(batch_map, BATCH_BO_SIZE);
      EXPECT_EQ(err, 0) << "munmap batch bo failed";
   }

   if (batch_bo_handle) {
      struct drm_gem_close gem_close = { .handle = batch_bo_handle };
      err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
      EXPECT_EQ(err, 0) << "close batch bo failed";
   }

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      if (i915.ctx_id) {
         struct drm_i915_gem_context_destroy destroy = {
            .ctx_id = i915.ctx_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
         EXPECT_EQ(err, 0) << "context destroy failed";
      }
   } else {
      assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);

      if (xe.queue_id) {
         struct drm_xe_exec_queue_destroy queue_destroy = {
            .exec_queue_id = xe.queue_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
         EXPECT_EQ(err, 0) << "exec queue destroy failed";
      }

      if (xe.vm_id) {
         struct drm_xe_vm_destroy destroy = {
            .vm_id = xe.vm_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
         EXPECT_EQ(err, 0) << "vm destroy failed";
      }
   }

   if (fd != -1)
      close(fd);
}

void *
mi_builder_test::emit_dwords(int num_dwords)
{
   void *ptr = (void *)((char *)batch_map + batch_offset);
   batch_offset += num_dwords * 4;
   assert(batch_offset < BATCH_BO_SIZE);
   return ptr;
}

void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      drm_i915_gem_exec_object2 objects[2];
      memset(objects, 0, sizeof(objects));

      objects[0].handle = data_bo_handle;
      objects[0].relocation_count = 0;
      objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
      objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                         EXEC_OBJECT_PINNED |
                         EXEC_OBJECT_WRITE;
      objects[0].offset = data_bo_addr;
#else
      objects[0].flags = EXEC_OBJECT_WRITE;
      objects[0].offset = -1;
#endif

      objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
      objects[1].relocation_count = 0;
      objects[1].relocs_ptr = 0;
      objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                         EXEC_OBJECT_PINNED;
      objects[1].offset = batch_bo_addr;
#else
      objects[1].relocation_count = i915.relocs.size();
      objects[1].relocs_ptr = (uintptr_t)(void *)&i915.relocs[0];
      objects[1].flags = 0;
      objects[1].offset = -1;
#endif

      drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
      execbuf.buffers_ptr = (uintptr_t)(void *)objects;
      execbuf.buffer_count = 2;
      execbuf.batch_start_offset = 0;
      execbuf.batch_len = batch_offset;
      execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
      execbuf.rsvd1 = i915.ctx_id;

      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                         (void *)&execbuf), 0) << strerror(errno);

      drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
      gem_wait.bo_handle = batch_bo_handle;
      gem_wait.timeout_ns = INT64_MAX;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                         (void *)&gem_wait), 0) << strerror(errno);
   } else {
      assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);

      int err;

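      /* Two syncobjs: the first is signaled when the VM binds complete and
       * gates the exec; the second is signaled when the batch finishes.
       */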
      uint32_t sync_handles[2] = {};
      for (int i = 0; i < 2; i++) {
         struct drm_syncobj_create sync_create = {};
         err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
         ASSERT_EQ(err, 0) << strerror(errno);
         sync_handles[i] = sync_create.handle;
      }

      struct drm_xe_vm_bind_op bind_ops[] = {
         {
            .obj = batch_bo_handle,
            .pat_index = devinfo.pat.cached_coherent.index,
            .range = BATCH_BO_SIZE,
            .addr = batch_bo_addr,
            .op = DRM_XE_VM_BIND_OP_MAP,
            .flags = DRM_XE_VM_BIND_FLAG_READONLY,
         },
         {
            .obj = data_bo_handle,
            .pat_index = devinfo.pat.cached_coherent.index,
            .range = DATA_BO_SIZE,
            .addr = data_bo_addr,
            .op = DRM_XE_VM_BIND_OP_MAP,
         },
      };

      struct drm_xe_sync bind_syncs[] = {
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .flags = DRM_XE_SYNC_FLAG_SIGNAL,
            .handle = sync_handles[0],
         },
      };

      struct drm_xe_vm_bind bind = {
         .vm_id = xe.vm_id,
         .num_binds = ARRAY_SIZE(bind_ops),
         .vector_of_binds = (uintptr_t)bind_ops,
         .num_syncs = 1,
         .syncs = (uintptr_t)bind_syncs,
      };

      err = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
      ASSERT_EQ(err, 0) << strerror(errno);

      struct drm_xe_sync exec_syncs[] = {
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
         },
         {
            .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .flags = DRM_XE_SYNC_FLAG_SIGNAL,
            .handle = sync_handles[1],
         }
      };

      struct drm_xe_exec exec = {
         .exec_queue_id = xe.queue_id,
         .num_syncs = 2,
         .syncs = (uintptr_t)exec_syncs,
         .address = batch_bo_addr,
         .num_batch_buffer = 1,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
      ASSERT_EQ(err, 0) << strerror(errno);

      struct drm_syncobj_wait wait = {
         .handles = (uintptr_t)&sync_handles[1],
         .timeout_nsec = INT64_MAX,
         .count_handles = 1,
      };
      err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      ASSERT_EQ(err, 0) << strerror(errno);
   }
}

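/* mi_builder callback: resolve a (BO, offset) pair into what the command
 * stream needs: the softpinned 64-bit VA on gfx8+, or an appended i915
 * relocation entry on gfx7.x.
 */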
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   assert(test->devinfo.kmd_type == INTEL_KMD_TYPE_I915);
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->i915.relocs.push_back(reloc);

   return reloc.delta;
#endif
}

bool *
__gen_get_write_fencing_status(mi_builder_test *test)
{
   return &test->write_fence_status;
}

void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}

struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
   assert(location >= test->batch_map);
   size_t offset = (char *)location - (char *)test->batch_map;
   assert(offset < BATCH_BO_SIZE);
   assert(offset <= UINT32_MAX);

   return (struct address) {
      .gem_handle = test->batch_bo_handle,
      .offset = (uint32_t)offset,
   };
}

#include "genxml/genX_pack.h"
#include "mi_builder.h"

TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0), in_mem64(0));
   mi_store(&b, out_mem32(8), in_mem64(0));
   mi_store(&b, out_mem32(16), in_mem32(0));
   mi_store(&b, out_mem64(24), in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
#endif

TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}

TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
}

TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}

/* Start of MI_MATH section */
#if GFX_VERx10 >= 75

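/* Compare a result computed on the GPU against the same expression folded
 * on the CPU by the builder (mi_value_to_u64 expects an immediate).
 */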
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))

TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8), value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}

/* Test adding immediates of all kinds, including:
 *
 *  - all zeroes
 *  - all ones
 *  - inverted constants
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
            mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
            mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
            mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
            mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
            mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
            mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
            mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
            mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
            mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
   EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24), value);
   EXPECT_EQ(*(uint64_t *)(output + 32), value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48), value);
   EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72), value);
   EXPECT_EQ(*(uint64_t *)(output + 80), value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96), value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}

TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                  mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                  mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                  mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                  mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
      }
   }
}

TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}

TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                              mi_imm(values[1])));
}

#if GFX_VER >= 8
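/* mi_store_relocated_imm() emits the store now and returns a token through
 * which the immediate can be patched into the batch later, once the value
 * is actually known.
 */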
TEST_F(mi_builder_test, imm_mem_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0 = mi_store_relocated_imm(&b, out_mem64(0));
   struct mi_reloc_imm_token r1 = mi_store_relocated_imm(&b, out_mem32(8));

   mi_relocate_store_imm(r0, value);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, imm_reg_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0, r1;

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg32(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
#endif // GFX_VER >= 8

#if GFX_VERx10 >= 125
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                  mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}

TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
#endif /* if GFX_VERx10 >= 125 */

TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                  mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}

TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                  mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}

TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

#endif /* GFX_VERx10 >= 75 */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store tests.  Only available on XE_HP+.
 */

TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}

TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                            in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}

#endif /* GFX_VERx10 >= 125 */

#if GFX_VER >= 9

/*
 * Control-flow tests.  Only available on Gfx9+.
 */

TEST_F(mi_builder_test, goto)
{
   const uint64_t value = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(value));

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
}

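/* MMIO address of the MI_PREDICATE_RESULT register. */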
#define MI_PREDICATE_RESULT 0x2418

TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}

TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}

TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}

TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}

TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}

#endif /* GFX_VER >= 9 */