/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <sys/mman.h>

#include <gtest/gtest.h>

#include "c99_compat.h"
#include "common/xe/intel_engine.h"
#include "common/intel_gem.h"
#include "dev/intel_debug.h"
#include "dev/intel_device_info.h"
#include "dev/intel_kmd.h"
#include "intel_gem.h"
#include "isl/isl.h"
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/xe_drm.h"
#include "genxml/gen_macros.h"
#include "util/macros.h"

class mi_builder_test;

struct address {
   uint32_t gem_handle;
   uint32_t offset;
};

#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);
bool *__gen_get_write_fencing_status(mi_builder_test *test);
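
/* mi_builder.h is designed to be textually included and calls back into its
 * includer through the __gen_* hooks declared above: resolving an address
 * plus delta into the value written to the batch, allocating batch dwords,
 * recovering the batch address of a location, and exposing write-fencing
 * status.  This test harness supplies a minimal implementation of each
 * further below.
 */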

struct address
__gen_address_offset(address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
#define MI_BUILDER_NUM_ALLOC_GPRS 15
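/* With 15 allocatable GPRs the builder hands out GPR0-GPR14 and never
 * touches GPR15, which is why RSVD_TEMP_REG above can safely point at
 * MI_ALU_REG15 (CS GPR base 0x2600 + 15 * 8 = 0x2678) on HSW+.
 */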
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define MI_BUILDER_CAN_WRITE_BATCH (GFX_VER >= 8)

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
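
/* emit_cmd() follows the usual genxml packing pattern: the for-loop header
 * declares `name` pre-filled with the command's default header dwords and
 * allocates space in the batch, the loop body runs exactly once so the test
 * can fill in fields, and the increment expression packs the struct into
 * the batch and terminates the loop by nulling _dst.
 */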

#include <vector>

class mi_builder_test : public ::testing::Test {
public:
   void SetUp() override;
   void TearDown() override;

   void *emit_dwords(int num_dwords);
   void submit_batch();

   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd = -1;
   intel_device_info devinfo;

   uint32_t batch_bo_handle = 0;
   uint64_t batch_bo_addr;
   uint32_t batch_offset;
   void *batch_map = NULL;

   struct {
      uint32_t vm_id = 0;
      uint32_t queue_id = 0;
   } xe;

   struct {
      uint32_t ctx_id = 0;
#if GFX_VER < 8
      std::vector<drm_i915_gem_relocation_entry> relocs;
#endif
   } i915;

   uint32_t data_bo_handle = 0;
   uint64_t data_bo_addr;
   void *data_map = NULL;

   char *input;
   char *output;
   uint64_t canary;

   bool write_fence_status;

   mi_builder b;
};

// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
#define DATA_BO_SIZE 4096
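
/* The data BO is one page split in half: tests read their inputs from the
 * first 2 KB (INPUT_DATA_OFFSET) and write results to the second 2 KB
 * (OUTPUT_DATA_OFFSET).  SetUp() fills the whole page with the same 0x8b
 * byte pattern as `canary`, so any byte a test didn't explicitly write can
 * be checked against the canary afterwards.
 */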

void
mi_builder_test::SetUp()
{
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);
   ASSERT_GT(max_devices, 0);

   int i;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         if (intel_get_kmd_type(fd) == INTEL_KMD_TYPE_I915) {
            /* We don't really need to do this when running on hardware because
             * we can just pull it from the drmDevice.  However, without doing
             * this, intel_dump_gpu gets a bit of heartburn and we can't use the
             * --device option with it.
             */
            int device_id;
            ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
                  << strerror(errno);
         }

         ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
         if (devinfo.ver != GFX_VER ||
             (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
            close(fd);
            fd = -1;
            continue;
         }

         /* Found a device! */
         break;
      }
   }

   drmFreeDevices(devices, max_devices);
   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      ASSERT_TRUE(intel_gem_create_context(fd, &i915.ctx_id)) << strerror(errno);

      if (GFX_VER >= 8) {
         /* On gfx8+, we require softpin */
         int has_softpin;
         ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
               << strerror(errno);
         ASSERT_TRUE(has_softpin);
      }

      // Create the batch buffer
      drm_i915_gem_create gem_create = drm_i915_gem_create();
      gem_create.size = BATCH_BO_SIZE;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                         (void *)&gem_create), 0) << strerror(errno);
      batch_bo_handle = gem_create.handle;
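      /* On gfx8+ this test softpins both BOs (see EXEC_OBJECT_PINNED in
       * submit_batch()), so fixed 48-bit canonical GPU addresses are chosen
       * up front; pre-gfx8 falls back to relocations instead.
       */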
#if GFX_VER >= 8
      batch_bo_addr = 0xffffffffdff70000ULL;
#endif

      if (devinfo.has_caching_uapi) {
         drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
         gem_caching.handle = batch_bo_handle;
         gem_caching.caching = I915_CACHING_CACHED;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                            (void *)&gem_caching), 0) << strerror(errno);
      }

      if (devinfo.has_mmap_offset) {
         drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
         gem_mmap_offset.handle = batch_bo_handle;
         gem_mmap_offset.flags = devinfo.has_local_mem ?
                                 I915_MMAP_OFFSET_FIXED :
                                 I915_MMAP_OFFSET_WC;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                            &gem_mmap_offset), 0) << strerror(errno);

         batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                          fd, gem_mmap_offset.offset);
         ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
      } else {
         drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
         gem_mmap.handle = batch_bo_handle;
         gem_mmap.offset = 0;
         gem_mmap.size = BATCH_BO_SIZE;
         gem_mmap.flags = 0;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                            (void *)&gem_mmap), 0) << strerror(errno);
         batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
      }

      // Create the data buffer
      gem_create = drm_i915_gem_create();
      gem_create.size = DATA_BO_SIZE;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                         (void *)&gem_create), 0) << strerror(errno);
      data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
      data_bo_addr = 0xffffffffefff0000ULL;
#endif

      if (devinfo.has_caching_uapi) {
         drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
         gem_caching.handle = data_bo_handle;
         gem_caching.caching = I915_CACHING_CACHED;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                            (void *)&gem_caching), 0) << strerror(errno);
      }

      if (devinfo.has_mmap_offset) {
         drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
         gem_mmap_offset.handle = data_bo_handle;
         gem_mmap_offset.flags = devinfo.has_local_mem ?
                                 I915_MMAP_OFFSET_FIXED :
                                 I915_MMAP_OFFSET_WC;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                            &gem_mmap_offset), 0) << strerror(errno);

         data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                         fd, gem_mmap_offset.offset);
         ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
      } else {
         drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
         gem_mmap.handle = data_bo_handle;
         gem_mmap.offset = 0;
         gem_mmap.size = DATA_BO_SIZE;
         gem_mmap.flags = 0;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                            (void *)&gem_mmap), 0) << strerror(errno);
         data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
      }
   } else {
      assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);

      int err;

      struct drm_xe_vm_create create = {
         .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
      ASSERT_EQ(err, 0) << strerror(err);
      xe.vm_id = create.vm_id;
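
      /* The Xe setup mirrors the i915 path: a VM and an exec queue are
       * created once here, the BOs are created and mapped below, and
       * submit_batch() binds them into the VM right before execution.
       */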

      struct drm_xe_engine_class_instance instance = {};

      struct intel_query_engine_info *engines_info = xe_engine_get_info(fd);
      assert(engines_info);

      bool found_engine = false;
      for (uint32_t i = 0; i < engines_info->num_engines; i++) {
         struct intel_engine_class_instance *e = &engines_info->engines[i];
         if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
            instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
            instance.engine_instance = e->engine_instance;
            instance.gt_id = e->gt_id;
            found_engine = true;
            break;
         }
      }
      free(engines_info);
      ASSERT_TRUE(found_engine);

      struct drm_xe_exec_queue_create queue_create = {
         .width          = 1,
         .num_placements = 1,
         .vm_id          = xe.vm_id,
         .instances      = (uintptr_t)&instance,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
      ASSERT_EQ(err, 0) << strerror(err);
      xe.queue_id = queue_create.exec_queue_id;

      // Create the batch buffer.
      {
         struct drm_xe_gem_create gem_create = {
            .size        = BATCH_BO_SIZE,
            .placement   = 1u << devinfo.mem.sram.mem.instance,
            .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
         ASSERT_EQ(err, 0) << strerror(err);
         batch_bo_handle = gem_create.handle;
         batch_bo_addr = 0x10000000;

         struct drm_xe_gem_mmap_offset mm = {
            .handle = batch_bo_handle,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
         ASSERT_EQ(err, 0) << strerror(err);
         batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, mm.offset);
         ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
      }

      // Create the data buffer.
      {
         struct drm_xe_gem_create gem_create = {
            .size        = DATA_BO_SIZE,
            .placement   = 1u << devinfo.mem.sram.mem.instance,
            .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
         ASSERT_EQ(err, 0) << strerror(err);
         data_bo_handle = gem_create.handle;
         data_bo_addr = 0x20000000;

         struct drm_xe_gem_mmap_offset mm = {
            .handle = data_bo_handle,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
         ASSERT_EQ(err, 0) << strerror(err);
         data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, mm.offset);
         ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
      }
   }

   // Start the batch at zero
   batch_offset = 0;

   input = (char *)data_map + INPUT_DATA_OFFSET;
   output = (char *)data_map + OUTPUT_DATA_OFFSET;

   // Fill the test data with garbage
   memset(data_map, 139, DATA_BO_SIZE);
   memset(&canary, 139, sizeof(canary));

   write_fence_status = false;

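   /* mi_builder needs a MOCS value for the memory accesses it emits; take
    * the device's default (no usage flags, non-external) MOCS from ISL.
    */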
   struct isl_device isl_dev;
   isl_device_init(&isl_dev, &devinfo);
   mi_builder_init(&b, &devinfo, this);
   const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
   mi_builder_set_mocs(&b, mocs);
}

void
mi_builder_test::TearDown()
{
   int err;

   if (data_map) {
      err = munmap(data_map, DATA_BO_SIZE);
      EXPECT_EQ(err, 0) << "unmap data bo failed";
   }

   if (data_bo_handle) {
      struct drm_gem_close gem_close = { .handle = data_bo_handle };
      err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
      EXPECT_EQ(err, 0) << "close data bo failed";
   }

   if (batch_map) {
      err = munmap(batch_map, BATCH_BO_SIZE);
      EXPECT_EQ(err, 0) << "unmap batch bo failed";
   }

   if (batch_bo_handle) {
      struct drm_gem_close gem_close = { .handle = batch_bo_handle };
      err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
      EXPECT_EQ(err, 0) << "close batch bo failed";
   }

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      if (i915.ctx_id) {
         struct drm_i915_gem_context_destroy destroy = {
            .ctx_id = i915.ctx_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
         EXPECT_EQ(err, 0) << "context destroy failed";
      }
   } else {
      assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);

      if (xe.queue_id) {
         struct drm_xe_exec_queue_destroy queue_destroy = {
            .exec_queue_id = xe.queue_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
         EXPECT_EQ(err, 0) << "queue_destroy failure";
      }

      if (xe.vm_id) {
         struct drm_xe_vm_destroy destroy = {
            .vm_id = xe.vm_id,
         };
         err = intel_ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
         EXPECT_EQ(err, 0) << "vm_destroy failure";
      }
   }

   if (fd != -1)
      close(fd);
}

void *
mi_builder_test::emit_dwords(int num_dwords)
{
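   /* Trivial bump allocator over the CPU mapping of the batch BO; the
    * builder requests space through __gen_get_batch_dwords() below.
    */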
   void *ptr = (void *)((char *)batch_map + batch_offset);
   batch_offset += num_dwords * 4;
   assert(batch_offset < BATCH_BO_SIZE);
   return ptr;
}

void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
      drm_i915_gem_exec_object2 objects[2];
      memset(objects, 0, sizeof(objects));

      objects[0].handle = data_bo_handle;
      objects[0].relocation_count = 0;
      objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
      objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                         EXEC_OBJECT_PINNED |
                         EXEC_OBJECT_WRITE;
      objects[0].offset = data_bo_addr;
#else
      objects[0].flags = EXEC_OBJECT_WRITE;
      objects[0].offset = -1;
#endif

      objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
      objects[1].relocation_count = 0;
      objects[1].relocs_ptr = 0;
      objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                         EXEC_OBJECT_PINNED;
      objects[1].offset = batch_bo_addr;
#else
      objects[1].relocation_count = i915.relocs.size();
      objects[1].relocs_ptr = (uintptr_t)(void *)&i915.relocs[0];
      objects[1].flags = 0;
      objects[1].offset = -1;
#endif

      drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
      execbuf.buffers_ptr = (uintptr_t)(void *)objects;
      execbuf.buffer_count = 2;
      execbuf.batch_start_offset = 0;
      execbuf.batch_len = batch_offset;
      execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
531       execbuf.rsvd1 = i915.ctx_id;
532 
533       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
534                          (void *)&execbuf), 0) << strerror(errno);
535 
536       drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
537       gem_wait.bo_handle = batch_bo_handle;
538       gem_wait.timeout_ns = INT64_MAX;
539       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
540                          (void *)&gem_wait), 0) << strerror(errno);
541    } else {
542       assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);
543 
544       int err;
545 
546       uint32_t sync_handles[2] = {};
547       for (int i = 0; i < 2; i++) {
548          struct drm_syncobj_create sync_create = {};
549          err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
550          ASSERT_EQ(err, 0) << strerror(err);
551          sync_handles[i] = sync_create.handle;
552       }
553 
554       struct drm_xe_vm_bind_op bind_ops[] = {
555          {
556             .obj       = batch_bo_handle,
557             .pat_index = devinfo.pat.cached_coherent.index,
558             .range     = BATCH_BO_SIZE,
559             .addr      = batch_bo_addr,
560             .op        = DRM_XE_VM_BIND_OP_MAP,
561             .flags     = DRM_XE_VM_BIND_FLAG_READONLY,
562          },
563          {
564             .obj       = data_bo_handle,
565             .pat_index = devinfo.pat.cached_coherent.index,
566             .range     = DATA_BO_SIZE,
567             .addr      = data_bo_addr,
568             .op        = DRM_XE_VM_BIND_OP_MAP,
569          },
570       };
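
      /* The batch mapping is bound read-only since the GPU only ever reads
       * it; both BOs use the cached-coherent PAT entry to match their
       * write-back CPU mappings, so no explicit flushing is needed.
       */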

      struct drm_xe_sync bind_syncs[] = {
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
            .handle = sync_handles[0],
         },
      };

      struct drm_xe_vm_bind bind = {
         .vm_id           = xe.vm_id,
         .num_binds       = ARRAY_SIZE(bind_ops),
         .vector_of_binds = (uintptr_t)bind_ops,
         .num_syncs       = 1,
         .syncs           = (uintptr_t)bind_syncs,
      };

      err = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
      ASSERT_EQ(err, 0) << strerror(err);

      struct drm_xe_sync exec_syncs[] = {
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .handle = sync_handles[0],
         },
         {
            .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
            .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
            .handle = sync_handles[1],
         }
      };
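
      /* sync_handles[0] orders the exec after the VM bind above completes;
       * sync_handles[1] is signaled by the exec itself and is what the
       * CPU-side syncobj wait below blocks on.
       */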

      struct drm_xe_exec exec = {
         .exec_queue_id    = xe.queue_id,
         .num_syncs        = 2,
         .syncs            = (uintptr_t)exec_syncs,
         .address          = batch_bo_addr,
         .num_batch_buffer = 1,
      };
      err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
      ASSERT_EQ(err, 0) << strerror(err);

      struct drm_syncobj_wait wait = {
         .handles       = (uintptr_t)&sync_handles[1],
         .timeout_nsec  = INT64_MAX,
         .count_handles = 1,
      };
      err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      ASSERT_EQ(err, 0) << strerror(err);
   }
}

uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   assert(test->devinfo.kmd_type == INTEL_KMD_TYPE_I915);
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->i915.relocs.push_back(reloc);

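   /* The value written into the batch now is only the presumed one; the
    * kernel patches in the real address at execbuf time using the
    * relocation recorded above.
    */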
   return reloc.delta;
#endif
}

bool *
__gen_get_write_fencing_status(mi_builder_test *test)
{
   return &test->write_fence_status;
}

void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}

struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
   assert(location >= test->batch_map);
   size_t offset = (char *)location - (char *)test->batch_map;
   assert(offset < BATCH_BO_SIZE);
   assert(offset <= UINT32_MAX);

   return (struct address) {
      .gem_handle = test->batch_bo_handle,
      .offset = (uint32_t)offset,
   };
}

#include "genxml/genX_pack.h"
#include "mi_builder.h"

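/* Each test below drives the builder to emit MI commands, submits and waits
 * for the batch, then validates the output window from the CPU.  The extra
 * 32-bit compares against `canary` check that narrow stores didn't clobber
 * the bytes next to them.
 */
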
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0),   in_mem64(0));
   mi_store(&b, out_mem32(8),   in_mem64(0));
   mi_store(&b, out_mem32(16),  in_mem32(0));
   mi_store(&b, out_mem64(24),  in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
#endif

TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}

TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
}

TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}

/* Start of MI_MATH section */
#if GFX_VERx10 >= 75

#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
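
/* EXPECT_EQ_IMM re-evaluates the same expression with all-immediate
 * operands, which the builder can fold on the CPU; mi_value_to_u64() then
 * extracts the reference value to compare against the GPU result.
 */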

TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}

/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
                mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
                mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
                mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
                mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
                mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
                mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
                mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
                mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
                mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}

TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                      mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                      mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                      mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                      mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}

TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1, /* cast so the +1 doesn't wrap to 0 in 32-bit arithmetic */
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}

TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                                  mi_imm(values[1])));
}

#if GFX_VER >= 8
TEST_F(mi_builder_test, imm_mem_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0 = mi_store_relocated_imm(&b, out_mem64(0));
   struct mi_reloc_imm_token r1 = mi_store_relocated_imm(&b, out_mem32(8));

   mi_relocate_store_imm(r0, value);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, imm_reg_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0, r1;

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg32(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
#endif // GFX_VER >= 8

#if GFX_VERx10 >= 125
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                      mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}

TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
#endif /* if GFX_VERx10 >= 125 */

TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                      mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}

TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                      mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}

TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

#endif /* GFX_VERx10 >= 75 */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store tests.  Only available on XE_HP+
 */
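
/* The *_offset helpers take a base address plus an offset that is itself
 * an mi_value read at batch-execution time; the offset tables in the two
 * tests below exercise exactly that indirection.
 */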

TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}

TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                                in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}

#endif /* GFX_VERx10 >= 125 */

#if GFX_VER >= 9

/*
 * Control-flow tests.  Only available on Gfx9+
 */

TEST_F(mi_builder_test, goto)
{
   const uint64_t value = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(value));

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
}

#define MI_PREDICATE_RESULT  0x2418
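
/* MI_PREDICATE_RESULT is the MMIO register where the MI_PREDICATE packets
 * in these tests leave their result; the goto_if/continue_if tests feed it
 * back to the builder as the branch condition.
 */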

TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}

TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}

TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}

TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}

TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
#endif /* GFX_VER >= 9 */