• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27 #include <sys/mman.h>
28 
29 #include <gtest/gtest.h>
30 
31 #include "c99_compat.h"
32 #include "common/intel_gem.h"
33 #include "dev/intel_device_info.h"
34 #include "intel_gem.h"
35 #include "isl/isl.h"
36 #include "drm-uapi/i915_drm.h"
37 #include "genxml/gen_macros.h"
38 #include "util/macros.h"
39 
/* The test fixture is defined further down; the genxml hooks declared here
 * only need to be able to name it.
 */
class mi_builder_test;

/* Address type handed to genxml: a GEM handle plus a byte offset inside that
 * buffer object.
 */
struct address {
   uint32_t gem_handle;
   uint32_t offset;
};

#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

/* Hooks implemented later in this file, once the fixture class is complete. */
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);
55 
56 struct address
__gen_address_offset(address addr,uint64_t offset)57 __gen_address_offset(address addr, uint64_t offset)
58 {
59    addr.offset += offset;
60    return addr;
61 }
62 
/* Register used as scratch space by the register tests in this file. */
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
/* NOTE(review): presumably read by mi_builder.h, which is included below. */
#define MI_BUILDER_NUM_ALLOC_GPRS 15

/* Byte offsets, within the data buffer, that in_addr() and out_addr() add to
 * the offsets passed by the tests.
 */
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

/* Token-pasting helpers that map a command name to its genxml symbols. */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
76 
77 #include "genxml/genX_pack.h"
78 #include "mi_builder.h"
79 
/* Emits one genxml command from inside a fixture method (the macro uses
 * `this`).  It declares a `struct cmd` called `name` initialized with the
 * command header, reserves __genxml_cmd_length(cmd) dwords via emit_dwords(),
 * runs the statement that follows once so the caller can fill in fields, and
 * then packs `name` into the reserved dwords.
 */
#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
85 
86 #include <vector>
87 
88 class mi_builder_test : public ::testing::Test {
89 public:
90    mi_builder_test();
91    ~mi_builder_test();
92 
93    void SetUp();
94 
95    void *emit_dwords(int num_dwords);
96    void submit_batch();
97 
in_addr(uint32_t offset)98    inline address in_addr(uint32_t offset)
99    {
100       address addr;
101       addr.gem_handle = data_bo_handle;
102       addr.offset = INPUT_DATA_OFFSET + offset;
103       return addr;
104    }
105 
out_addr(uint32_t offset)106    inline address out_addr(uint32_t offset)
107    {
108       address addr;
109       addr.gem_handle = data_bo_handle;
110       addr.offset = OUTPUT_DATA_OFFSET + offset;
111       return addr;
112    }
113 
in_mem64(uint32_t offset)114    inline mi_value in_mem64(uint32_t offset)
115    {
116       return mi_mem64(in_addr(offset));
117    }
118 
in_mem32(uint32_t offset)119    inline mi_value in_mem32(uint32_t offset)
120    {
121       return mi_mem32(in_addr(offset));
122    }
123 
out_mem64(uint32_t offset)124    inline mi_value out_mem64(uint32_t offset)
125    {
126       return mi_mem64(out_addr(offset));
127    }
128 
out_mem32(uint32_t offset)129    inline mi_value out_mem32(uint32_t offset)
130    {
131       return mi_mem32(out_addr(offset));
132    }
133 
134    int fd;
135    uint32_t ctx_id;
136    intel_device_info devinfo;
137 
138    uint32_t batch_bo_handle;
139 #if GFX_VER >= 8
140    uint64_t batch_bo_addr;
141 #endif
142    uint32_t batch_offset;
143    void *batch_map;
144 
145 #if GFX_VER < 8
146    std::vector<drm_i915_gem_relocation_entry> relocs;
147 #endif
148 
149    uint32_t data_bo_handle;
150 #if GFX_VER >= 8
151    uint64_t data_bo_addr;
152 #endif
153    void *data_map;
154    char *input;
155    char *output;
156    uint64_t canary;
157 
158    mi_builder b;
159 };
160 
mi_builder_test()161 mi_builder_test::mi_builder_test() :
162   fd(-1)
163 { }
164 
~mi_builder_test()165 mi_builder_test::~mi_builder_test()
166 {
167    close(fd);
168 }
169 
// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
// Size in bytes of the buffer holding the test inputs and outputs.
#define DATA_BO_SIZE 4096
173 
174 void
SetUp()175 mi_builder_test::SetUp()
176 {
177    drmDevicePtr devices[8];
178    int max_devices = drmGetDevices2(0, devices, 8);
179 
180    int i;
181    for (i = 0; i < max_devices; i++) {
182       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
183           devices[i]->bustype == DRM_BUS_PCI &&
184           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
185          fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
186          if (fd < 0)
187             continue;
188 
189          /* We don't really need to do this when running on hardware because
190           * we can just pull it from the drmDevice.  However, without doing
191           * this, intel_dump_gpu gets a bit of heartburn and we can't use the
192           * --device option with it.
193           */
194          int device_id;
195          ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
196                << strerror(errno);
197 
198          ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
199          if (devinfo.ver != GFX_VER ||
200              (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
201             close(fd);
202             fd = -1;
203             continue;
204          }
205 
206 
207          /* Found a device! */
208          break;
209       }
210    }
211    ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
212 
213    ASSERT_TRUE(intel_gem_create_context(fd, &ctx_id)) << strerror(errno);
214 
215    if (GFX_VER >= 8) {
216       /* On gfx8+, we require softpin */
217       int has_softpin;
218       ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
219             << strerror(errno);
220       ASSERT_TRUE(has_softpin);
221    }
222 
223    // Create the batch buffer
224    drm_i915_gem_create gem_create = drm_i915_gem_create();
225    gem_create.size = BATCH_BO_SIZE;
226    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
227                       (void *)&gem_create), 0) << strerror(errno);
228    batch_bo_handle = gem_create.handle;
229 #if GFX_VER >= 8
230    batch_bo_addr = 0xffffffffdff70000ULL;
231 #endif
232 
233    if (devinfo.has_caching_uapi) {
234       drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
235       gem_caching.handle = batch_bo_handle;
236       gem_caching.caching = I915_CACHING_CACHED;
237       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
238                          (void *)&gem_caching), 0) << strerror(errno);
239    }
240 
241    if (devinfo.has_mmap_offset) {
242       drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
243       gem_mmap_offset.handle = batch_bo_handle;
244       gem_mmap_offset.flags = devinfo.has_local_mem ?
245                               I915_MMAP_OFFSET_FIXED :
246                               I915_MMAP_OFFSET_WC;
247       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
248                          &gem_mmap_offset), 0) << strerror(errno);
249 
250       batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
251                        fd, gem_mmap_offset.offset);
252       ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
253    } else {
254       drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
255       gem_mmap.handle = batch_bo_handle;
256       gem_mmap.offset = 0;
257       gem_mmap.size = BATCH_BO_SIZE;
258       gem_mmap.flags = 0;
259       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
260                       (void *)&gem_mmap), 0) << strerror(errno);
261       batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
262    }
263 
264    // Start the batch at zero
265    batch_offset = 0;
266 
267    // Create the data buffer
268    gem_create = drm_i915_gem_create();
269    gem_create.size = DATA_BO_SIZE;
270    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
271                       (void *)&gem_create), 0) << strerror(errno);
272    data_bo_handle = gem_create.handle;
273 #if GFX_VER >= 8
274    data_bo_addr = 0xffffffffefff0000ULL;
275 #endif
276 
277    if (devinfo.has_caching_uapi) {
278       drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
279       gem_caching.handle = data_bo_handle;
280       gem_caching.caching = I915_CACHING_CACHED;
281       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
282                          (void *)&gem_caching), 0) << strerror(errno);
283    }
284 
285    if (devinfo.has_mmap_offset) {
286       drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
287       gem_mmap_offset.handle = data_bo_handle;
288       gem_mmap_offset.flags = devinfo.has_local_mem ?
289                               I915_MMAP_OFFSET_FIXED :
290                               I915_MMAP_OFFSET_WC;
291       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
292                          &gem_mmap_offset), 0) << strerror(errno);
293 
294       data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
295                       fd, gem_mmap_offset.offset);
296       ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
297    } else {
298       drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
299       gem_mmap.handle = data_bo_handle;
300       gem_mmap.offset = 0;
301       gem_mmap.size = DATA_BO_SIZE;
302       gem_mmap.flags = 0;
303       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
304                          (void *)&gem_mmap), 0) << strerror(errno);
305       data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
306    }
307 
308    input = (char *)data_map + INPUT_DATA_OFFSET;
309    output = (char *)data_map + OUTPUT_DATA_OFFSET;
310 
311    // Fill the test data with garbage
312    memset(data_map, 139, DATA_BO_SIZE);
313    memset(&canary, 139, sizeof(canary));
314 
315    struct isl_device isl_dev;
316    isl_device_init(&isl_dev, &devinfo);
317    mi_builder_init(&b, &devinfo, this);
318    const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
319    mi_builder_set_mocs(&b, mocs);
320 }
321 
322 void *
emit_dwords(int num_dwords)323 mi_builder_test::emit_dwords(int num_dwords)
324 {
325    void *ptr = (void *)((char *)batch_map + batch_offset);
326    batch_offset += num_dwords * 4;
327    assert(batch_offset < BATCH_BO_SIZE);
328    return ptr;
329 }
330 
331 void
submit_batch()332 mi_builder_test::submit_batch()
333 {
334    mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
335 
336    // Round batch up to an even number of dwords.
337    if (batch_offset & 4)
338       mi_builder_emit(&b, GENX(MI_NOOP), noop);
339 
340    drm_i915_gem_exec_object2 objects[2];
341    memset(objects, 0, sizeof(objects));
342 
343    objects[0].handle = data_bo_handle;
344    objects[0].relocation_count = 0;
345    objects[0].relocs_ptr = 0;
346 #if GFX_VER >= 8 /* On gfx8+, we pin everything */
347    objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
348                       EXEC_OBJECT_PINNED |
349                       EXEC_OBJECT_WRITE;
350    objects[0].offset = data_bo_addr;
351 #else
352    objects[0].flags = EXEC_OBJECT_WRITE;
353    objects[0].offset = -1;
354 #endif
355 
356    objects[1].handle = batch_bo_handle;
357 #if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
358    objects[1].relocation_count = 0;
359    objects[1].relocs_ptr = 0;
360    objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
361                       EXEC_OBJECT_PINNED;
362    objects[1].offset = batch_bo_addr;
363 #else
364    objects[1].relocation_count = relocs.size();
365    objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
366    objects[1].flags = 0;
367    objects[1].offset = -1;
368 #endif
369 
370    drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
371    execbuf.buffers_ptr = (uintptr_t)(void *)objects;
372    execbuf.buffer_count = 2;
373    execbuf.batch_start_offset = 0;
374    execbuf.batch_len = batch_offset;
375    execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
376    execbuf.rsvd1 = ctx_id;
377 
378    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
379                       (void *)&execbuf), 0) << strerror(errno);
380 
381    drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
382    gem_wait.bo_handle = batch_bo_handle;
383    gem_wait.timeout_ns = INT64_MAX;
384    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
385                       (void *)&gem_wait), 0) << strerror(errno);
386 }
387 
388 uint64_t
__gen_combine_address(mi_builder_test * test,void * location,address addr,uint32_t delta)389 __gen_combine_address(mi_builder_test *test, void *location,
390                       address addr, uint32_t delta)
391 {
392 #if GFX_VER >= 8
393    uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
394                        test->data_bo_addr : test->batch_bo_addr;
395    return addr_u64 + addr.offset + delta;
396 #else
397    drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
398    reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
399    reloc.delta = addr.offset + delta;
400    reloc.offset = (char *)location - (char *)test->batch_map;
401    reloc.presumed_offset = -1;
402    test->relocs.push_back(reloc);
403 
404    return reloc.delta;
405 #endif
406 }
407 
408 void *
__gen_get_batch_dwords(mi_builder_test * test,unsigned num_dwords)409 __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
410 {
411    return test->emit_dwords(num_dwords);
412 }
413 
414 struct address
__gen_get_batch_address(mi_builder_test * test,void * location)415 __gen_get_batch_address(mi_builder_test *test, void *location)
416 {
417    assert(location >= test->batch_map);
418    size_t offset = (char *)location - (char *)test->batch_map;
419    assert(offset < BATCH_BO_SIZE);
420    assert(offset <= UINT32_MAX);
421 
422    return (struct address) {
423       .gem_handle = test->batch_bo_handle,
424       .offset = (uint32_t)offset,
425    };
426 }
427 
428 #include "genxml/genX_pack.h"
429 #include "mi_builder.h"
430 
/* Stores a 64-bit immediate to memory, once as 64 bits and once as 32 bits. */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t imm = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(imm));
   mi_store(&b, out_mem32(8), mi_imm(imm));

   submit_batch();

   // 64 -> 64
   const uint64_t *wide = (const uint64_t *)(output + 0);
   EXPECT_EQ(*wide, imm);

   // 64 -> 32: only the low dword is written, the next one keeps the fill
   const uint32_t *narrow = (const uint32_t *)(output + 8);
   EXPECT_EQ(narrow[0], (uint32_t)imm);
   EXPECT_EQ(narrow[1], (uint32_t)canary);
}
447 
/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
/* Copies memory to memory in every 64/32-bit source/destination pairing. */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t pattern = 0x0123456789abcdef;
   *(uint64_t *)input = pattern;

   mi_store(&b, out_mem64(0),   in_mem64(0));
   mi_store(&b, out_mem32(8),   in_mem64(0));
   mi_store(&b, out_mem32(16),  in_mem32(0));
   mi_store(&b, out_mem64(24),  in_mem32(0));

   submit_batch();

   const uint32_t pattern_lo = (uint32_t)pattern;
   const uint32_t fill_lo = (uint32_t)canary;

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), pattern);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), pattern_lo);
   EXPECT_EQ(*(uint32_t *)(output + 12), fill_lo);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), pattern_lo);
   EXPECT_EQ(*(uint32_t *)(output + 20), fill_lo);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)pattern_lo);
}
#endif
477 
/* Loads an immediate into the scratch register as 64 and as 32 bits, then
 * reads the full register back to memory to see which bits were written.
 */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t imm = 0x0123456789abcdef;

   // 64-bit register write
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(imm));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   // 32-bit register write on top of the canary
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(imm));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), imm);

   // 64 -> 32
   const uint32_t *halves = (const uint32_t *)(output + 8);
   EXPECT_EQ(halves[0], (uint32_t)imm);
   EXPECT_EQ(halves[1], (uint32_t)canary);
}
499 
/* Loads memory into the scratch register in every 64/32-bit pairing.  The
 * register is reset to the canary before each load and read back in full.
 */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t pattern = 0x0123456789abcdef;
   *(uint64_t *)input = pattern;

   // 64-bit memory into the 64-bit register
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   // 64-bit memory into the 32-bit register
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   // 32-bit memory into the 32-bit register
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   // 32-bit memory into the 64-bit register
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint32_t pattern_lo = (uint32_t)pattern;
   const uint32_t fill_lo = (uint32_t)canary;

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), pattern);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), pattern_lo);
   EXPECT_EQ(*(uint32_t *)(output + 12), fill_lo);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), pattern_lo);
   EXPECT_EQ(*(uint32_t *)(output + 20), fill_lo);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)pattern_lo);
}
537 
/* Fills the first 256 output bytes with a dword pattern and checks each dword. */
TEST_F(mi_builder_test, memset)
{
   const unsigned fill_bytes = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, fill_bytes);

   submit_batch();

   uint32_t *dwords = (uint32_t *)output;
   const size_t num_dwords = fill_bytes / sizeof(*dwords);
   for (unsigned idx = 0; idx < num_dwords; idx++)
      EXPECT_EQ(dwords[idx], 0xdeadbeef);
}
550 
/* Copies 256 bytes holding the values 0..255 from input to output. */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *src_bytes = (uint8_t *)input;
   for (unsigned idx = 0; idx < memcpy_size; idx++)
      src_bytes[idx] = idx;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *dst_bytes = (uint8_t *)output;
   for (unsigned idx = 0; idx < memcpy_size; idx++)
      EXPECT_EQ(dst_bytes[idx], idx);
}
567 
568 /* Start of MI_MATH section */
569 #if GFX_VERx10 >= 75
570 
/* Expects x to equal the value mi_value_to_u64() returns for imm. */
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
572 
/* Bitwise NOT of 64-bit and 32-bit sources, stored to 64-bit and 32-bit
 * destinations, including a double negation.
 */
TEST_F(mi_builder_test, inot)
{
   const uint64_t pattern = 0x0123456789abcdef;
   const uint32_t pattern_lo = (uint32_t)pattern;
   const uint32_t pattern_hi = (uint32_t)(pattern >> 32);
   memcpy(input, &pattern, sizeof(pattern));

   // 64-bit destinations
   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   // 32-bit destinations
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   const uint64_t *out_q = (const uint64_t *)output;
   EXPECT_EQ(out_q[0], ~pattern);
   EXPECT_EQ(out_q[1], pattern);
   EXPECT_EQ(out_q[2], ~(uint64_t)pattern_lo);
   EXPECT_EQ(out_q[3], ~(uint64_t)pattern_hi);

   const uint32_t *out_d = (const uint32_t *)(output + 32);
   EXPECT_EQ(out_d[0], (uint32_t)~pattern);
   EXPECT_EQ(out_d[1], (uint32_t)~pattern_lo);
   EXPECT_EQ(out_d[2], (uint32_t)pattern_lo);
   EXPECT_EQ(out_d[3], (uint32_t)~pattern_hi);
}
600 
601 /* Test adding of immediates of all kinds including
602  *
603  *  - All zeroes
604  *  - All ones
605  *  - inverted constants
606  */
TEST_F(mi_builder_test,add_imm)607 TEST_F(mi_builder_test, add_imm)
608 {
609    const uint64_t value = 0x0123456789abcdef;
610    const uint64_t add = 0xdeadbeefac0ffee2;
611    memcpy(input, &value, sizeof(value));
612 
613    mi_store(&b, out_mem64(0),
614                 mi_iadd(&b, in_mem64(0), mi_imm(0)));
615    mi_store(&b, out_mem64(8),
616                 mi_iadd(&b, in_mem64(0), mi_imm(-1)));
617    mi_store(&b, out_mem64(16),
618                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
619    mi_store(&b, out_mem64(24),
620                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
621    mi_store(&b, out_mem64(32),
622                 mi_iadd(&b, in_mem64(0), mi_imm(add)));
623    mi_store(&b, out_mem64(40),
624                 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
625    mi_store(&b, out_mem64(48),
626                 mi_iadd(&b, mi_imm(0), in_mem64(0)));
627    mi_store(&b, out_mem64(56),
628                 mi_iadd(&b, mi_imm(-1), in_mem64(0)));
629    mi_store(&b, out_mem64(64),
630                 mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
631    mi_store(&b, out_mem64(72),
632                 mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
633    mi_store(&b, out_mem64(80),
634                 mi_iadd(&b, mi_imm(add), in_mem64(0)));
635    mi_store(&b, out_mem64(88),
636                 mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
637 
638    // And some add_imm just for good measure
639    mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
640    mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
641 
642    submit_batch();
643 
644    EXPECT_EQ(*(uint64_t *)(output + 0),   value);
645    EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
646    EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
647    EXPECT_EQ(*(uint64_t *)(output + 24),  value);
648    EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
649    EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
650    EXPECT_EQ(*(uint64_t *)(output + 48),  value);
651    EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
652    EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
653    EXPECT_EQ(*(uint64_t *)(output + 72),  value);
654    EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
655    EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
656    EXPECT_EQ(*(uint64_t *)(output + 96),  value);
657    EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
658 }
659 
/* Runs the four comparisons on every ordered pair of the values below and
 * checks each result against the same comparison applied to immediates.
 */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         /* Each (i, j) pair owns four consecutive 64-bit result slots. */
         const unsigned slot = i * 256 + j * 32;
         const unsigned lhs = i * 8;
         const unsigned rhs = j * 8;

         mi_store(&b, out_mem64(slot + 0),
                      mi_ult(&b, in_mem64(lhs), in_mem64(rhs)));
         mi_store(&b, out_mem64(slot + 8),
                      mi_uge(&b, in_mem64(lhs), in_mem64(rhs)));
         mi_store(&b, out_mem64(slot + 16),
                      mi_ieq(&b, in_mem64(lhs), in_mem64(rhs)));
         mi_store(&b, out_mem64(slot + 24),
                      mi_ine(&b, in_mem64(lhs), in_mem64(rhs)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *results = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(results[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(results[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(results[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(results[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}
703 
/* Checks mi_z and mi_nz on zero, on small values, and on values that are
 * non-zero only in the upper 32 bits.  The array entries without an
 * initializer are zero.
 */
TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      /* Widen before adding: UINT32_MAX + 1 would be computed in 32-bit
       * unsigned arithmetic and wrap to 0 instead of 0x100000000.
       */
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
728 
/* ANDs two 64-bit memory operands and compares against the immediate form. */
TEST_F(mi_builder_test, iand)
{
   const uint64_t operands[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, operands, sizeof(operands));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(operands[0]),
                                                  mi_imm(operands[1])));
}
744 
745 #if GFX_VERx10 >= 125
/* Left shift of a 64-bit memory value by shift counts read from memory. */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   /* The shift counts sit right after the 64-bit value in the input. */
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      mi_store(&b, out_mem64(s * 8),
                   mi_ishl(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + s * 8),
                    mi_ishl(&b, mi_imm(pattern), mi_imm(shifts[s])));
   }
}
766 
/* Unsigned right shift of a 64-bit memory value by counts read from memory. */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   /* The shift counts sit right after the 64-bit value in the input. */
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      mi_store(&b, out_mem64(s * 8),
                   mi_ushr(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + s * 8),
                    mi_ushr(&b, mi_imm(pattern), mi_imm(shifts[s])));
   }
}
787 
/* Unsigned right shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ushr_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + shift * 8),
                    mi_ushr_imm(&b, mi_imm(pattern), shift));
   }
}
805 
/* Arithmetic right shift of two 64-bit values, one with the top bit clear
 * and one with it set, by shift counts read from memory.
 */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t operands[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, operands, sizeof(operands));

   /* The shift counts follow the two 64-bit operands in the input. */
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned v = 0; v < ARRAY_SIZE(operands); v++) {
      for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
         mi_store(&b, out_mem64(v * 8 + s * 16),
                      mi_ishr(&b, in_mem64(v * 8), in_mem32(16 + s * 4)));
      }
   }

   submit_batch();

   for (unsigned v = 0; v < ARRAY_SIZE(operands); v++) {
      for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + v * 8 + s * 16),
                       mi_ishr(&b, mi_imm(operands[v]), mi_imm(shifts[s])));
      }
   }
}
833 
/* Arithmetic right shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ishr_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + shift * 8),
                    mi_ishr_imm(&b, mi_imm(pattern), shift));
   }
}
851 #endif /* if GFX_VERx10 >= 125 */
852 
/* Multiplies two 64-bit memory values by each of twenty 32-bit immediates. */
TEST_F(mi_builder_test, imul_imm)
{
   uint64_t multiplicands[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, multiplicands, sizeof(multiplicands));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t factors[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   /* Each multiplicand gets a row of 20 64-bit results (160 bytes). */
   for (unsigned m = 0; m < ARRAY_SIZE(multiplicands); m++) {
      for (unsigned f = 0; f < ARRAY_SIZE(factors); f++) {
         mi_store(&b, out_mem64(m * 160 + f * 8),
                      mi_imul_imm(&b, in_mem64(m * 8), factors[f]));
      }
   }

   submit_batch();

   for (unsigned m = 0; m < ARRAY_SIZE(multiplicands); m++) {
      for (unsigned f = 0; f < ARRAY_SIZE(factors); f++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + m * 160 + f * 8),
                       mi_imul_imm(&b, mi_imm(multiplicands[m]), factors[f]));
      }
   }
}
889 
/* Left shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ishl_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + shift * 8),
                    mi_ishl_imm(&b, mi_imm(pattern), shift));
   }
}
907 
TEST_F(mi_builder_test, ushr32_imm)
{
   /* Applies mi_ushr32_imm to a single input for every shift count from 0
    * through 64 (inclusive) and checks each stored 64-bit slot against what
    * mi_ushr32_imm yields when fed the same value as an immediate.
    */
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned last_shift = 64;
   const unsigned slot_size = sizeof(uint64_t);

   /* One 64-bit output slot per shift count. */
   for (unsigned shift = 0; shift <= last_shift; shift++) {
      mi_store(&b, out_mem64(shift * slot_size),
                   mi_ushr32_imm(&b, in_mem64(0), shift));
   }

   submit_batch();

   for (unsigned shift = 0; shift <= last_shift; shift++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + shift * slot_size),
                    mi_ushr32_imm(&b, mi_imm(src), shift));
   }
}
925 
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Divides every table entry (read from the input buffer) by every table
    * entry (passed as an immediate) and stores the 32-bit quotients as a
    * row-major table in the output buffer: one row per dividend, one
    * 32-bit slot per divisor.
    */

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t operands[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, operands, sizeof(operands));

   const unsigned slot_size = sizeof(uint32_t);
   /* 20 divisors * 4 bytes = 80 bytes per row. */
   const unsigned row_stride = ARRAY_SIZE(operands) * slot_size;

   for (unsigned num = 0; num < ARRAY_SIZE(operands); num++) {
      for (unsigned den = 0; den < ARRAY_SIZE(operands); den++) {
         mi_store(&b, out_mem32(num * row_stride + den * slot_size),
                      mi_udiv32_imm(&b, in_mem32(num * slot_size),
                                    operands[den]));
      }
   }

   submit_batch();

   for (unsigned num = 0; num < ARRAY_SIZE(operands); num++) {
      for (unsigned den = 0; den < ARRAY_SIZE(operands); den++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + num * row_stride +
                                     den * slot_size),
                       mi_udiv32_imm(&b, mi_imm(operands[num]),
                                     operands[den]));
      }
   }
}
957 
TEST_F(mi_builder_test, store_if)
{
   /* Checks that mi_store_if writes land while the predicate is set and
    * are dropped once the predicate has been cleared.
    */
   const uint64_t kept64 = 0xb453b411deadc0deull;
   const uint32_t kept32 = 0x1337d00d;

   /* Values that must never show up in the output buffer. */
   const uint64_t dropped64 = 0xd0d0d0d0d0d0d0d0ull;
   const uint32_t dropped32 = 0xc000c000;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(kept64));
   mi_store_if(&b, out_mem32(8), mi_imm(kept32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(dropped64));
   mi_store_if(&b, out_mem32(8), mi_imm(dropped32));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), kept64);
   EXPECT_EQ(*(uint32_t *)(output + 8), kept32);
   /* No store in this test targets offset 12, so it is expected to still
    * match the low 32 bits of `canary` (defined outside this test).
    */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
989 
990 #endif /* GFX_VERx10 >= 75 */
991 
992 #if GFX_VERx10 >= 125
993 
994 /*
995  * Indirect load/store tests.  Only available on XE_HP+
996  */
997 
TEST_F(mi_builder_test, load_mem64_offset)
{
   /* Loads 64-bit values through mi_load_mem64_offset, using byte offsets
    * that are themselves read from memory, and checks that output slot i
    * holds the table entry selected by the i-th offset.
    */
   uint64_t table[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, table, sizeof(table));

   /* The offset list is placed right after the 64-byte value table. */
   const unsigned offsets_base = sizeof(table);
   uint32_t byte_offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + offsets_base, byte_offsets, sizeof(byte_offsets));

   for (unsigned idx = 0; idx < ARRAY_SIZE(byte_offsets); idx++) {
      mi_store(&b, out_mem64(idx * 8),
               mi_load_mem64_offset(&b, in_addr(0),
                                    in_mem32(idx * 4 + offsets_base)));
   }

   submit_batch();

   for (unsigned idx = 0; idx < ARRAY_SIZE(byte_offsets); idx++) {
      EXPECT_EQ(*(uint64_t *)(output + idx * 8),
                table[byte_offsets[idx] / 8]);
   }
}
1025 
TEST_F(mi_builder_test, store_mem64_offset)
{
   /* Stores 64-bit values through mi_store_mem64_offset, using byte
    * offsets that are themselves read from memory, and checks that table
    * entry i ended up at the i-th offset within the output buffer.
    */
   uint64_t table[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, table, sizeof(table));

   /* The offset list is placed right after the 64-byte value table. */
   const unsigned offsets_base = sizeof(table);
   uint32_t byte_offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + offsets_base, byte_offsets, sizeof(byte_offsets));

   for (unsigned idx = 0; idx < ARRAY_SIZE(byte_offsets); idx++) {
      mi_store_mem64_offset(&b, out_addr(0),
                            in_mem32(idx * 4 + offsets_base),
                            in_mem64(idx * 8));
   }

   submit_batch();

   for (unsigned idx = 0; idx < ARRAY_SIZE(byte_offsets); idx++) {
      EXPECT_EQ(*(uint64_t *)(output + byte_offsets[idx]), table[idx]);
   }
}
1053 
1054 /*
1055  * Control-flow tests.  Only available on XE_HP+
1056  */
1057 
TEST_F(mi_builder_test, goto)
{
   /* An unconditional mi_goto must jump over the store that sits between
    * it and its target, leaving the first write in place.
    */
   const uint64_t expected = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(expected));

   struct mi_goto_target skip = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &skip);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &skip);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), expected);
}
1076 
1077 #define MI_PREDICATE_RESULT  0x2418
1078 
TEST_F(mi_builder_test, goto_if)
{
   /* Two conditional gotos share one target.  The first is issued with the
    * predicate cleared and is expected to fall through; the second is
    * issued with the predicate set and is expected to jump, so the second
    * write is the last one that lands.
    */
   const uint64_t first_write   = 0xb453b411deadc0deull;
   const uint64_t second_write  = 0x0123456789abcdefull;
   const uint64_t skipped_write = 0;

   mi_store(&b, out_mem64(0), mi_imm(first_write));

   /* Predicate false: the goto below should not be taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target end = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &end);

   mi_store(&b, out_mem64(0), mi_imm(second_write));

   /* Predicate true: the goto below should be taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &end);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(skipped_write));

   mi_goto_target(&b, &end);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), second_write);
}
1117 
TEST_F(mi_builder_test, loop_simple)
{
   /* A counter kept in output[0] is incremented once per loop iteration;
    * the loop exits via mi_break_if once the mi_uge comparison against the
    * target reports true.
    */
   const uint64_t target = 8;

   /* Start the counter at zero. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(target)));

      /* counter += 1 */
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), target);
}
1134 
TEST_F(mi_builder_test, loop_break)
{
   /* Each store writes a marker to output[0]; the marker that survives
    * shows how far execution got before leaving the loop.
    */
   const uint64_t before_conditional_break = 1;
   const uint64_t before_break = 2;
   const uint64_t after_break = 3;

   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(before_conditional_break));

      /* Condition is the immediate 0: expected not to break here. */
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(before_break));

      mi_break(&b);

      /* Expected to be unreachable because of the break above. */
      mi_store(&b, out_mem64(0), mi_imm(after_break));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), before_break);
}
1153 
TEST_F(mi_builder_test, loop_continue)
{
   /* output[0] counts iterations; output[8] receives a marker.  The store
    * placed after mi_continue is expected never to run, so the marker
    * written before it must be the one left behind.
    */
   const uint64_t target = 8;
   const uint64_t before_continue = 5;
   const uint64_t after_continue = 10;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(target)));

      /* counter += 1 */
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(before_continue));

      mi_continue(&b);

      /* Expected to be skipped by the continue above. */
      mi_store(&b, out_mem64(8), mi_imm(after_continue));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), target);
   EXPECT_EQ(*(uint64_t *)(output + 8), before_continue);
}
1177 
TEST_F(mi_builder_test, loop_continue_if)
{
   /* output[0] counts iterations; output[8] receives a marker.  The first
    * mi_continue_if is issued with the predicate cleared and is expected
    * to fall through; the second is issued with the predicate set and is
    * expected to restart the loop, so the middle marker must survive.
    */
   const uint64_t target = 8;
   const uint64_t marker_start = 5;
   const uint64_t marker_after_untaken = 10;
   const uint64_t marker_after_taken = 15;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(target)));

      /* counter += 1 */
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(marker_start));

      /* Predicate false: the continue below should not be taken. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(marker_after_untaken));

      /* Predicate true: the continue below should be taken. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      /* Expected to be skipped by the continue above. */
      mi_store(&b, out_mem64(8), mi_imm(marker_after_taken));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), target);
   EXPECT_EQ(*(uint64_t *)(output + 8), marker_after_untaken);
}
1217 #endif /* GFX_VERx10 >= 125 */
1218