/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>

#include <gtest/gtest.h>

#include "c99_compat.h"
#include "dev/intel_device_info.h"
#include "drm-uapi/i915_drm.h"
#include "genxml/gen_macros.h"
#include "util/macros.h"

class mi_builder_test;

struct address {
   uint32_t gem_handle;
   uint32_t offset;
};

#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

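/* Hooks expected by genX_pack.h and mi_builder.h (both included below).  The
 * generated pack functions and the MI builder call back into the test fixture
 * through these macros and functions to reserve batch space and to resolve
 * addresses.
 */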
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);

struct address
__gen_address_offset(address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

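/* Pick an MMIO register the tests may clobber as a temporary: ALU GPR15 on
 * HSW+, otherwise a 3DPRIM register nothing in these tests touches.  The
 * data BO is split into an input half and an output half.
 */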
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

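/* Emit a raw genxml command into the batch: declare the template struct on
 * the stack, reserve space with emit_dwords(), let the body fill in fields,
 * and pack the result into the batch when the one-iteration for-loop exits.
 */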
#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)

#include <vector>

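/* Test fixture: opens an Intel render node, creates a context, and maps one
 * batch BO plus one data BO whose input/output halves the tests read and
 * write through the `input` and `output` pointers.
 */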
class mi_builder_test : public ::testing::Test {
public:
   mi_builder_test();
   ~mi_builder_test();

   void SetUp();

   void *emit_dwords(int num_dwords);
   void submit_batch();

   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd;
   int ctx_id;
   intel_device_info devinfo;

   uint32_t batch_bo_handle;
#if GFX_VER >= 8
   uint64_t batch_bo_addr;
#endif
   uint32_t batch_offset;
   void *batch_map;

#if GFX_VER < 8
   std::vector<drm_i915_gem_relocation_entry> relocs;
#endif

   uint32_t data_bo_handle;
#if GFX_VER >= 8
   uint64_t data_bo_addr;
#endif
   void *data_map;
   char *input;
   char *output;
   uint64_t canary;

   mi_builder b;
};

mi_builder_test::mi_builder_test() :
  fd(-1)
{ }

mi_builder_test::~mi_builder_test()
{
   close(fd);
}

// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
#define DATA_BO_SIZE 4096

void
mi_builder_test::SetUp()
{
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);

   int i;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         /* We don't really need to do this when running on hardware because
          * we can just pull it from the drmDevice.  However, without doing
          * this, intel_dump_gpu gets a bit of heartburn and we can't use the
          * --device option with it.
          */
         int device_id;
         drm_i915_getparam getparam = drm_i915_getparam();
         getparam.param = I915_PARAM_CHIPSET_ID;
         getparam.value = &device_id;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
                            (void *)&getparam), 0) << strerror(errno);

         ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));
         if (devinfo.ver != GFX_VER ||
             (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
            close(fd);
            fd = -1;
            continue;
         }

         /* Found a device! */
         break;
      }
   }
   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";

   drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
                      (void *)&ctx_create), 0) << strerror(errno);
   ctx_id = ctx_create.ctx_id;

   if (GFX_VER >= 8) {
      /* On gfx8+, we require softpin */
      int has_softpin;
      drm_i915_getparam getparam = drm_i915_getparam();
      getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
      getparam.value = &has_softpin;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
                         (void *)&getparam), 0) << strerror(errno);
      ASSERT_TRUE(has_softpin);
   }

   // Create the batch buffer
   drm_i915_gem_create gem_create = drm_i915_gem_create();
   gem_create.size = BATCH_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   batch_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   batch_bo_addr = 0xffffffffdff70000ULL;
#endif

   drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
   gem_caching.handle = batch_bo_handle;
   gem_caching.caching = I915_CACHING_CACHED;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                      (void *)&gem_caching), 0) << strerror(errno);

   drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
   gem_mmap.handle = batch_bo_handle;
   gem_mmap.offset = 0;
   gem_mmap.size = BATCH_BO_SIZE;
   gem_mmap.flags = 0;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                      (void *)&gem_mmap), 0) << strerror(errno);
   batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;

   // Start the batch at zero
   batch_offset = 0;

   // Create the data buffer
   gem_create = drm_i915_gem_create();
   gem_create.size = DATA_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   data_bo_addr = 0xffffffffefff0000ULL;
#endif

   gem_caching = drm_i915_gem_caching();
   gem_caching.handle = data_bo_handle;
   gem_caching.caching = I915_CACHING_CACHED;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                      (void *)&gem_caching), 0) << strerror(errno);

   gem_mmap = drm_i915_gem_mmap();
   gem_mmap.handle = data_bo_handle;
   gem_mmap.offset = 0;
   gem_mmap.size = DATA_BO_SIZE;
   gem_mmap.flags = 0;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                      (void *)&gem_mmap), 0) << strerror(errno);
   data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
   input = (char *)data_map + INPUT_DATA_OFFSET;
   output = (char *)data_map + OUTPUT_DATA_OFFSET;

   // Fill the test data with garbage
   memset(data_map, 139, DATA_BO_SIZE);
   memset(&canary, 139, sizeof(canary));

   mi_builder_init(&b, &devinfo, this);
}

void *
mi_builder_test::emit_dwords(int num_dwords)
{
   void *ptr = (void *)((char *)batch_map + batch_offset);
   batch_offset += num_dwords * 4;
   assert(batch_offset < BATCH_BO_SIZE);
   return ptr;
}

void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}

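/* On gfx8+ the BOs are softpinned, so combining an address is plain
 * arithmetic on the pinned GPU addresses.  Pre-gfx8 we instead record a
 * relocation for the kernel to patch at execbuf time and return the
 * presumed value.
 */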
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   return reloc.delta;
#endif
}

void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}

struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
   assert(location >= test->batch_map);
   size_t offset = (char *)location - (char *)test->batch_map;
   assert(offset < BATCH_BO_SIZE);
   assert(offset <= UINT32_MAX);

   return (struct address) {
      .gem_handle = test->batch_bo_handle,
      .offset = (uint32_t)offset,
   };
}

#include "genxml/genX_pack.h"
#include "mi_builder.h"

TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0),   in_mem64(0));
   mi_store(&b, out_mem32(8),   in_mem64(0));
   mi_store(&b, out_mem32(16),  in_mem32(0));
   mi_store(&b, out_mem64(24),  in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
#endif

TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}

TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
}

TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), 256);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}

/* Start of MI_MATH section */
#if GFX_VERx10 >= 75

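/* Compare a GPU-computed result against the same mi_builder expression
 * evaluated on immediates, which the builder can fold to a constant on the
 * CPU when every operand is an immediate.
 */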
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))

TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}

/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
                mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
                mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
                mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
                mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
                mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
                mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
                mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
                mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
                mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}

TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                      mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                      mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                      mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                      mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}

TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}

TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                                  mi_imm(values[1])));
}

#if GFX_VERx10 >= 125
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                      mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}

TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
#endif /* if GFX_VERx10 >= 125 */

TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                      mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}

TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                      mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}

TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

#endif /* GFX_VERx10 >= 75 */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store tests.  Only available on XE_HP+
 */

TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}

TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                                in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}

/*
 * Control-flow tests.  Only available on XE_HP+
 */

TEST_F(mi_builder_test, goto)
{
   const uint64_t value = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(value));

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
}

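/* MMIO offset of the MI_PREDICATE_RESULT register, used as the condition
 * source for the predicated goto/continue tests below.
 */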
#define MI_PREDICATE_RESULT  0x2418

TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}

TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}

TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}

TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}

TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
#endif /* GFX_VERx10 >= 125 */