1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27
28 #include <gtest/gtest.h>
29
30 #include "c99_compat.h"
31 #include "dev/intel_device_info.h"
32 #include "drm-uapi/i915_drm.h"
33 #include "genxml/gen_macros.h"
34 #include "util/macros.h"
35
36 class mi_builder_test;
37
/* Minimal GEM-relative address: a buffer-object handle plus a byte offset
 * into that BO.  Used as __gen_address_type by the genxml pack functions.
 */
struct address {
   uint32_t gem_handle;
   uint32_t offset;
};
42
43 #define __gen_address_type struct address
44 #define __gen_user_data ::mi_builder_test
45
46 uint64_t __gen_combine_address(mi_builder_test *test, void *location,
47 struct address addr, uint32_t delta);
48 void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
49 struct address __gen_get_batch_address(mi_builder_test *test,
50 void *location);
51
/* Advance an address by a byte offset.
 *
 * NOTE(review): offset is 64-bit but address::offset is 32-bit, so large
 * offsets silently truncate — harmless here since the data BO is 4 KiB,
 * but worth confirming if this helper is ever reused.
 */
struct address
__gen_address_offset(address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}
58
59 #if GFX_VERx10 >= 75
60 #define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
61 #else
62 #define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
63 #endif
64 #define MI_BUILDER_NUM_ALLOC_GPRS 15
65 #define INPUT_DATA_OFFSET 0
66 #define OUTPUT_DATA_OFFSET 2048
67
68 #define __genxml_cmd_length(cmd) cmd ## _length
69 #define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
70 #define __genxml_cmd_header(cmd) cmd ## _header
71 #define __genxml_cmd_pack(cmd) cmd ## _pack
72
73 #include "genxml/genX_pack.h"
74 #include "mi_builder.h"
75
76 #define emit_cmd(cmd, name) \
77 for (struct cmd name = { __genxml_cmd_header(cmd) }, \
78 *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
79 __builtin_expect(_dst != NULL, 1); \
80 __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
81
82 #include <vector>
83
/* Test fixture: opens a real Intel i915 render node, allocates a batch BO
 * and a data BO (split into input/output halves), and wraps them in an
 * mi_builder so each test can build and execute an MI command stream.
 */
class mi_builder_test : public ::testing::Test {
public:
   mi_builder_test();
   ~mi_builder_test();

   void SetUp();

   /* Carve the next num_dwords dwords out of the batch BO. */
   void *emit_dwords(int num_dwords);
   /* Terminate the batch, execute it, and wait for completion. */
   void submit_batch();

   /* Address of byte `offset` within the input region of the data BO. */
   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* Address of byte `offset` within the output region of the data BO. */
   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* 64-bit MI memory operand at an input offset. */
   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   /* 32-bit MI memory operand at an input offset. */
   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   /* 64-bit MI memory operand at an output offset. */
   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   /* 32-bit MI memory operand at an output offset. */
   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd;                    /* render-node fd; -1 until SetUp() succeeds */
   int ctx_id;                /* GEM context the batches execute in */
   intel_device_info devinfo;

   uint32_t batch_bo_handle;
#if GFX_VER >= 8
   uint64_t batch_bo_addr;    /* softpinned GPU address of the batch BO */
#endif
   uint32_t batch_offset;     /* current write offset (bytes) into batch_map */
   void *batch_map;           /* CPU mapping of the batch BO */

#if GFX_VER < 8
   /* Relocations accumulated by __gen_combine_address for execbuf. */
   std::vector<drm_i915_gem_relocation_entry> relocs;
#endif

   uint32_t data_bo_handle;
#if GFX_VER >= 8
   uint64_t data_bo_addr;     /* softpinned GPU address of the data BO */
#endif
   void *data_map;            /* CPU mapping of the data BO */
   char *input;               /* data_map + INPUT_DATA_OFFSET */
   char *output;              /* data_map + OUTPUT_DATA_OFFSET */
   uint64_t canary;           /* garbage pattern used to detect overwrites */

   mi_builder b;
};
156
mi_builder_test()157 mi_builder_test::mi_builder_test() :
158 fd(-1)
159 { }
160
~mi_builder_test()161 mi_builder_test::~mi_builder_test()
162 {
163 close(fd);
164 }
165
166 // 1 MB of batch should be enough for anyone, right?
167 #define BATCH_BO_SIZE (256 * 4096)
168 #define DATA_BO_SIZE 4096
169
170 void
SetUp()171 mi_builder_test::SetUp()
172 {
173 drmDevicePtr devices[8];
174 int max_devices = drmGetDevices2(0, devices, 8);
175
176 int i;
177 for (i = 0; i < max_devices; i++) {
178 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
179 devices[i]->bustype == DRM_BUS_PCI &&
180 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
181 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
182 if (fd < 0)
183 continue;
184
185 /* We don't really need to do this when running on hardware because
186 * we can just pull it from the drmDevice. However, without doing
187 * this, intel_dump_gpu gets a bit of heartburn and we can't use the
188 * --device option with it.
189 */
190 int device_id;
191 drm_i915_getparam getparam = drm_i915_getparam();
192 getparam.param = I915_PARAM_CHIPSET_ID;
193 getparam.value = &device_id;
194 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
195 (void *)&getparam), 0) << strerror(errno);
196
197 ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));
198 if (devinfo.ver != GFX_VER ||
199 (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
200 close(fd);
201 fd = -1;
202 continue;
203 }
204
205
206 /* Found a device! */
207 break;
208 }
209 }
210 ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
211
212 drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();
213 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
214 (void *)&ctx_create), 0) << strerror(errno);
215 ctx_id = ctx_create.ctx_id;
216
217 if (GFX_VER >= 8) {
218 /* On gfx8+, we require softpin */
219 int has_softpin;
220 drm_i915_getparam getparam = drm_i915_getparam();
221 getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
222 getparam.value = &has_softpin;
223 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
224 (void *)&getparam), 0) << strerror(errno);
225 ASSERT_TRUE(has_softpin);
226 }
227
228 // Create the batch buffer
229 drm_i915_gem_create gem_create = drm_i915_gem_create();
230 gem_create.size = BATCH_BO_SIZE;
231 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
232 (void *)&gem_create), 0) << strerror(errno);
233 batch_bo_handle = gem_create.handle;
234 #if GFX_VER >= 8
235 batch_bo_addr = 0xffffffffdff70000ULL;
236 #endif
237
238 drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
239 gem_caching.handle = batch_bo_handle;
240 gem_caching.caching = I915_CACHING_CACHED;
241 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
242 (void *)&gem_caching), 0) << strerror(errno);
243
244 drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
245 gem_mmap.handle = batch_bo_handle;
246 gem_mmap.offset = 0;
247 gem_mmap.size = BATCH_BO_SIZE;
248 gem_mmap.flags = 0;
249 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
250 (void *)&gem_mmap), 0) << strerror(errno);
251 batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
252
253 // Start the batch at zero
254 batch_offset = 0;
255
256 // Create the data buffer
257 gem_create = drm_i915_gem_create();
258 gem_create.size = DATA_BO_SIZE;
259 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
260 (void *)&gem_create), 0) << strerror(errno);
261 data_bo_handle = gem_create.handle;
262 #if GFX_VER >= 8
263 data_bo_addr = 0xffffffffefff0000ULL;
264 #endif
265
266 gem_caching = drm_i915_gem_caching();
267 gem_caching.handle = data_bo_handle;
268 gem_caching.caching = I915_CACHING_CACHED;
269 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
270 (void *)&gem_caching), 0) << strerror(errno);
271
272 gem_mmap = drm_i915_gem_mmap();
273 gem_mmap.handle = data_bo_handle;
274 gem_mmap.offset = 0;
275 gem_mmap.size = DATA_BO_SIZE;
276 gem_mmap.flags = 0;
277 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
278 (void *)&gem_mmap), 0) << strerror(errno);
279 data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
280 input = (char *)data_map + INPUT_DATA_OFFSET;
281 output = (char *)data_map + OUTPUT_DATA_OFFSET;
282
283 // Fill the test data with garbage
284 memset(data_map, 139, DATA_BO_SIZE);
285 memset(&canary, 139, sizeof(canary));
286
287 mi_builder_init(&b, &devinfo, this);
288 }
289
290 void *
emit_dwords(int num_dwords)291 mi_builder_test::emit_dwords(int num_dwords)
292 {
293 void *ptr = (void *)((char *)batch_map + batch_offset);
294 batch_offset += num_dwords * 4;
295 assert(batch_offset < BATCH_BO_SIZE);
296 return ptr;
297 }
298
/* Terminate the batch with MI_BATCH_BUFFER_END, submit it to the render
 * ring, and block until the GPU has finished executing it.
 */
void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   // (batch_offset is always a multiple of 4, so bit 2 is set exactly when
   // the dword count is odd.)
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   /* Object 0: the data BO the tests read from and write to. */
   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   /* Object 1: the batch buffer itself. */
   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   /* Relocations recorded by __gen_combine_address during building. */
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   /* Wait (effectively forever) for the batch to complete so the tests
    * can read results back through the CPU mapping.
    */
   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}
355
/* Resolve a (BO handle, offset) address into the 64-bit value written into
 * the batch.  On gfx8+ both BOs are softpinned so the final GPU address is
 * known up front; on older gens a relocation entry is recorded instead and
 * the delta is returned as the presumed value.
 */
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
      test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   /* With I915_EXEC_HANDLE_LUT, target_handle is an index into the execbuf
    * object list: 0 = data BO, 1 = batch BO (see submit_batch()).
    */
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   return reloc.delta;
#endif
}
375
/* genxml hook: forward batch-space requests to the test fixture. */
void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}
381
382 struct address
__gen_get_batch_address(mi_builder_test * test,void * location)383 __gen_get_batch_address(mi_builder_test *test, void *location)
384 {
385 assert(location >= test->batch_map);
386 size_t offset = (char *)location - (char *)test->batch_map;
387 assert(offset < BATCH_BO_SIZE);
388 assert(offset <= UINT32_MAX);
389
390 return (struct address) {
391 .gem_handle = test->batch_bo_handle,
392 .offset = (uint32_t)offset,
393 };
394 }
395
396 #include "genxml/genX_pack.h"
397 #include "mi_builder.h"
398
/* Store immediates to memory, both 64- and 32-bit wide. */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   // The dword above the 32-bit store must still hold its canary pattern.
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
415
416 /* mem -> mem copies are only supported on HSW+ */
417 #if GFX_VERx10 >= 75
/* Memory-to-memory copies in every 32/64-bit width combination. */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0), in_mem64(0));
   mi_store(&b, out_mem32(8), in_mem64(0));
   mi_store(&b, out_mem32(16), in_mem32(0));
   mi_store(&b, out_mem64(24), in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32: only the low dword lands; the canary above survives.
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64: zero-extended
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
444 #endif
445
/* Load immediates into a register, then read back through memory.  The
 * register is pre-filled with the canary before each load so stale bits
 * would be detected.
 */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32: the high dword of the register keeps the canary.
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
467
/* Memory-to-register loads in every width combination, read back through
 * memory; register pre-filled with the canary before each load.
 */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32: high dword of the register keeps the canary.
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64: zero-extended into the full register.
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
505
/* mi_memset: fill a 256-byte region with a 32-bit pattern. */
TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
}
518
/* mi_memcpy: copy 256 bytes of a ramp pattern from input to output. */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), 256);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
535
536 /* Start of MI_MATH section */
537 #if GFX_VERx10 >= 75
538
539 #define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
540
/* mi_inot: bitwise NOT across width combinations, including double
 * negation (which must restore the original value).
 */
TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}
568
569 /* Test adding of immediates of all kinds including
570 *
571 * - All zeroes
572 * - All ones
573 * - inverted constants
574 */
/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 *
 * Each case is exercised with the immediate as both the left and the right
 * operand to cover both commutations.
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
            mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
            mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
            mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
            mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
            mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
            mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
            mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
            mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
            mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);   // ~0 == -1
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);       // ~(-1) == 0
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
627
/* Unsigned comparisons (<, >=, ==, !=) over all ordered pairs of a small
 * value set; GPU results are compared against the builder's immediate
 * folding of the same expression.
 */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Output layout: 256 bytes per i, 32 bytes per j, one qword per op. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                  mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                  mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                  mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                  mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}
671
/* mi_z / mi_nz: zero and non-zero tests on interesting 64-bit values. */
TEST_F(mi_builder_test, z_nz)
{
   /* Sized by its initializers: the original declared values[8] with only
    * five initializers, so the zero value was silently tested three extra
    * times.  Also, UINT32_MAX + 1 was computed in 32-bit unsigned
    * arithmetic and wrapped to 0; the intended value is 2^32.
    */
   uint64_t values[] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
696
/* mi_iand: 64-bit bitwise AND of two memory operands. */
TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   /* Compare against the builder's immediate folding of the same AND. */
   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                                  mi_imm(values[1])));
}
712
713 #if GFX_VERx10 >= 125
/* mi_ishl: left shift of a 64-bit value by variable shift counts. */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
734
/* mi_ushr: unsigned (logical) right shift by variable shift counts. */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
755
/* mi_ushr_imm: unsigned right shift by every count from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}
773
/* mi_ishr: arithmetic right shift of both a positive- and a negative-
 * looking value by variable shift counts.
 */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                  mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}
801
/* mi_ishr_imm: arithmetic right shift by every count from 0 to 64. */
TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
819 #endif /* if GFX_VERx10 >= 125 */
820
/* mi_imul_imm: multiply two 64-bit memory values by a spread of 32-bit
 * immediate factors.
 */
TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };

   /* Output layout: 160 bytes (20 qwords) per lhs value. */
   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                  mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}
857
/* mi_ishl_imm: left shift by every count from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}
875
/* mi_ushr32_imm: 32-bit unsigned right shift by counts 0 through 64. */
TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}
893
/* mi_udiv32_imm: 32-bit unsigned division over all pairs of a value set. */
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };
   memcpy(input, values, sizeof(values));

   /* Output layout: 80 bytes (20 dwords) per dividend. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                  mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}
925
/* mi_store_if: predicated stores land when MI_PREDICATE evaluates true and
 * are dropped when it evaluates false.
 */
TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
957
958 #endif /* GFX_VERx10 >= 75 */
959
960 #if GFX_VERx10 >= 125
961
962 /*
963 * Indirect load/store tests. Only available on XE_HP+
964 */
965
/* mi_load_mem64_offset: indirect 64-bit load — the byte offset into the
 * input region is itself read from memory.
 */
TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Shuffled qword offsets into the values array above. */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}
993
/* mi_store_mem64_offset: indirect 64-bit store — the destination byte
 * offset into the output region is read from memory.
 */
TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Shuffled qword destination offsets. */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                            in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}
1021
1022 /*
1023 * Control-flow tests. Only available on XE_HP+
1024 */
1025
/* mi_goto: an unconditional forward jump skips the store between the jump
 * and its target.
 */
TEST_F(mi_builder_test, goto)
{
   const uint64_t value = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(value));

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
}
1044
1045 #define MI_PREDICATE_RESULT 0x2418
1046
/* mi_goto_if: a conditional jump is not taken while MI_PREDICATE_RESULT is
 * false and is taken once it becomes true, skipping the final store.
 */
TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   /* Predicate false: the first goto_if must fall through. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   /* Predicate true: the second goto_if must be taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}
1085
/* mi_loop / mi_break_if: a GPU-side counting loop terminates after exactly
 * loop_count iterations.
 */
TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}
1102
/* mi_break / mi_break_if: a false break condition falls through, an
 * unconditional break exits the loop before the final store.
 */
TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      /* Condition is 0, so execution continues past this break. */
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      mi_break(&b);

      /* Never reached. */
      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}
1121
/* mi_continue: the tail of the loop body after an unconditional continue
 * never executes, so the second output slot keeps the pre-continue value.
 */
TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      /* Never reached. */
      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}
1145
/* mi_continue_if: a false-predicated continue falls through; a
 * true-predicated continue skips the rest of the loop body, so the second
 * output slot ends at 10 rather than 15.
 */
TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Predicate false: this continue must fall through. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      /* Predicate true: this continue must be taken. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      /* Never reached. */
      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
1185 #endif /* GFX_VERx10 >= 125 */
1186