1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27 #include <sys/mman.h>
28
29 #include <gtest/gtest.h>
30
31 #include "c99_compat.h"
32 #include "common/intel_gem.h"
33 #include "dev/intel_device_info.h"
34 #include "intel_gem.h"
35 #include "isl/isl.h"
36 #include "drm-uapi/i915_drm.h"
37 #include "genxml/gen_macros.h"
38 #include "util/macros.h"
39
40 class mi_builder_test;
41
/* A BO-relative GPU address: a GEM handle plus a byte offset into that BO.
 * Used as __gen_address_type by the genxml pack functions below.
 */
struct address {
   uint32_t gem_handle;
   uint32_t offset;
};
46
#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

/* Callbacks required by the genxml pack functions and mi_builder; the
 * definitions live after the test fixture class below.
 */
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                      void *location);
55
56 struct address
__gen_address_offset(address addr,uint64_t offset)57 __gen_address_offset(address addr, uint64_t offset)
58 {
59 addr.offset += offset;
60 return addr;
61 }
62
/* An MMIO register the tests may clobber freely as a scratch location. */
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
/* NOTE(review): presumably 15 because ALU GPR15 is reserved as the temp
 * register above -- confirm against mi_builder.h.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 15
/* Byte offsets of the input/output halves within the data BO. */
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

/* Token-pasting helpers used by emit_cmd() below to reach the genxml
 * per-command length/header/pack symbols.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
76
77 #include "genxml/genX_pack.h"
78 #include "mi_builder.h"
79
/* Emit a genxml command into the batch.  Declares `name` as the command
 * struct for the body to fill in; on loop exit the struct is packed into
 * freshly allocated batch dwords.  Runs the body exactly once.
 */
#define emit_cmd(cmd, name) \
   for (struct cmd name = { __genxml_cmd_header(cmd) }, \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
85
86 #include <vector>
87
/* gtest fixture shared by all mi_builder tests.  SetUp() opens an Intel
 * DRM render node matching the compiled GFX_VER, creates and maps a batch
 * BO plus a data BO (input half / output half), and initializes the
 * mi_builder `b`.  Tests emit MI commands through `b`, call
 * submit_batch(), then read results back through `output`.
 */
class mi_builder_test : public ::testing::Test {
public:
   mi_builder_test();
   ~mi_builder_test();

   void SetUp();

   /* Bump-allocates num_dwords of space in the batch BO. */
   void *emit_dwords(int num_dwords);
   /* Appends MI_BATCH_BUFFER_END, submits, and waits for completion. */
   void submit_batch();

   /* Address of a byte offset within the input region of the data BO. */
   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* Address of a byte offset within the output region of the data BO. */
   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* 64-bit mi_value reference to input memory at `offset`. */
   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   /* 32-bit mi_value reference to input memory at `offset`. */
   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   /* 64-bit mi_value reference to output memory at `offset`. */
   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   /* 32-bit mi_value reference to output memory at `offset`. */
   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd;                    /* DRM render node; -1 until SetUp() opens one */
   uint32_t ctx_id;           /* GEM context the batches execute in */
   intel_device_info devinfo;

   uint32_t batch_bo_handle;
#if GFX_VER >= 8
   uint64_t batch_bo_addr;    /* softpinned GPU address of the batch BO */
#endif
   uint32_t batch_offset;     /* current write position within the batch */
   void *batch_map;

#if GFX_VER < 8
   /* Relocations accumulated by __gen_combine_address for execbuf. */
   std::vector<drm_i915_gem_relocation_entry> relocs;
#endif

   uint32_t data_bo_handle;
#if GFX_VER >= 8
   uint64_t data_bo_addr;     /* softpinned GPU address of the data BO */
#endif
   void *data_map;
   char *input;               /* CPU pointer to the input region */
   char *output;              /* CPU pointer to the output region */
   uint64_t canary;           /* copy of the garbage fill, for no-clobber checks */

   mi_builder b;
};
160
/* Construct with an invalid fd; SetUp() opens the real device. */
mi_builder_test::mi_builder_test() :
   fd(-1)
{ }
164
~mi_builder_test()165 mi_builder_test::~mi_builder_test()
166 {
167 close(fd);
168 }
169
// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
/* One page, split between input (offset 0) and output (offset 2048). */
#define DATA_BO_SIZE 4096
173
174 void
SetUp()175 mi_builder_test::SetUp()
176 {
177 drmDevicePtr devices[8];
178 int max_devices = drmGetDevices2(0, devices, 8);
179
180 int i;
181 for (i = 0; i < max_devices; i++) {
182 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
183 devices[i]->bustype == DRM_BUS_PCI &&
184 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
185 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
186 if (fd < 0)
187 continue;
188
189 /* We don't really need to do this when running on hardware because
190 * we can just pull it from the drmDevice. However, without doing
191 * this, intel_dump_gpu gets a bit of heartburn and we can't use the
192 * --device option with it.
193 */
194 int device_id;
195 ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
196 << strerror(errno);
197
198 ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
199 if (devinfo.ver != GFX_VER ||
200 (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
201 close(fd);
202 fd = -1;
203 continue;
204 }
205
206
207 /* Found a device! */
208 break;
209 }
210 }
211 ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
212
213 ASSERT_TRUE(intel_gem_create_context(fd, &ctx_id)) << strerror(errno);
214
215 if (GFX_VER >= 8) {
216 /* On gfx8+, we require softpin */
217 int has_softpin;
218 ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
219 << strerror(errno);
220 ASSERT_TRUE(has_softpin);
221 }
222
223 // Create the batch buffer
224 drm_i915_gem_create gem_create = drm_i915_gem_create();
225 gem_create.size = BATCH_BO_SIZE;
226 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
227 (void *)&gem_create), 0) << strerror(errno);
228 batch_bo_handle = gem_create.handle;
229 #if GFX_VER >= 8
230 batch_bo_addr = 0xffffffffdff70000ULL;
231 #endif
232
233 if (devinfo.has_caching_uapi) {
234 drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
235 gem_caching.handle = batch_bo_handle;
236 gem_caching.caching = I915_CACHING_CACHED;
237 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
238 (void *)&gem_caching), 0) << strerror(errno);
239 }
240
241 if (devinfo.has_mmap_offset) {
242 drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
243 gem_mmap_offset.handle = batch_bo_handle;
244 gem_mmap_offset.flags = devinfo.has_local_mem ?
245 I915_MMAP_OFFSET_FIXED :
246 I915_MMAP_OFFSET_WC;
247 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
248 &gem_mmap_offset), 0) << strerror(errno);
249
250 batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
251 fd, gem_mmap_offset.offset);
252 ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
253 } else {
254 drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
255 gem_mmap.handle = batch_bo_handle;
256 gem_mmap.offset = 0;
257 gem_mmap.size = BATCH_BO_SIZE;
258 gem_mmap.flags = 0;
259 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
260 (void *)&gem_mmap), 0) << strerror(errno);
261 batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
262 }
263
264 // Start the batch at zero
265 batch_offset = 0;
266
267 // Create the data buffer
268 gem_create = drm_i915_gem_create();
269 gem_create.size = DATA_BO_SIZE;
270 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
271 (void *)&gem_create), 0) << strerror(errno);
272 data_bo_handle = gem_create.handle;
273 #if GFX_VER >= 8
274 data_bo_addr = 0xffffffffefff0000ULL;
275 #endif
276
277 if (devinfo.has_caching_uapi) {
278 drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
279 gem_caching.handle = data_bo_handle;
280 gem_caching.caching = I915_CACHING_CACHED;
281 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
282 (void *)&gem_caching), 0) << strerror(errno);
283 }
284
285 if (devinfo.has_mmap_offset) {
286 drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
287 gem_mmap_offset.handle = data_bo_handle;
288 gem_mmap_offset.flags = devinfo.has_local_mem ?
289 I915_MMAP_OFFSET_FIXED :
290 I915_MMAP_OFFSET_WC;
291 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
292 &gem_mmap_offset), 0) << strerror(errno);
293
294 data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
295 fd, gem_mmap_offset.offset);
296 ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
297 } else {
298 drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
299 gem_mmap.handle = data_bo_handle;
300 gem_mmap.offset = 0;
301 gem_mmap.size = DATA_BO_SIZE;
302 gem_mmap.flags = 0;
303 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
304 (void *)&gem_mmap), 0) << strerror(errno);
305 data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
306 }
307
308 input = (char *)data_map + INPUT_DATA_OFFSET;
309 output = (char *)data_map + OUTPUT_DATA_OFFSET;
310
311 // Fill the test data with garbage
312 memset(data_map, 139, DATA_BO_SIZE);
313 memset(&canary, 139, sizeof(canary));
314
315 struct isl_device isl_dev;
316 isl_device_init(&isl_dev, &devinfo);
317 mi_builder_init(&b, &devinfo, this);
318 const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
319 mi_builder_set_mocs(&b, mocs);
320 }
321
322 void *
emit_dwords(int num_dwords)323 mi_builder_test::emit_dwords(int num_dwords)
324 {
325 void *ptr = (void *)((char *)batch_map + batch_offset);
326 batch_offset += num_dwords * 4;
327 assert(batch_offset < BATCH_BO_SIZE);
328 return ptr;
329 }
330
/* Terminate the batch, submit it on the render ring, and block until the
 * GPU has finished so the test can read results from the data BO.
 */
void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   /* Bit 2 of the byte offset is the parity of the dword count. */
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   /* Object 0 is the data BO; with I915_EXEC_HANDLE_LUT below, this index
    * is what pre-gfx8 relocations reference as target_handle 0.
    */
   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   /* Object 1 is the batch BO (execbuf expects the batch last). */
   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   /* Wait (effectively forever) for the batch to complete. */
   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}
387
/* mi_builder callback: resolve a BO-relative address to the 64-bit value
 * written into the batch.  On gfx8+ the BOs are softpinned so the final
 * GPU address is known up front; on older gens we record a relocation for
 * execbuf and return just the delta as the presumed value.
 */
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   /* Index into the execbuf objects array (I915_EXEC_HANDLE_LUT):
    * 0 = data BO, 1 = batch BO.
    */
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   return reloc.delta;
#endif
}
407
408 void *
__gen_get_batch_dwords(mi_builder_test * test,unsigned num_dwords)409 __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
410 {
411 return test->emit_dwords(num_dwords);
412 }
413
414 struct address
__gen_get_batch_address(mi_builder_test * test,void * location)415 __gen_get_batch_address(mi_builder_test *test, void *location)
416 {
417 assert(location >= test->batch_map);
418 size_t offset = (char *)location - (char *)test->batch_map;
419 assert(offset < BATCH_BO_SIZE);
420 assert(offset <= UINT32_MAX);
421
422 return (struct address) {
423 .gem_handle = test->batch_bo_handle,
424 .offset = (uint32_t)offset,
425 };
426 }
427
428 #include "genxml/genX_pack.h"
429 #include "mi_builder.h"
430
/* Store a 64-bit immediate to 64-bit and 32-bit memory destinations. */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   /* The dword after a 32-bit store must keep its canary fill. */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
447
448 /* mem -> mem copies are only supported on HSW+ */
449 #if GFX_VERx10 >= 75
/* Memory-to-memory copies in every 32/64-bit width combination. */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0), in_mem64(0));
   mi_store(&b, out_mem32(8), in_mem64(0));
   mi_store(&b, out_mem32(16), in_mem32(0));
   mi_store(&b, out_mem64(24), in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   /* 32-bit stores must not clobber the following dword. */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   /* A 32-bit source stored to a 64-bit destination zero-extends. */
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
476 #endif
477
/* Store immediates through the scratch register at 64- and 32-bit widths,
 * seeding the register with the canary first to detect partial writes.
 */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   /* NOTE(review): compares the register's upper half against the canary's
    * low dword -- presumably a 32-bit register write zeroes the upper half
    * only on some gens; verify intent against mi_builder semantics.
    */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
499
/* Load memory into the scratch register and store it back, covering all
 * 32/64-bit width combinations; the register is canary-seeded each time.
 */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   /* 32-bit load into a 64-bit register store zero-extends. */
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
537
/* GPU-side memset: fill the first 256 output bytes with a dword pattern
 * and verify every dword in the range came back as that pattern.
 */
TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   const uint32_t *words = (const uint32_t *)output;
   const unsigned num_words = memset_size / sizeof(*words);
   for (unsigned w = 0; w < num_words; w++)
      EXPECT_EQ(words[w], 0xdeadbeef);
}
550
/* GPU-side memcpy: copy a byte ramp from input to output and verify it
 * arrived intact.  (The copy length now uses memcpy_size consistently
 * instead of a duplicated literal 256.)
 */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Seed the input with an identifiable byte ramp: in_u8[i] == i. */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
567
568 /* Start of MI_MATH section */
569 #if GFX_VERx10 >= 75
570
/* Compare a GPU-computed result against the builder's own CPU-side
 * evaluation of the same expression on immediates.
 */
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
572
/* Bitwise NOT through the ALU at mixed widths, including double-NOT
 * cancellation and NOT of each 32-bit half of a 64-bit value.
 */
TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), ~value);
   /* NOT(NOT(x)) == x */
   EXPECT_EQ(*(uint64_t *)(output + 8), value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}
600
601 /* Test adding of immediates of all kinds including
602 *
603 * - All zeroes
604 * - All ones
605 * - inverted constants
606 */
/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 *
 * Both operand orders are covered since the builder may special-case
 * immediates on either side.
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
            mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
            mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
            mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
            mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
            mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
            mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
            mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
            mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
            mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   /* -1 and NOT(0) are the same operand; NOT(-1) is zero. */
   EXPECT_EQ(*(uint64_t *)(output + 0), value);
   EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24), value);
   EXPECT_EQ(*(uint64_t *)(output + 32), value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48), value);
   EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72), value);
   EXPECT_EQ(*(uint64_t *)(output + 80), value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96), value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
659
/* Unsigned compare operations over the full cross product of a set of
 * interesting 64-bit values; each GPU result is checked against the
 * builder's immediate evaluation of the same comparison.
 */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Each (i, j) pair gets a 32-byte slot of four 64-bit results. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                  mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                  mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                  mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                  mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                          mi_imm(values[j])));
      }
   }
}
703
/* Zero/non-zero predicates across 32/64-bit boundary values.  The array
 * deliberately keeps 8 entries (trailing zeros test duplicate-zero input).
 *
 * Fixed: `UINT32_MAX + 1` was evaluated in 32-bit unsigned arithmetic and
 * wrapped to 0, so the intended 0x100000000 case (non-zero only in the
 * upper half) was never actually tested.  Promote to 64-bit first.
 */
TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1,  /* 1ull << 32: non-zero only in hi dword */
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
728
/* Bitwise AND of two 64-bit memory operands, checked against the
 * builder's immediate evaluation.
 */
TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                              mi_imm(values[1])));
}
744
745 #if GFX_VERx10 >= 125
/* Variable left shift with the shift count read from memory. */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   /* Shift counts live in the input buffer right after the value. */
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
766
/* Variable unsigned (logical) right shift with the count from memory. */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
787
/* Unsigned right shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}
805
/* Variable arithmetic (sign-extending) right shift; both a positive-
 * pattern and a negative-pattern value are shifted by counts from memory.
 */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                  mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}
833
/* Arithmetic right shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
851 #endif /* if GFX_VERx10 >= 125 */
852
/* Multiply 64-bit memory operands by a spread of immediate factors. */
TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };

   /* One 160-byte row of 20 64-bit products per lhs value. */
   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                  mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}
889
/* Left shift by every immediate count from 0 through 64. */
TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}
907
/* 32-bit-result unsigned right shift by immediate counts 0 through 64. */
TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}
925
/* 32-bit unsigned division by immediate, over the full cross product of
 * a set of divisors/dividends (no zero divisor in the set).
 */
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                  mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}
957
/* Predicated stores: values written under a true predicate must land;
 * writes attempted under a false predicate must leave memory untouched.
 */
TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   /* The dword past the 32-bit store keeps its canary fill. */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
989
990 #endif /* GFX_VERx10 >= 75 */
991
992 #if GFX_VERx10 >= 125
993
994 /*
995 * Indirect load/store tests. Only available on XE_HP+
996 */
997
/* Indirect 64-bit load: base address plus a per-iteration offset that is
 * itself read from memory; results are checked against the values the
 * offsets point at.
 */
TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Byte offsets (a permutation of 0..56 step 8) stored after the data. */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}
/* mi_store_mem64_offset() writes a QWord to base address + a 32-bit offset
 * fetched from memory.  Scatter eight QWords through an offset table and
 * verify each landed at its table-selected location.
 */
TEST_F(mi_builder_test, store_mem64_offset)
{
   /* Source QWords at the start of the input buffer. */
   uint64_t data[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, data, sizeof(data));

   /* Destination byte offsets (a permutation of 0..56), stored at +64. */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned idx = 0; idx < ARRAY_SIZE(offsets); idx++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(idx * 4 + 64),
                            in_mem64(idx * 8));
   }

   submit_batch();

   /* data[idx] must have been scattered to output + offsets[idx]. */
   for (unsigned idx = 0; idx < ARRAY_SIZE(offsets); idx++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[idx]), data[idx]);
}

/*
 * Control-flow tests. Only available on XE_HP+
 */

/* An unconditional mi_goto() must jump over everything up to the matching
 * mi_goto_target(); the skipped store must never execute.
 */
TEST_F(mi_builder_test, goto)
{
   const uint64_t expected = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(expected));

   struct mi_goto_target target = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &target);

   /* Jumped over -- must never execute. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &target);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), expected);
}

#define MI_PREDICATE_RESULT 0x2418

/* mi_goto_if() takes the jump only when its condition register is non-zero.
 * Drive the condition from MI_PREDICATE_RESULT: the first goto (predicate
 * false) must fall through, the second (predicate true) must jump past the
 * final store, leaving values[1] in memory.
 */
TEST_F(mi_builder_test, goto_if)
{
   const uint64_t vals[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(vals[0]));

   /* Predicate false: the goto below must NOT be taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target target = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &target);

   mi_store(&b, out_mem64(0), mi_imm(vals[1]));

   /* Predicate true: this goto must be taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &target);

   /* Jumped over -- must never execute. */
   mi_store(&b, out_mem64(0), mi_imm(vals[2]));

   mi_goto_target(&b, &target);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), vals[1]);
}
/* Basic mi_loop(): increment a memory counter until mi_break_if() fires,
 * then check the counter equals the iteration bound.
 */
TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t iterations = 8;

   /* out_mem64(0) doubles as the GPU-side loop counter. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
}
1134
/* mi_break_if() with a false (zero) condition must fall through, while an
 * unconditional mi_break() must exit the loop immediately.  The last value
 * stored before the real break (2) is what must remain in memory.
 */
TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      /* Condition is zero: execution continues past this. */
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      /* Unconditional exit: the store of 3 must never run. */
      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}
1153
/* mi_continue() must jump back to the top of the loop, skipping the rest of
 * the body: the marker at +8 must stay 5 on every iteration, and the counter
 * at +0 must still reach the iteration bound.
 */
TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t iterations = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));   /* loop counter */
   mi_store(&b, out_mem64(8), mi_imm(0));   /* skip marker  */

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      /* Skipped by the continue -- must never execute. */
      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}
1177
/* mi_continue_if() must only branch when MI_PREDICATE_RESULT is non-zero:
 * the false-predicate continue falls through (marker becomes 10), the
 * true-predicate continue jumps back to the loop head (15 never stored).
 */
TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t iterations = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));   /* loop counter */
   mi_store(&b, out_mem64(8), mi_imm(0));   /* skip marker  */

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Predicate false: this continue must NOT be taken. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      /* Predicate true: this continue must be taken. */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      /* Skipped by the taken continue -- must never execute. */
      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
#endif /* GFX_VERx10 >= 125 */
