1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27
28 #include <gtest/gtest.h>
29
30 #include "dev/gen_device_info.h"
31 #include "drm-uapi/i915_drm.h"
32 #include "genxml/gen_macros.h"
33 #include "util/macros.h"
34
35 class gen_mi_builder_test;
36
/* Address type handed to the genxml pack functions and the MI builder:
 * a location inside one of the test's buffer objects.
 */
struct address {
   /* GEM handle of the buffer object being addressed. */
   uint32_t gem_handle;
   /* Byte offset from the start of that buffer object. */
   uint32_t offset;
};
41
/* Customisation points defined ahead of the genxml/MI-builder includes
 * further down: the address type and the user-data type that is passed as
 * the first argument to the callbacks declared here.
 */
#define __gen_address_type struct address
#define __gen_user_data ::gen_mi_builder_test

/* Records a relocation for `addr + delta` at `location` in the batch; defined
 * later in this file.
 */
uint64_t __gen_combine_address(gen_mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
/* Reserves `num_dwords` dwords of batch space; defined later in this file. */
void * __gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords);
48
49 struct address
__gen_address_offset(address addr,uint64_t offset)50 __gen_address_offset(address addr, uint64_t offset)
51 {
52 addr.offset += offset;
53 return addr;
54 }
55
/* MMIO offset of the scratch register the register tests store into and read
 * back from.  The register used depends on the hardware generation.
 */
#if GEN_GEN >= 8 || GEN_IS_HASWELL
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GEN7_3DPRIM_START_VERTEX */
#endif
/* NOTE(review): presumably limits the MI builder's GPR allocator to 15
 * registers so that MI_ALU_REG15 above stays free for the tests -- confirm
 * against gen_mi_builder.h.
 */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 15
/* Byte offsets of the input and output regions inside the data BO; used by
 * in_addr()/out_addr() and by SetUp() to derive the input/output pointers.
 */
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

/* Token-pasting helpers that map a genxml command name to its generated
 * _length, _length_bias, _header and _pack identifiers.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
69
70 #include "genxml/genX_pack.h"
71 #include "gen_mi_builder.h"
72
/* Emits one genxml command from inside a fixture method.
 *
 * Usage: emit_cmd(GENX(CMD), name) { name.Field = ...; }
 *
 * The for-header declares `name` initialised with the command's header and
 * reserves the command's length in dwords via emit_dwords().  The attached
 * block runs exactly once; afterwards the increment expression packs `name`
 * into the reserved space and clears _dst, which terminates the loop.
 */
#define emit_cmd(cmd, name) \
   for (struct cmd name = { __genxml_cmd_header(cmd) }, \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
78
79 #include <vector>
80
81 class gen_mi_builder_test : public ::testing::Test {
82 public:
83 gen_mi_builder_test();
84 ~gen_mi_builder_test();
85
86 void SetUp();
87
88 void *emit_dwords(int num_dwords);
89 void submit_batch();
90
in_addr(uint32_t offset)91 inline address in_addr(uint32_t offset)
92 {
93 address addr;
94 addr.gem_handle = data_bo_handle;
95 addr.offset = INPUT_DATA_OFFSET + offset;
96 return addr;
97 }
98
out_addr(uint32_t offset)99 inline address out_addr(uint32_t offset)
100 {
101 address addr;
102 addr.gem_handle = data_bo_handle;
103 addr.offset = OUTPUT_DATA_OFFSET + offset;
104 return addr;
105 }
106
in_mem64(uint32_t offset)107 inline gen_mi_value in_mem64(uint32_t offset)
108 {
109 return gen_mi_mem64(in_addr(offset));
110 }
111
in_mem32(uint32_t offset)112 inline gen_mi_value in_mem32(uint32_t offset)
113 {
114 return gen_mi_mem32(in_addr(offset));
115 }
116
out_mem64(uint32_t offset)117 inline gen_mi_value out_mem64(uint32_t offset)
118 {
119 return gen_mi_mem64(out_addr(offset));
120 }
121
out_mem32(uint32_t offset)122 inline gen_mi_value out_mem32(uint32_t offset)
123 {
124 return gen_mi_mem32(out_addr(offset));
125 }
126
127 int fd;
128 gen_device_info devinfo;
129
130 uint32_t batch_bo_handle;
131 uint32_t batch_offset;
132 void *batch_map;
133
134 std::vector<drm_i915_gem_relocation_entry> relocs;
135
136 uint32_t data_bo_handle;
137 void *data_map;
138 char *input;
139 char *output;
140 uint64_t canary;
141
142 gen_mi_builder b;
143 };
144
gen_mi_builder_test()145 gen_mi_builder_test::gen_mi_builder_test() :
146 fd(-1)
147 { }
148
~gen_mi_builder_test()149 gen_mi_builder_test::~gen_mi_builder_test()
150 {
151 close(fd);
152 }
153
// 1 MB of batch should be enough for anyone, right?
// (emit_dwords() asserts that the batch never grows to this size.)
#define BATCH_BO_SIZE (256 * 4096)
// Size in bytes of the data BO.  The input region starts at
// INPUT_DATA_OFFSET and the output region at OUTPUT_DATA_OFFSET.
#define DATA_BO_SIZE 4096
157
158 void
SetUp()159 gen_mi_builder_test::SetUp()
160 {
161 drmDevicePtr devices[8];
162 int max_devices = drmGetDevices2(0, devices, 8);
163
164 int i;
165 for (i = 0; i < max_devices; i++) {
166 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
167 devices[i]->bustype == DRM_BUS_PCI &&
168 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
169 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
170 if (fd < 0)
171 continue;
172
173 /* We don't really need to do this when running on hardware because
174 * we can just pull it from the drmDevice. However, without doing
175 * this, intel_dump_gpu gets a bit of heartburn and we can't use the
176 * --device option with it.
177 */
178 int device_id;
179 drm_i915_getparam getparam = drm_i915_getparam();
180 getparam.param = I915_PARAM_CHIPSET_ID;
181 getparam.value = &device_id;
182 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
183 (void *)&getparam), 0) << strerror(errno);
184
185 ASSERT_TRUE(gen_get_device_info_from_pci_id(device_id, &devinfo));
186 if (devinfo.gen != GEN_GEN || devinfo.is_haswell != GEN_IS_HASWELL) {
187 close(fd);
188 fd = -1;
189 continue;
190 }
191
192
193 /* Found a device! */
194 break;
195 }
196 }
197 ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
198
199 // Create the batch buffer
200 drm_i915_gem_create gem_create = drm_i915_gem_create();
201 gem_create.size = BATCH_BO_SIZE;
202 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
203 (void *)&gem_create), 0) << strerror(errno);
204 batch_bo_handle = gem_create.handle;
205
206 drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
207 gem_caching.handle = batch_bo_handle;
208 gem_caching.caching = I915_CACHING_CACHED;
209 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
210 (void *)&gem_caching), 0) << strerror(errno);
211
212 drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
213 gem_mmap.handle = batch_bo_handle;
214 gem_mmap.offset = 0;
215 gem_mmap.size = BATCH_BO_SIZE;
216 gem_mmap.flags = 0;
217 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
218 (void *)&gem_mmap), 0) << strerror(errno);
219 batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
220
221 // Start the batch at zero
222 batch_offset = 0;
223
224 // Create the data buffer
225 gem_create = drm_i915_gem_create();
226 gem_create.size = DATA_BO_SIZE;
227 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
228 (void *)&gem_create), 0) << strerror(errno);
229 data_bo_handle = gem_create.handle;
230
231 gem_caching = drm_i915_gem_caching();
232 gem_caching.handle = data_bo_handle;
233 gem_caching.caching = I915_CACHING_CACHED;
234 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
235 (void *)&gem_caching), 0) << strerror(errno);
236
237 gem_mmap = drm_i915_gem_mmap();
238 gem_mmap.handle = data_bo_handle;
239 gem_mmap.offset = 0;
240 gem_mmap.size = DATA_BO_SIZE;
241 gem_mmap.flags = 0;
242 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
243 (void *)&gem_mmap), 0) << strerror(errno);
244 data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
245 input = (char *)data_map + INPUT_DATA_OFFSET;
246 output = (char *)data_map + OUTPUT_DATA_OFFSET;
247
248 // Fill the test data with garbage
249 memset(data_map, 139, DATA_BO_SIZE);
250 memset(&canary, 139, sizeof(canary));
251
252 gen_mi_builder_init(&b, this);
253 }
254
255 void *
emit_dwords(int num_dwords)256 gen_mi_builder_test::emit_dwords(int num_dwords)
257 {
258 void *ptr = (void *)((char *)batch_map + batch_offset);
259 batch_offset += num_dwords * 4;
260 assert(batch_offset < BATCH_BO_SIZE);
261 return ptr;
262 }
263
264 void
submit_batch()265 gen_mi_builder_test::submit_batch()
266 {
267 gen_mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
268
269 // Round batch up to an even number of dwords.
270 if (batch_offset & 4)
271 gen_mi_builder_emit(&b, GENX(MI_NOOP), noop);
272
273 drm_i915_gem_exec_object2 objects[2];
274 memset(objects, 0, sizeof(objects));
275
276 objects[0].handle = data_bo_handle;
277 objects[0].relocation_count = 0;
278 objects[0].relocs_ptr = 0;
279 objects[0].flags = EXEC_OBJECT_WRITE;
280 objects[0].offset = -1;
281 if (GEN_GEN >= 8)
282 objects[0].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
283
284 objects[1].handle = batch_bo_handle;
285 objects[1].relocation_count = relocs.size();
286 objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
287 objects[1].flags = 0;
288 objects[1].offset = -1;
289 if (GEN_GEN >= 8)
290 objects[1].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
291
292 drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
293 execbuf.buffers_ptr = (uintptr_t)(void *)objects;
294 execbuf.buffer_count = 2;
295 execbuf.batch_start_offset = 0;
296 execbuf.batch_len = batch_offset;
297 execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
298
299 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
300 (void *)&execbuf), 0) << strerror(errno);
301
302 drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
303 gem_wait.bo_handle = batch_bo_handle;
304 gem_wait.timeout_ns = INT64_MAX;
305 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
306 (void *)&gem_wait), 0) << strerror(errno);
307 }
308
309 uint64_t
__gen_combine_address(gen_mi_builder_test * test,void * location,address addr,uint32_t delta)310 __gen_combine_address(gen_mi_builder_test *test, void *location,
311 address addr, uint32_t delta)
312 {
313 drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
314 reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
315 reloc.delta = addr.offset + delta;
316 reloc.offset = (char *)location - (char *)test->batch_map;
317 reloc.presumed_offset = -1;
318 test->relocs.push_back(reloc);
319
320 return reloc.delta;
321 }
322
323 void *
__gen_get_batch_dwords(gen_mi_builder_test * test,unsigned num_dwords)324 __gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords)
325 {
326 return test->emit_dwords(num_dwords);
327 }
328
329 #include "genxml/genX_pack.h"
330 #include "gen_mi_builder.h"
331
/* Stores a 64-bit immediate to a 64-bit and to a 32-bit memory destination. */
TEST_F(gen_mi_builder_test, imm_mem)
{
   const uint64_t imm = 0x0123456789abcdef;

   gen_mi_store(&b, out_mem64(0), gen_mi_imm(imm));
   gen_mi_store(&b, out_mem32(8), gen_mi_imm(imm));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   const uint32_t *const out32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out64[0], imm);

   // 64 -> 32: only the low dword is written, the next dword keeps the
   // fill pattern.
   EXPECT_EQ(out32[2], (uint32_t)imm);
   EXPECT_EQ(out32[3], (uint32_t)canary);
}
348
349 /* mem -> mem copies are only supported on HSW+ */
350 #if GEN_GEN >= 8 || GEN_IS_HASWELL
/* Copies memory to memory for every combination of 32/64-bit source and
 * destination width.
 */
TEST_F(gen_mi_builder_test, mem_mem)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   gen_mi_store(&b, out_mem64(0), in_mem64(0));
   gen_mi_store(&b, out_mem32(8), in_mem64(0));
   gen_mi_store(&b, out_mem32(16), in_mem32(0));
   gen_mi_store(&b, out_mem64(24), in_mem32(0));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   const uint32_t *const out32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out64[0], pattern);

   // 64 -> 32
   EXPECT_EQ(out32[2], (uint32_t)pattern);
   EXPECT_EQ(out32[3], (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(out32[4], (uint32_t)pattern);
   EXPECT_EQ(out32[5], (uint32_t)canary);

   // 32 -> 64: the upper dword of the destination reads back as zero
   EXPECT_EQ(out64[3], (uint64_t)(uint32_t)pattern);
}
377 #endif
378
/* Loads an immediate into the scratch register as a 64-bit and as a 32-bit
 * destination, dumping the full 64-bit register to memory after each.
 */
TEST_F(gen_mi_builder_test, imm_reg)
{
   const uint64_t imm = 0x0123456789abcdef;

   /* Pre-load the register with the canary so untouched bits are visible. */
   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(imm));
   gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));

   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), gen_mi_imm(imm));
   gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   const uint32_t *const out32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out64[0], imm);

   // 64 -> 32
   EXPECT_EQ(out32[2], (uint32_t)imm);
   EXPECT_EQ(out32[3], (uint32_t)canary);
}
400
/* Loads the scratch register from memory for every combination of 32/64-bit
 * source and destination width, dumping the full register after each load.
 */
TEST_F(gen_mi_builder_test, mem_reg)
{
   const uint64_t pattern = 0x0123456789abcdef;
   memcpy(input, &pattern, sizeof(pattern));

   /* Each group: reset the register to the canary, load, dump 64 bits. */
   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));

   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));

   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   gen_mi_store(&b, out_mem64(16), gen_mi_reg64(RSVD_TEMP_REG));

   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   gen_mi_store(&b, out_mem64(24), gen_mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   const uint32_t *const out32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out64[0], pattern);

   // 64 -> 32
   EXPECT_EQ(out32[2], (uint32_t)pattern);
   EXPECT_EQ(out32[3], (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(out32[4], (uint32_t)pattern);
   EXPECT_EQ(out32[5], (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(out64[3], (uint64_t)(uint32_t)pattern);
}
438
/* Fills the start of the output region with a 32-bit pattern and checks
 * every dword of the filled range.
 */
TEST_F(gen_mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   const uint32_t *const filled = (const uint32_t *)output;
   for (unsigned dw = 0; dw < memset_size / sizeof(*filled); dw++)
      EXPECT_EQ(filled[dw], 0xdeadbeef);
}
451
/* Copies memcpy_size bytes from the input region to the output region and
 * verifies every byte.
 */
TEST_F(gen_mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Distinct value per byte so misplaced bytes are detectable. */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = (uint8_t)i;

   /* Use memcpy_size rather than a repeated literal so the copy length can
    * never drift away from the fill and verification loops.
    */
   gen_mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
468
469 /* Start of MI_MATH section */
470 #if GEN_GEN >= 8 || GEN_IS_HASWELL
471
472 /* Test adding of immediates of all kinds including
473 *
474 * - All zeroes
475 * - All ones
476 * - inverted constants
477 */
/* Adds a memory operand to zero, all-ones, an arbitrary constant and the
 * inverted forms of each, with the immediate on either side of the add.
 */
TEST_F(gen_mi_builder_test, add_imm)
{
   const uint64_t base = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &base, sizeof(base));

   /* Memory operand on the left, immediate on the right. */
   gen_mi_store(&b, out_mem64(0),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(0)));
   gen_mi_store(&b, out_mem64(8),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(-1)));
   gen_mi_store(&b, out_mem64(16),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(0))));
   gen_mi_store(&b, out_mem64(24),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(-1))));
   gen_mi_store(&b, out_mem64(32),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(add)));
   gen_mi_store(&b, out_mem64(40),
                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(add))));

   /* Same operands with the immediate on the left. */
   gen_mi_store(&b, out_mem64(48),
                gen_mi_iadd(&b, gen_mi_imm(0), in_mem64(0)));
   gen_mi_store(&b, out_mem64(56),
                gen_mi_iadd(&b, gen_mi_imm(-1), in_mem64(0)));
   gen_mi_store(&b, out_mem64(64),
                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(0)), in_mem64(0)));
   gen_mi_store(&b, out_mem64(72),
                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(-1)), in_mem64(0)));
   gen_mi_store(&b, out_mem64(80),
                gen_mi_iadd(&b, gen_mi_imm(add), in_mem64(0)));
   gen_mi_store(&b, out_mem64(88),
                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   gen_mi_store(&b, out_mem64(96), gen_mi_iadd_imm(&b, in_mem64(0), 0));
   gen_mi_store(&b, out_mem64(104), gen_mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;

   EXPECT_EQ(out64[0], base);
   EXPECT_EQ(out64[1], base - 1);
   EXPECT_EQ(out64[2], base - 1);
   EXPECT_EQ(out64[3], base);
   EXPECT_EQ(out64[4], base + add);
   EXPECT_EQ(out64[5], base + ~add);
   EXPECT_EQ(out64[6], base);
   EXPECT_EQ(out64[7], base - 1);
   EXPECT_EQ(out64[8], base - 1);
   EXPECT_EQ(out64[9], base);
   EXPECT_EQ(out64[10], base + add);
   EXPECT_EQ(out64[11], base + ~add);
   EXPECT_EQ(out64[12], base);
   EXPECT_EQ(out64[13], base + add);
}
530
/* Checks the unsigned less-than and greater-or-equal comparisons for every
 * ordered pair drawn from a small set of 64-bit values.  A true comparison
 * is expected to produce all ones in the 32-bit result, a false one zero.
 */
TEST_F(gen_mi_builder_test, ilt_uge)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Pair (li, ri) owns 8 output bytes: ult result, then uge result. */
   for (unsigned li = 0; li < ARRAY_SIZE(values); li++) {
      for (unsigned ri = 0; ri < ARRAY_SIZE(values); ri++) {
         const unsigned slot = li * 64 + ri * 8;

         gen_mi_store(&b, out_mem32(slot + 0),
                      gen_mi_ult(&b, in_mem64(li * 8), in_mem64(ri * 8)));
         gen_mi_store(&b, out_mem32(slot + 4),
                      gen_mi_uge(&b, in_mem64(li * 8), in_mem64(ri * 8)));
      }
   }

   submit_batch();

   for (unsigned li = 0; li < ARRAY_SIZE(values); li++) {
      for (unsigned ri = 0; ri < ARRAY_SIZE(values); ri++) {
         const uint32_t *const pair =
            (const uint32_t *)(output + li * 64 + ri * 8);

         EXPECT_EQ(pair[0], values[li] < values[ri] ? ~0u : 0u);
         EXPECT_EQ(pair[1], values[li] >= values[ri] ? ~0u : 0u);
      }
   }
}
564
/* Bitwise AND of two 64-bit memory operands. */
TEST_F(gen_mi_builder_test, iand)
{
   const uint64_t operands[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, operands, sizeof(operands));

   gen_mi_store(&b, out_mem64(0), gen_mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   EXPECT_EQ(out64[0], operands[0] & operands[1]);
}
579
/* Multiplies two 64-bit memory operands by each of a list of 32-bit
 * immediates and compares against the CPU's 64-bit product.
 */
TEST_F(gen_mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   /* Each lhs row occupies 20 * 8 = 160 output bytes. */
   for (unsigned row = 0; row < ARRAY_SIZE(lhs); row++) {
      for (unsigned col = 0; col < ARRAY_SIZE(rhs); col++) {
         gen_mi_store(&b, out_mem64(row * 160 + col * 8),
                      gen_mi_imul_imm(&b, in_mem64(row * 8), rhs[col]));
      }
   }

   submit_batch();

   for (unsigned row = 0; row < ARRAY_SIZE(lhs); row++) {
      for (unsigned col = 0; col < ARRAY_SIZE(rhs); col++) {
         const uint64_t *const product =
            (const uint64_t *)(output + row * 160 + col * 8);

         EXPECT_EQ(*product, lhs[row] * rhs[col]);
      }
   }
}
615
/* Shifts a 64-bit value left by every amount from 0 through 64 inclusive.
 * A shift by 64 is expected to yield zero.
 */
TEST_F(gen_mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      gen_mi_store(&b, out_mem64(shift * 8),
                   gen_mi_ishl_imm(&b, in_mem64(0), shift));
   }

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      /* Shifting a 64-bit value by 64 is not defined on the CPU side, so
       * that case is spelled out rather than computed.
       */
      const uint64_t expected = shift >= 64 ? 0 : value << shift;
      EXPECT_EQ(out64[shift], expected);
   }
}
636
/* Shifts a 64-bit value right by every amount from 0 through 64 inclusive.
 * The result is expected to be the shifted value truncated to its low 32
 * bits, and zero for a shift by 64.
 */
TEST_F(gen_mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      gen_mi_store(&b, out_mem64(shift * 8),
                   gen_mi_ushr32_imm(&b, in_mem64(0), shift));
   }

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      /* Shifting a 64-bit value by 64 is not defined on the CPU side, so
       * that case is spelled out rather than computed.
       */
      const uint64_t expected =
         shift >= 64 ? 0 : (value >> shift) & UINT32_MAX;
      EXPECT_EQ(out64[shift], expected);
   }
}
657
/* Divides every value of a list by every value of the same list, with the
 * dividend read from memory and the divisor given as an immediate.
 */
TEST_F(gen_mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, values, sizeof(values));

   /* Each dividend row occupies 20 * 4 = 80 output bytes. */
   for (unsigned num = 0; num < ARRAY_SIZE(values); num++) {
      for (unsigned den = 0; den < ARRAY_SIZE(values); den++) {
         gen_mi_store(&b, out_mem32(num * 80 + den * 4),
                      gen_mi_udiv32_imm(&b, in_mem32(num * 4), values[den]));
      }
   }

   submit_batch();

   for (unsigned num = 0; num < ARRAY_SIZE(values); num++) {
      for (unsigned den = 0; den < ARRAY_SIZE(values); den++) {
         const uint32_t *const quotient =
            (const uint32_t *)(output + num * 80 + den * 4);

         EXPECT_EQ(*quotient, values[num] / values[den]);
      }
   }
}
689
/* Predicated stores: values written while the predicate compares TRUE must
 * land, and later stores issued while it compares FALSE must not overwrite
 * them.
 */
TEST_F(gen_mi_builder_test, store_if)
{
   uint64_t kept64 = 0xb453b411deadc0deull;
   uint32_t kept32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation      = LOAD_LOAD;
      pred.CombineOperation   = COMBINE_SET;
      pred.CompareOperation   = COMPARE_TRUE;
   }

   gen_mi_store_if(&b, out_mem64(0), gen_mi_imm(kept64));
   gen_mi_store_if(&b, out_mem32(8), gen_mi_imm(kept32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation      = LOAD_LOAD;
      pred.CombineOperation   = COMBINE_SET;
      pred.CompareOperation   = COMPARE_FALSE;
   }

   gen_mi_store_if(&b, out_mem64(0), gen_mi_imm(0xd0d0d0d0d0d0d0d0ull));
   gen_mi_store_if(&b, out_mem32(8), gen_mi_imm(0xc000c000));

   submit_batch();

   const uint64_t *const out64 = (const uint64_t *)output;
   const uint32_t *const out32 = (const uint32_t *)output;

   EXPECT_EQ(out64[0], kept64);
   EXPECT_EQ(out32[2], kept32);
   /* The dword after the 32-bit store still holds the fill pattern. */
   EXPECT_EQ(out32[3], (uint32_t)canary);
}
721
722 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
723