1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25 #include "brw_disasm_info.h"
26 #include "brw_eu.h"
27 #include "brw_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30
/* Platform names the validator tests are instantiated for.  Each name is
 * passed to intel_device_name_to_pci_device_id() by the fixture's SetUp().
 */
static const struct intel_gfx_info {
   const char *name;
} gfx_names[] = {
   { "skl" },
   { "bxt" },
   { "kbl" },
   { "aml" },
   { "glk" },
   { "cfl" },
   { "whl" },
   { "cml" },
   { "icl" },
   { "ehl" },
   { "jsl" },
   { "tgl" },
   { "rkl" },
   { "dg1" },
   { "adl" },
   { "sg1" },
   { "rpl" },
   { "dg2" },
   { "mtl" },
   { "lnl" },
   { "bmg" },
   { "ptl" },
};
57
58 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
59 virtual void SetUp();
60
61 public:
62 validation_test();
63 virtual ~validation_test();
64
65 struct brw_isa_info isa;
66 struct brw_codegen *p;
67 struct intel_device_info devinfo;
68 };
69
validation_test()70 validation_test::validation_test()
71 {
72 p = rzalloc(NULL, struct brw_codegen);
73 memset(&devinfo, 0, sizeof(devinfo));
74 }
75
~validation_test()76 validation_test::~validation_test()
77 {
78 ralloc_free(p);
79 }
80
SetUp()81 void validation_test::SetUp()
82 {
83 struct intel_gfx_info info = GetParam();
84 int devid = intel_device_name_to_pci_device_id(info.name);
85
86 intel_get_device_info_from_pci_id(devid, &devinfo);
87
88 brw_init_isa_info(&isa, &devinfo);
89
90 brw_init_codegen(&isa, p, p);
91 }
92
93 struct gfx_name {
94 template <class ParamType>
95 std::string
operator ()gfx_name96 operator()(const ::testing::TestParamInfo<ParamType>& info) const {
97 return info.param.name;
98 }
99 };
100
101 INSTANTIATE_TEST_SUITE_P(
102 eu_assembly, validation_test,
103 ::testing::ValuesIn(gfx_names),
104 gfx_name()
105 );
106
107 static bool
validate(struct brw_codegen * p)108 validate(struct brw_codegen *p)
109 {
110 const bool print = getenv("TEST_DEBUG");
111 struct disasm_info *disasm = disasm_initialize(p->isa, NULL);
112
113 if (print) {
114 disasm_new_inst_group(disasm, 0);
115 disasm_new_inst_group(disasm, p->next_insn_offset);
116 }
117
118 bool ret = brw_validate_instructions(p->isa, p->store, 0,
119 p->next_insn_offset, disasm);
120
121 if (print) {
122 dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
123 }
124 ralloc_free(disasm);
125
126 return ret;
127 }
128
/* Shorthands used throughout the test bodies.  last_inst expects a
 * `struct brw_codegen *p` in scope and names the most recently emitted
 * instruction in p->store.
 */
#define last_inst    (p->store + (p->nr_insn - 1))
#define g0           brw_vec8_grf(0, 0)
#define acc0         brw_acc_reg(8)
#define null         brw_null_reg()
#define zero         brw_imm_f(0.0f)
134
135 static void
clear_instructions(struct brw_codegen * p)136 clear_instructions(struct brw_codegen *p)
137 {
138 p->next_insn_offset = 0;
139 p->nr_insn = 0;
140 }
141
/* Baseline: an ADD whose destination and both sources are g0 must validate. */
TEST_P(validation_test, sanity)
{
   const struct brw_reg reg = g0;

   brw_ADD(p, reg, reg, reg);

   EXPECT_TRUE(validate(p));
}
148
/* A MOV reading the null register as src0 must be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   const struct brw_reg src = brw_null_reg();

   brw_MOV(p, g0, src);

   EXPECT_FALSE(validate(p));
}
155
/* An ADD reading the null register as src1 must be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   const struct brw_reg src1 = brw_null_reg();

   brw_ADD(p, g0, g0, src1);

   EXPECT_FALSE(validate(p));
}
162
/* A SIN math instruction whose src0 is the null register must be rejected. */
TEST_P(validation_test, math_src0_null_reg)
{
   const struct brw_reg none = brw_null_reg();

   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, none, none);

   EXPECT_FALSE(validate(p));
}
169
/* A POW math instruction whose src1 is the null register must be rejected. */
TEST_P(validation_test, math_src1_null_reg)
{
   const struct brw_reg src1 = brw_null_reg();

   gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, src1);

   EXPECT_FALSE(validate(p));
}
175
/* Emit a bare instruction with hardware opcode 46 and expect it to validate. */
TEST_P(validation_test, opcode46)
{
   /* opcode 46 is "push" on Gen 4 and 5
    *              "fork" on Gen 6
    *              reserved on Gen 7
    *              "goto" on Gfx8+
    */
   const auto opcode = brw_opcode_decode(&isa, 46);

   brw_next_insn(p, opcode);

   EXPECT_TRUE(validate(p));
}
187
/* Every defined execution-size encoding must validate; the two values past
 * BRW_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum brw_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { BRW_EXECUTE_1,      true  },
      { BRW_EXECUTE_2,      true  },
      { BRW_EXECUTE_4,      true  },
      { BRW_EXECUTE_8,      true  },
      { BRW_EXECUTE_16,     true  },
      { BRW_EXECUTE_32,     true  },

      { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 1), false },
      { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 2), false },
   };

   for (const auto &tc : test_case) {
      brw_MOV(p, g0, g0);
      auto *const insn = last_inst;

      brw_eu_inst_set_exec_size(&devinfo, insn, tc.exec_size);
      brw_eu_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, BRW_TYPE_W);
      brw_eu_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, BRW_TYPE_W);

      /* Use a <2;2,1> source region, except for the scalar case which gets
       * <0;1,0>.
       */
      if (tc.exec_size != BRW_EXECUTE_1) {
         brw_eu_inst_set_src0_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_2);
         brw_eu_inst_set_src0_width(&devinfo, insn, BRW_WIDTH_2);
         brw_eu_inst_set_src0_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_1);
      } else {
         brw_eu_inst_set_src0_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_0);
         brw_eu_inst_set_src0_width(&devinfo, insn, BRW_WIDTH_1);
         brw_eu_inst_set_src0_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_0);
      }

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
227
/* For both the GRF and the immediate register file: every type expected to
 * be supported must encode to something the validator accepts, and every
 * 4-bit hardware type encoding that no supported type maps to must be
 * rejected.
 */
TEST_P(validation_test, invalid_type_encoding)
{
   const enum brw_reg_file files[] = {
      FIXED_GRF,
      IMM,
   };

   for (const enum brw_reg_file file : files) {
      const int num_bits = 4;
      const int num_encodings = 1 << num_bits;

      /* The data types are encoded into <num_bits> bits to be used in hardware
       * instructions, so keep a record in a bitset the invalid patterns so
       * they can be verified to be invalid when used.
       */
      BITSET_DECLARE(invalid_encodings, num_encodings);

      const struct {
         enum brw_reg_type type;
         bool expected_result;
      } test_case[] = {
         { BRW_TYPE_DF, devinfo.has_64bit_float },
         { BRW_TYPE_F,  true },
         { BRW_TYPE_HF, true },
         { BRW_TYPE_VF, file == IMM },
         { BRW_TYPE_Q,  devinfo.has_64bit_int },
         { BRW_TYPE_UQ, devinfo.has_64bit_int },
         { BRW_TYPE_D,  true },
         { BRW_TYPE_UD, true },
         { BRW_TYPE_W,  true },
         { BRW_TYPE_UW, true },
         { BRW_TYPE_B,  file == FIXED_GRF },
         { BRW_TYPE_UB, file == FIXED_GRF },
         { BRW_TYPE_V,  file == IMM },
         { BRW_TYPE_UV, file == IMM },
      };

      /* Initially assume all hardware encodings are invalid */
      BITSET_ONES(invalid_encodings);

      brw_set_default_exec_size(p, BRW_EXECUTE_4);

      /* Range-for instead of a second index named `i`, which used to shadow
       * the index of the enclosing loop over register files.
       */
      for (const auto &tc : test_case) {
         if (!tc.expected_result)
            continue;

         unsigned hw_type = brw_type_encode(&devinfo, file, tc.type);
         if (hw_type != INVALID_HW_REG_TYPE) {
            /* ... and remove valid encodings from the set */
            assert(BITSET_TEST(invalid_encodings, hw_type));
            BITSET_CLEAR(invalid_encodings, hw_type);
         }

         if (file == FIXED_GRF) {
            struct brw_reg g = retype(g0, tc.type);
            brw_MOV(p, g, g);
            brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
            brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
            brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
         } else {
            /* Vector immediates are written to a destination of the
             * corresponding scalar type.
             */
            enum brw_reg_type t;

            switch (tc.type) {
            case BRW_TYPE_V:
               t = BRW_TYPE_W;
               break;
            case BRW_TYPE_UV:
               t = BRW_TYPE_UW;
               break;
            case BRW_TYPE_VF:
               t = BRW_TYPE_F;
               break;
            default:
               t = tc.type;
               break;
            }

            struct brw_reg g = retype(g0, t);
            brw_MOV(p, g, retype(brw_imm_w(0), tc.type));
         }

         EXPECT_TRUE(validate(p));

         clear_instructions(p);
      }

      /* The remaining encodings in invalid_encodings do not have a mapping
       * from BRW_TYPE_* and must be invalid. Verify that invalid
       * encodings are rejected by the validator.
       */
      int e;
      BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
         if (file == FIXED_GRF) {
            brw_MOV(p, g0, g0);
            brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
            brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
            brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
         } else {
            brw_MOV(p, g0, brw_imm_w(0));
         }
         brw_eu_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
         brw_eu_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
337
/* Align16 three-source instructions: every supported type must validate and
 * every 3-bit hardware type encoding without a supported type must be
 * rejected.  Only runs when devinfo.ver is 9.
 */
TEST_P(validation_test, invalid_type_encoding_3src_a16)
{
   /* 3-src instructions in align16 mode only supported on Gfx6-9. */
   if (devinfo.ver != 9)
      return;

   const int num_bits = 3;
   const int num_encodings = 1 << num_bits;

   /* The data types are encoded into <num_bits> bits to be used in hardware
    * instructions, so keep a record in a bitset the invalid patterns so
    * they can be verified to be invalid when used.
    */
   BITSET_DECLARE(invalid_encodings, num_encodings);

   const struct {
      enum brw_reg_type type;
      bool expected_result;
   } test_case[] = {
      { BRW_TYPE_DF, true },
      { BRW_TYPE_F,  true },
      { BRW_TYPE_HF, true },
      { BRW_TYPE_D,  true },
      { BRW_TYPE_UD, true },
   };

   /* Initially assume all hardware encodings are invalid */
   BITSET_ONES(invalid_encodings);

   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_set_default_exec_size(p, BRW_EXECUTE_4);

   for (const auto &tc : test_case) {
      if (!tc.expected_result)
         continue;

      const unsigned hw_type = brw_type_encode_for_3src(&devinfo, tc.type);
      if (hw_type != INVALID_HW_REG_TYPE) {
         /* ... and remove valid encodings from the set */
         assert(BITSET_TEST(invalid_encodings, hw_type));
         BITSET_CLEAR(invalid_encodings, hw_type);
      }

      /* Integer types are exercised with BFE, everything else with MAD. */
      const struct brw_reg reg = retype(g0, tc.type);
      if (brw_type_is_int(tc.type)) {
         brw_BFE(p, reg, reg, reg, reg);
      } else {
         brw_MAD(p, reg, reg, reg, reg);
      }

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* The remaining encodings in invalid_encodings do not have a mapping
    * from BRW_TYPE_* and must be invalid. Verify that invalid
    * encodings are rejected by the validator.
    */
   int e;
   BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
      /* First pass uses MAD, second pass uses BFE. */
      for (unsigned pass = 0; pass < 2; pass++) {
         const bool use_mad = pass == 0;

         if (use_mad) {
            brw_MAD(p, g0, g0, g0, g0);
         } else {
            brw_BFE(p, g0, g0, g0, g0);
         }

         auto *const insn = last_inst;
         brw_eu_inst_set_3src_a16_dst_hw_type(&devinfo, insn, e);
         brw_eu_inst_set_3src_a16_src_hw_type(&devinfo, insn, e);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
415
/* Align1 three-source instructions: every supported (type, execution type)
 * pair must validate, and every remaining combination of the 3-bit hardware
 * type and the 1-bit execution type must be rejected.  Skipped when
 * devinfo.ver is 9.
 */
TEST_P(validation_test, invalid_type_encoding_3src_a1)
{
   /* 3-src instructions in align1 mode only supported on Gfx11+ */
   if (devinfo.ver == 9)
      return;

   const int num_bits = 3 + 1 /* for exec_type */;
   const int num_encodings = 1 << num_bits;

   /* The data types are encoded into <num_bits> bits to be used in hardware
    * instructions, so keep a record in a bitset the invalid patterns so
    * they can be verified to be invalid when used.
    */
   BITSET_DECLARE(invalid_encodings, num_encodings);

   const struct {
      enum brw_reg_type type;
      unsigned exec_type;
      bool expected_result;
   } test_case[] = {
#define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x)
      { BRW_TYPE_DF, E(FLOAT), devinfo.has_64bit_float },
      { BRW_TYPE_F,  E(FLOAT), true  },
      { BRW_TYPE_HF, E(FLOAT), true  },
      { BRW_TYPE_Q,  E(INT),   devinfo.has_64bit_int },
      { BRW_TYPE_UQ, E(INT),   devinfo.has_64bit_int },
      { BRW_TYPE_D,  E(INT),   true  },
      { BRW_TYPE_UD, E(INT),   true  },
      { BRW_TYPE_W,  E(INT),   true  },
      { BRW_TYPE_UW, E(INT),   true  },

      /* There are no ternary instructions that can operate on B-type sources
       * on Gfx11-12. Src1/Src2 cannot be B-typed either.
       */
      { BRW_TYPE_B,  E(INT),   false },
      { BRW_TYPE_UB, E(INT),   false },
/* Keep the one-letter helper macro from leaking into the rest of the file. */
#undef E
   };

   /* Initially assume all hardware encodings are invalid */
   BITSET_ONES(invalid_encodings);

   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_exec_size(p, BRW_EXECUTE_4);

   for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
      if (test_case[i].expected_result) {
         unsigned hw_type =
            brw_type_encode_for_3src(&devinfo, test_case[i].type);
         /* The bitset index is the hardware type in bits 2:0 with the
          * execution type in bit 3.
          */
         unsigned hw_exec_type = hw_type | (test_case[i].exec_type << 3);
         if (hw_type != INVALID_HW_REG_TYPE) {
            /* ... and remove valid encodings from the set */
            assert(BITSET_TEST(invalid_encodings, hw_exec_type));
            BITSET_CLEAR(invalid_encodings, hw_exec_type);
         }

         /* Integer types are exercised with BFE, everything else with MAD. */
         struct brw_reg g = retype(g0, test_case[i].type);
         if (!brw_type_is_int(test_case[i].type)) {
            brw_MAD(p, g, g, g, g);
         } else {
            brw_BFE(p, g, g, g, g);
         }

         EXPECT_TRUE(validate(p));

         clear_instructions(p);
      }
   }

   /* The remaining encodings in invalid_encodings do not have a mapping
    * from BRW_TYPE_* and must be invalid. Verify that invalid
    * encodings are rejected by the validator.
    */
   int e;
   BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
      const unsigned hw_type = e & 0x7;
      const unsigned exec_type = e >> 3;

      for (unsigned i = 0; i < 2; i++) {
         /* The per-opcode exec_type stores that used to live in these
          * branches were dead: the field is unconditionally overwritten
          * with the encoding under test right below.
          */
         if (i == 0) {
            brw_MAD(p, g0, g0, g0, g0);
         } else {
            brw_CSEL(p, g0, g0, g0, g0);
            brw_eu_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ);
         }

         brw_eu_inst_set_3src_a1_exec_type(&devinfo, last_inst, exec_type);
         brw_eu_inst_set_3src_a1_dst_hw_type (&devinfo, last_inst, hw_type);
         brw_eu_inst_set_3src_a1_src0_hw_type(&devinfo, last_inst, hw_type);
         brw_eu_inst_set_3src_a1_src1_hw_type(&devinfo, last_inst, hw_type);
         brw_eu_inst_set_3src_a1_src2_hw_type(&devinfo, last_inst, hw_type);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
515
516 TEST_P(validation_test, 3src_inst_access_mode)
517 {
518 /* No access mode bit on Gfx12+ */
519 if (devinfo.ver >= 12)
520 return;
521
522 const struct {
523 unsigned mode;
524 bool expected_result;
525 } test_case[] = {
526 { BRW_ALIGN_1, devinfo.ver != 9 },
527 { BRW_ALIGN_16, devinfo.ver == 9 },
528 };
529
530 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
531 if (devinfo.ver == 9)
532 brw_set_default_access_mode(p, BRW_ALIGN_16);
533
534 brw_MAD(p, g0, g0, g0, g0);
535 brw_eu_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
536
537 EXPECT_EQ(test_case[i].expected_result, validate(p));
538
539 clear_instructions(p);
540 }
541 }
542
543 /* When the Execution Data Type is wider than the destination data type, the
544 * destination must [...] specify a HorzStride equal to the ratio in sizes of
545 * the two data types.
546 */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)547 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
548 {
549 brw_ADD(p, g0, g0, g0);
550 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
551 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
552 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
553
554 EXPECT_FALSE(validate(p));
555
556 clear_instructions(p);
557
558 brw_ADD(p, g0, g0, g0);
559 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
560 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
561 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
562 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
563
564 EXPECT_TRUE(validate(p));
565 }
566
567 /* When the Execution Data Type is wider than the destination data type, the
568 * destination must be aligned as required by the wider execution data type
569 * [...]
570 */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)571 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
572 {
573 brw_ADD(p, g0, g0, g0);
574 brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
575 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
576 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
577 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
578 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
579
580 EXPECT_FALSE(validate(p));
581
582 clear_instructions(p);
583
584 brw_ADD(p, g0, g0, g0);
585 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
586 brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
587 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
588 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
589 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
590 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
591 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
592 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
593 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
594 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
595 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
596 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
597
598 EXPECT_TRUE(validate(p));
599 }
600
601 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)602 TEST_P(validation_test, exec_size_less_than_width)
603 {
604 brw_ADD(p, g0, g0, g0);
605 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_16);
606
607 EXPECT_FALSE(validate(p));
608
609 clear_instructions(p);
610
611 brw_ADD(p, g0, g0, g0);
612 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_16);
613
614 EXPECT_FALSE(validate(p));
615 }
616
617 /* If ExecSize = Width and HorzStride ≠ 0,
618 * VertStride must be set to Width * HorzStride.
619 */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)620 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
621 {
622 brw_ADD(p, g0, g0, g0);
623 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
624
625 EXPECT_FALSE(validate(p));
626
627 clear_instructions(p);
628
629 brw_ADD(p, g0, g0, g0);
630 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
631
632 EXPECT_FALSE(validate(p));
633 }
634
635 /* If Width = 1, HorzStride must be 0 regardless of the values
636 * of ExecSize and VertStride.
637 */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)638 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
639 {
640 brw_ADD(p, g0, g0, g0);
641 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
642 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
643 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
644
645 EXPECT_FALSE(validate(p));
646
647 clear_instructions(p);
648
649 brw_ADD(p, g0, g0, g0);
650 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
651 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
652 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
653
654 EXPECT_FALSE(validate(p));
655 }
656
657 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)658 TEST_P(validation_test, scalar_region_must_be_0_1_0)
659 {
660 struct brw_reg g0_0 = brw_vec1_grf(0, 0);
661
662 brw_ADD(p, g0, g0, g0_0);
663 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
664 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
665 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
666 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
667
668 EXPECT_FALSE(validate(p));
669
670 clear_instructions(p);
671
672 brw_ADD(p, g0, g0_0, g0);
673 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
674 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
675 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
676 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
677
678 EXPECT_FALSE(validate(p));
679 }
680
681 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
682 * of ExecSize.
683 */
TEST_P(validation_test,zero_stride_implies_0_1_0)684 TEST_P(validation_test, zero_stride_implies_0_1_0)
685 {
686 brw_ADD(p, g0, g0, g0);
687 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
688 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
689 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
690
691 EXPECT_FALSE(validate(p));
692
693 clear_instructions(p);
694
695 brw_ADD(p, g0, g0, g0);
696 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
697 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
698 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
699
700 EXPECT_FALSE(validate(p));
701 }
702
703 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)704 TEST_P(validation_test, dst_horizontal_stride_0)
705 {
706 brw_ADD(p, g0, g0, g0);
707 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
708
709 EXPECT_FALSE(validate(p));
710
711 clear_instructions(p);
712
713 /* Align16 does not exist on Gfx11+ */
714 if (devinfo.ver >= 11)
715 return;
716
717 brw_set_default_access_mode(p, BRW_ALIGN_16);
718
719 brw_ADD(p, g0, g0, g0);
720 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
721
722 EXPECT_FALSE(validate(p));
723 }
724
725 /* VertStride must be used to cross FIXED_GRF register boundaries. This rule implies
726 * that elements within a 'Width' cannot cross FIXED_GRF boundaries.
727 */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)728 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
729 {
730 brw_ADD(p, g0, g0, g0);
731 brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
732
733 EXPECT_FALSE(validate(p));
734
735 clear_instructions(p);
736
737 brw_ADD(p, g0, g0, g0);
738 brw_eu_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
739
740 EXPECT_FALSE(validate(p));
741
742 clear_instructions(p);
743
744 brw_ADD(p, g0, g0, g0);
745 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
746 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
747 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
748
749 EXPECT_FALSE(validate(p));
750
751 clear_instructions(p);
752
753 brw_ADD(p, g0, g0, g0);
754 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
755 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
756 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
757
758 EXPECT_FALSE(validate(p));
759 }
760
761 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)762 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
763 {
764 /* Align16 does not exist on Gfx11+ */
765 if (devinfo.ver >= 11)
766 return;
767
768 brw_set_default_access_mode(p, BRW_ALIGN_16);
769
770 brw_ADD(p, g0, g0, g0);
771 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
772
773 EXPECT_FALSE(validate(p));
774
775 clear_instructions(p);
776
777 brw_ADD(p, g0, g0, g0);
778 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
779
780 EXPECT_TRUE(validate(p));
781 }
782
783 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)784 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
785 {
786 /* Align16 does not exist on Gfx11+ */
787 if (devinfo.ver >= 11)
788 return;
789
790 const struct {
791 enum brw_vertical_stride vstride;
792 bool expected_result;
793 } vstride[] = {
794 { BRW_VERTICAL_STRIDE_0, true },
795 { BRW_VERTICAL_STRIDE_1, false },
796 { BRW_VERTICAL_STRIDE_2, true },
797 { BRW_VERTICAL_STRIDE_4, true },
798 { BRW_VERTICAL_STRIDE_8, false },
799 { BRW_VERTICAL_STRIDE_16, false },
800 { BRW_VERTICAL_STRIDE_32, false },
801 { BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
802 };
803
804 brw_set_default_access_mode(p, BRW_ALIGN_16);
805
806 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
807 brw_ADD(p, g0, g0, g0);
808 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
809
810 EXPECT_EQ(vstride[i].expected_result, validate(p));
811
812 clear_instructions(p);
813 }
814
815 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
816 brw_ADD(p, g0, g0, g0);
817 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
818
819 EXPECT_EQ(vstride[i].expected_result, validate(p));
820
821 clear_instructions(p);
822 }
823 }
824
825 /* In Direct Addressing mode, a source cannot span more than 2 adjacent FIXED_GRF
826 * registers.
827 */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)828 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
829 {
830 enum brw_reg_type type = devinfo.ver >= 20 ? BRW_TYPE_D : BRW_TYPE_W;
831
832 brw_ADD(p, g0, g0, g0);
833 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
834 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, type);
835 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, type);
836 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, type);
837 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
838 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
839 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
840
841 EXPECT_FALSE(validate(p));
842
843 clear_instructions(p);
844
845 brw_ADD(p, g0, g0, g0);
846 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
847 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, type);
848 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, type);
849 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, type);
850 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
851 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
852 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
853 brw_eu_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
854
855 EXPECT_TRUE(validate(p));
856
857 clear_instructions(p);
858
859 brw_ADD(p, g0, g0, g0);
860 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
861
862 EXPECT_TRUE(validate(p));
863 }
864
865 /* A destination cannot span more than 2 adjacent FIXED_GRF registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)866 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
867 {
868 unsigned invalid_stride = devinfo.ver >= 20 ? 4 : 2;
869
870 brw_ADD(p, g0, g0, g0);
871 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
872 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, cvt(invalid_stride));
873 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
874 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
875 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
876
877 EXPECT_FALSE(validate(p));
878
879 clear_instructions(p);
880
881 brw_ADD(p, g0, g0, g0);
882 brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_8);
883 brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
884 brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
885 brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
886 brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
887 brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
888 brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
889 brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
890 brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
891 brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
892 brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
893 brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
894
895 EXPECT_TRUE(validate(p));
896 }
897
/* Four instructions whose src1 region uses a vertical stride of 16 while the
 * destination is written in different places; all four are expected to
 * validate.
 */
TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
{
   /* D-typed operands when devinfo.ver >= 20, W-typed otherwise. */
   const enum brw_reg_type type =
      devinfo.ver >= 20 ? BRW_TYPE_D : BRW_TYPE_W;

   /* Writes to dest are to the lower OWord */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const insn = last_inst;
      brw_eu_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_16);
      brw_eu_inst_set_src1_width(&devinfo, insn, BRW_WIDTH_4);
      brw_eu_inst_set_src1_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are to the upper OWord */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const insn = last_inst;
      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, insn, 16);
      brw_eu_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_16);
      brw_eu_inst_set_src1_width(&devinfo, insn, BRW_WIDTH_4);
      brw_eu_inst_set_src1_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are evenly split between OWords */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const insn = last_inst;
      brw_eu_inst_set_exec_size(&devinfo, insn, BRW_EXECUTE_16);
      brw_eu_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_16);
      brw_eu_inst_set_src1_width(&devinfo, insn, BRW_WIDTH_8);
      brw_eu_inst_set_src1_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are uneven between OWords */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const insn = last_inst;
      brw_eu_inst_set_exec_size(&devinfo, insn, BRW_EXECUTE_4);
      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, insn, 10);
      brw_eu_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src0_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src0_width(&devinfo, insn, BRW_WIDTH_4);
      brw_eu_inst_set_src0_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_1);
      brw_eu_inst_set_src1_file_type(&devinfo, insn, FIXED_GRF, type);
      brw_eu_inst_set_src1_vstride(&devinfo, insn, BRW_VERTICAL_STRIDE_16);
      brw_eu_inst_set_src1_width(&devinfo, insn, BRW_WIDTH_2);
      brw_eu_inst_set_src1_hstride(&devinfo, insn, BRW_HORIZONTAL_STRIDE_1);

      EXPECT_TRUE(validate(p));
   }
}
959
/* Checks how validate() reacts to destinations with a non-zero subregister
 * number, for both ADD and the SIN math instruction.
 */
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD with dst subregister number 4: rejected from verx10 125 onwards,
    * accepted on everything older.
    */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   if (devinfo.verx10 >= 125) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* ADD with exec size 16 and the default dst offset is always accepted. */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* SIN with the default dst offset is accepted ... */
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* ... but the same instruction with dst subregister number 4 is not. */
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
   brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));
}
991
/* Two SIMD4 ADDs with a dst hstride of 4 and differing src0/src1 regions.
 * Both are expected to validate before verx10 125 and to be rejected from
 * verx10 125 onwards.
 */
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* Program vstride, width and hstride of src0 on the last instruction. */
   const auto set_src0_region = [&](unsigned vstride, unsigned width,
                                    unsigned hstride) {
      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, hstride);
   };

   /* Program vstride, width and hstride of src1 on the last instruction. */
   const auto set_src1_region = [&](unsigned vstride, unsigned width,
                                    unsigned hstride) {
      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, vstride);
      brw_eu_inst_set_src1_width(&devinfo, last_inst, width);
      brw_eu_inst_set_src1_hstride(&devinfo, last_inst, hstride);
   };

   /* First case: src0 starts at subregister number 16. */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
   set_src0_region(BRW_VERTICAL_STRIDE_2, BRW_WIDTH_1,
                   BRW_HORIZONTAL_STRIDE_0);
   set_src1_region(BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4,
                   BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 < 125) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* Second case: both sources keep their default subregister number. */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   set_src0_region(BRW_VERTICAL_STRIDE_4, BRW_WIDTH_1,
                   BRW_HORIZONTAL_STRIDE_0);
   set_src1_region(BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2,
                   BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 < 125) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
1028
/* Two MOVs whose src0 starts at subregister number 8.  The SIMD16 W-typed
 * MOV is always accepted; the second MOV is accepted only before
 * verx10 125.
 */
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* Program vstride, width and hstride of src0 on the last instruction. */
   const auto set_src0_region = [&](unsigned vstride, unsigned width,
                                    unsigned hstride) {
      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, hstride);
   };

   /* SIMD16 MOV, W dst with hstride 2, W src0 with a <4;4,1> region. */
   brw_MOV(p, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   set_src0_region(BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4,
                   BRW_HORIZONTAL_STRIDE_1);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* MOV with dst at subregister number 16 and a <2;2,1> src0 region. */
   brw_MOV(p, g0, g0);
   brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
   brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   set_src0_region(BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2,
                   BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 < 125) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
1058
/* A series of SIMD16 ADDs with various operand type and region
 * combinations; every one of them is expected to pass validation.
 */
TEST_P(validation_test, one_src_two_dst)
{
   /* Both sources are the single-element register built by
    * brw_vec1_grf(0, 0).
    */
   const struct brw_reg scalar_g0 = brw_vec1_grf(0, 0);

   brw_ADD(p, g0, scalar_g0, scalar_g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* D destination combined with each of these src0/src1 type pairs. */
   static const struct {
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
   } d_dst_sources[] = {
      { BRW_TYPE_W, BRW_TYPE_D },
      { BRW_TYPE_D, BRW_TYPE_W },
      { BRW_TYPE_W, BRW_TYPE_W },
   };

   for (const auto &srcs : d_dst_sources) {
      brw_ADD(p, g0, g0, g0);
      brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
      brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF,
                                    BRW_TYPE_D);
      brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF,
                                     srcs.src0_type);
      brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF,
                                     srcs.src1_type);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* W operands, dst hstride 2, src1 with a <0;1,0> region. */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
   brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* W operands, dst hstride 2, src0 with a <0;1,0> region. */
   brw_ADD(p, g0, g0, g0);
   brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_eu_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
   brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_eu_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);

   EXPECT_TRUE(validate(p));
}
1126
/* Table-driven check of MOV instructions with B/UB destinations, followed by
 * two predicated SEL instructions on byte operands that must be rejected.
 *
 * Each table row gives the dst and src types, whether src0 negate, src0 abs
 * and saturate are set, and the result expected from validate().
 */
TEST_P(validation_test, packed_byte_destination)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      bool neg, abs, sat;
      bool expected_result;
   } move[] = {
      /* Plain byte-to-byte moves */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 0, true },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 0, true },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 0, true },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 0, true },

      /* With src0 negate */
      { BRW_TYPE_UB, BRW_TYPE_UB, 1, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_B , 1, 0, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 1, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 1, 0, 0, false },

      /* With src0 abs */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 1, 0, false },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 1, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 1, 0, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 1, 0, false },

      /* With saturate */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 1, false },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 1, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 1, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 1, false },

      /* Non-byte sources */
      { BRW_TYPE_UB, BRW_TYPE_UW, 0, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_W , 0, 0, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_UD, 0, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_D , 0, 0, 0, false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
      brw_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
      brw_eu_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
      brw_eu_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
      brw_eu_inst_set_saturate(&devinfo, last_inst, move[i].sat);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(move[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }

   /* Predicated SEL on UB operands is rejected. */
   brw_SEL(p, retype(g0, BRW_TYPE_UB),
              retype(g0, BRW_TYPE_UB),
              retype(g0, BRW_TYPE_UB));
   brw_eu_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p));

   clear_instructions(p);

   /* Predicated SEL on B operands is rejected as well. */
   brw_SEL(p, retype(g0, BRW_TYPE_B),
              retype(g0, BRW_TYPE_B),
              retype(g0, BRW_TYPE_B));
   brw_eu_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p));
}
1188
/* A predicated SEL from W sources to a B destination with hstride 2 is
 * expected to validate both with the default dst subregister number and
 * with subregister number 1.
 */
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Emit the predicated W -> B SEL with a dst hstride of 2. */
   const auto emit_strided_byte_sel = [&]() {
      brw_SEL(p, retype(g0, BRW_TYPE_B),
                 retype(g0, BRW_TYPE_W),
                 retype(g0, BRW_TYPE_W));
      brw_eu_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
      brw_eu_inst_set_dst_hstride(&devinfo, last_inst,
                                  BRW_HORIZONTAL_STRIDE_2);
   };

   emit_strided_byte_sel();

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   emit_strided_byte_sel();
   brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1);

   EXPECT_TRUE(validate(p));
}
1210
/* Table-driven check of MOVs from 64-bit sources (Q, UQ, DF) to byte
 * destinations (B, UB) with dst hstride 1, 2 and 4.  Every row is expected
 * to be rejected.
 *
 * Rows whose source type is not supported by the device (per
 * devinfo.has_64bit_float / devinfo.has_64bit_int) are skipped.
 */
TEST_P(validation_test, byte_64bit_conversion)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned dst_stride;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src_type, dst_stride, expected_result)             \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src_type,                                             \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         expected_result,                                                 \
      }

      INST(B, Q, 1, false),
      INST(B, UQ, 1, false),
      INST(B, DF, 1, false),
      INST(UB, Q, 1, false),
      INST(UB, UQ, 1, false),
      INST(UB, DF, 1, false),

      INST(B, Q, 2, false),
      INST(B, UQ, 2, false),
      INST(B , DF, 2, false),
      INST(UB, Q, 2, false),
      INST(UB, UQ, 2, false),
      INST(UB, DF, 2, false),

      INST(B, Q, 4, false),
      INST(B, UQ, 4, false),
      INST(B, DF, 4, false),
      INST(UB, Q, 4, false),
      INST(UB, UQ, 4, false),
      INST(UB, DF, 4, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      if (!devinfo.has_64bit_float &&
          inst[i].src_type == BRW_TYPE_DF)
         continue;

      if (!devinfo.has_64bit_int &&
          (inst[i].src_type == BRW_TYPE_Q ||
           inst[i].src_type == BRW_TYPE_UQ))
         continue;

      brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1268
/* Table-driven check of SIMD4 MOVs that convert to or from HF.
 *
 * Each row gives the dst and src types, the dst hstride, the dst
 * subregister number, and the validate() result expected before verx10 125
 * and from verx10 125 onwards.  Rows using types the device lacks (per
 * devinfo.has_64bit_float / devinfo.has_64bit_int) are skipped.
 */
TEST_P(validation_test, half_float_conversion)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned dst_hstride;
      unsigned dst_subreg;
      bool expected_gfx9;
      bool expected_gfx125;
   } cases[] = {
#define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
             expected_result_gfx9,                                          \
             expected_result_gfx125)                                        \
      {                                                                     \
         BRW_TYPE_##dst_type,                                               \
         BRW_TYPE_##src_type,                                               \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
         dst_subnr,                                                         \
         expected_result_gfx9,                                              \
         expected_result_gfx125,                                            \
      }

      /* MOV to half-float destination */
      INST(HF, B, 1, 0, false, false), /* 0 */
      INST(HF, W, 1, 0, false, false),
      INST(HF, HF, 1, 0, true, true),
      INST(HF, HF, 1, 2, true, false),
      INST(HF, D, 1, 0, false, false),
      INST(HF, F, 1, 0, true, false),
      INST(HF, Q, 1, 0, false, false),
      INST(HF, B, 2, 0, true, false),
      INST(HF, B, 2, 2, false, false),
      INST(HF, W, 2, 0, true, false),
      INST(HF, W, 2, 2, false, false), /* 10 */
      INST(HF, HF, 2, 0, true, false),
      INST(HF, HF, 2, 2, true, false),
      INST(HF, D, 2, 0, true, true),
      INST(HF, D, 2, 2, false, false),
      INST(HF, F, 2, 0, true, true),
      INST(HF, F, 2, 2, true, false),
      INST(HF, Q, 2, 0, false, false),
      INST(HF, DF, 2, 0, false, false),
      INST(HF, B, 4, 0, false, false),
      INST(HF, W, 4, 0, false, false), /* 20 */
      INST(HF, HF, 4, 0, true, false),
      INST(HF, HF, 4, 2, true, false),
      INST(HF, D, 4, 0, false, false),
      INST(HF, F, 4, 0, false, false),
      INST(HF, Q, 4, 0, false, false),
      INST(HF, DF, 4, 0, false, false),

      /* MOV from half-float source */
      INST( B, HF, 1, 0, false, false),
      INST( W, HF, 1, 0, false, false),
      INST( D, HF, 1, 0, true, true),
      INST( D, HF, 1, 4, true, true), /* 30 */
      INST( F, HF, 1, 0, true, false),
      INST( F, HF, 1, 4, true, false),
      INST( Q, HF, 1, 0, false, false),
      INST(DF, HF, 1, 0, false, false),
      INST( B, HF, 2, 0, false, false),
      INST( W, HF, 2, 0, true, true),
      INST( W, HF, 2, 2, false, false),
      INST( D, HF, 2, 0, false, false),
      INST( F, HF, 2, 0, true, false),
      INST( B, HF, 4, 0, true, true), /* 40 */
      INST( B, HF, 4, 1, false, false),
      INST( W, HF, 4, 0, false, false),

#undef INST
   };

   for (unsigned idx = 0; idx < ARRAY_SIZE(cases); idx++) {
      const auto &c = cases[idx];

      const bool uses_df = c.dst_type == BRW_TYPE_DF ||
                           c.src_type == BRW_TYPE_DF;
      const bool uses_64bit_int = c.dst_type == BRW_TYPE_Q ||
                                  c.dst_type == BRW_TYPE_UQ ||
                                  c.src_type == BRW_TYPE_Q ||
                                  c.src_type == BRW_TYPE_UQ;

      /* Skip rows that need a type this device does not provide. */
      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_64bit_int && !devinfo.has_64bit_int))
         continue;

      brw_MOV(p, retype(g0, c.dst_type), retype(g0, c.src_type));

      brw_eu_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, c.dst_hstride);
      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, c.dst_subreg);

      /* B sources use a <4;2,2> region, every other source type <4;4,1>. */
      const bool byte_src = c.src_type == BRW_TYPE_B;

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst,
                                   BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src0_width(&devinfo, last_inst,
                                 byte_src ? BRW_WIDTH_2 : BRW_WIDTH_4);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst,
                                   byte_src ? BRW_HORIZONTAL_STRIDE_2
                                            : BRW_HORIZONTAL_STRIDE_1);

      if (devinfo.verx10 < 125) {
         EXPECT_EQ(c.expected_gfx9, validate(p)) <<
            "Failing test is: " << idx;
      } else {
         EXPECT_EQ(c.expected_gfx125, validate(p)) <<
            "Failing test is: " << idx;
      }

      clear_instructions(p);
   }
}
1382
/* Table-driven check of ADDs mixing F and HF operands with direct or
 * indirect addressing on dst and src0.
 *
 * Each row gives the dst/src0/src1 types, the dst hstride, whether dst and
 * src0 use indirect addressing, and the validate() result expected before
 * verx10 125 and from verx10 125 onwards.
 */
TEST_P(validation_test, mixed_float_source_indirect_addressing)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool dst_indirect;
      bool src0_indirect;
      bool expected_result;
      bool gfx125_expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, dst_indirect, src0_indirect, expected_result,    \
             gfx125_expected_result)                                      \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         dst_indirect,                                                    \
         src0_indirect,                                                   \
         expected_result,                                                 \
         gfx125_expected_result,                                          \
      }

      /* Source and dest are mixed float: indirect src addressing not allowed */
      INST(HF, F, F, 2, false, false, true, true),
      INST(HF, F, F, 2, true, false, true, true),
      INST(HF, F, F, 2, false, true, false, false),
      INST(HF, F, F, 2, true, true, false, false),
      INST( F, HF, F, 1, false, false, true, false),
      INST( F, HF, F, 1, true, false, true, false),
      INST( F, HF, F, 1, false, true, false, false),
      INST( F, HF, F, 1, true, true, false, false),

      INST(HF, HF, F, 2, false, false, true, false),
      INST(HF, HF, F, 2, true, false, true, false),
      INST(HF, HF, F, 2, false, true, false, false),
      INST(HF, HF, F, 2, true, true, false, false),
      INST( F, F, HF, 1, false, false, true, false),
      INST( F, F, HF, 1, true, false, true, false),
      INST( F, F, HF, 1, false, true, false, false),
      INST( F, F, HF, 1, true, true, false, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      /* The bool flags are passed straight through as the address mode. */
      brw_eu_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
      brw_eu_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].gfx125_expected_result :
                            inst[i].expected_result;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1449
/* Table-driven check of SIMD8 and SIMD16 ADDs mixing F and HF operands.
 *
 * Each row gives the exec size, the dst/src0/src1 types, the dst hstride,
 * and the validate() result expected before verx10 125 and from verx10 125
 * onwards.
 */
TEST_P(validation_test, mixed_float_align1_simd16)
{
   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool expected_result;
      bool gfx125_expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type,                   \
             dst_stride, expected_result, gfx125_expected_result)         \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         expected_result,                                                 \
         gfx125_expected_result,                                          \
      }

      /* No SIMD16 in mixed mode when destination is packed f16 */
      INST( 8, HF, F, HF, 2, true, false),
      INST(16, HF, HF, F, 2, true, false),
      INST(16, HF, HF, F, 1, false, false),
      INST(16, HF, F, HF, 1, false, false),

      /* No SIMD16 in mixed mode when destination is f32 */
      INST( 8, F, HF, F, 1, true, false),
      INST( 8, F, F, HF, 1, true, false),
      INST(16, F, HF, F, 1, false, false),
      INST(16, F, F, HF, 1, false, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_eu_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].gfx125_expected_result :
                            inst[i].expected_result;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1506
/* Table-driven check of mixed F/HF ADDs whose src0 is either g0 or acc0,
 * at various src0 subregister numbers.
 *
 * Each row gives the dst/src0/src1 types, the dst hstride, whether src0 is
 * read from acc0, the src0 subregister number, and the validate() result
 * expected before verx10 125 and from verx10 125 onwards.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      unsigned subnr;
      bool expected_result_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
             expected_result_skl, expected_result_gfx125)                   \
      {                                                                     \
         BRW_TYPE_##dst_type,                                               \
         BRW_TYPE_##src0_type,                                              \
         BRW_TYPE_##src1_type,                                              \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
         read_acc,                                                          \
         subnr,                                                             \
         expected_result_skl,                                               \
         expected_result_gfx125,                                            \
      }

      /* Destination is not packed */
      INST(HF, HF, F, 2, true, 0, true, false),
      INST(HF, HF, F, 2, true, 2, true, false),
      INST(HF, HF, F, 2, true, 4, true, false),
      INST(HF, HF, F, 2, true, 8, true, false),
      INST(HF, HF, F, 2, true, 16, true, false),

      /* Destination is packed, we don't read acc */
      INST(HF, HF, F, 1, false, 0, true, false),
      INST(HF, HF, F, 1, false, 2, true, false),
      INST(HF, HF, F, 1, false, 4, true, false),
      INST(HF, HF, F, 1, false, 8, true, false),
      INST(HF, HF, F, 1, false, 16, true, false),

      /* Destination is packed, we read acc */
      INST(HF, HF, F, 1, true, 0, false, false),
      INST(HF, HF, F, 1, true, 2, false, false),
      INST(HF, HF, F, 1, true, 4, false, false),
      INST(HF, HF, F, 1, true, 8, false, false),
      INST(HF, HF, F, 1, true, 16, false, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].expected_result_gfx125 :
                            inst[i].expected_result_skl;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1573
/* Table-driven check of mixed F/HF MAC and ADD instructions.
 *
 * Each row gives the exec size, the opcode (MAC or ADD), the dst/src0/src1
 * types, the dst hstride, whether src0 is read from acc0, and the
 * validate() result expected before verx10 125 and from verx10 125 onwards.
 * The read_acc flag is only consulted for ADD rows; MAC rows always use g0
 * for both explicit sources.
 */
TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
{
   static const struct {
      unsigned exec_size;
      unsigned opcode;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      bool expected_result_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
             dst_stride, read_acc,                                        \
             expected_result_skl, expected_result_gfx125)                 \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_OPCODE_##opcode,                                             \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         read_acc,                                                        \
         expected_result_skl,                                             \
         expected_result_gfx125,                                          \
      }

      /* Packed fp16 dest with implicit acc needs hstride=2 */
      INST(8, MAC, HF, HF, F, 1, false, false, false),
      INST(8, MAC, HF, HF, F, 2, false, true, false),
      INST(8, MAC, HF, F, HF, 1, false, false, false),
      INST(8, MAC, HF, F, HF, 2, false, true, false),

      /* Packed fp16 dest with explicit acc needs hstride=2 */
      INST(8, ADD, HF, HF, F, 1, true, false, false),
      INST(8, ADD, HF, HF, F, 2, true, true, false),
      INST(8, ADD, HF, F, HF, 1, true, false, false),
      INST(8, ADD, HF, F, HF, 2, true, true, false),

      /* If destination is not fp16, restriction doesn't apply */
      INST(8, MAC, F, HF, F, 1, false, true, false),
      INST(8, MAC, F, HF, F, 2, false, true, false),

      /* If there is no implicit/explicit acc, restriction doesn't apply */
      INST(8, ADD, HF, HF, F, 1, false, true, false),
      INST(8, ADD, HF, HF, F, 2, false, true, false),
      INST(8, ADD, HF, F, HF, 1, false, true, false),
      INST(8, ADD, HF, F, HF, 2, false, true, false),
      INST(8, ADD, F, HF, F, 1, false, true, false),
      INST(8, ADD, F, HF, F, 2, false, true, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      if (inst[i].opcode == BRW_OPCODE_MAC) {
         brw_MAC(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].src0_type),
                    retype(g0, inst[i].src1_type));
      } else {
         /* The table only contains MAC and ADD rows. */
         assert(inst[i].opcode == BRW_OPCODE_ADD);
         brw_ADD(p, retype(g0, inst[i].dst_type),
                    retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
                    retype(g0, inst[i].src1_type));
      }

      brw_eu_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].expected_result_gfx125 :
                            inst[i].expected_result_skl;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1653
/* Table-driven check of POW math instructions mixing F and HF operands with
 * different horizontal strides.
 *
 * Each row gives the dst/src0/src1 types, the dst/src0/src1 hstrides, and
 * the validate() result expected before verx10 125 and from verx10 125
 * onwards.  Both sources always use vstride 4 and width 4.
 */
TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned src0_stride;
      unsigned src1_stride;
      bool expected_result;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, src0_stride, src1_stride, expected_result,       \
             expected_result_gfx125)                                      \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
         BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
         expected_result,                                                 \
         expected_result_gfx125,                                          \
      }

      INST(HF, HF, F, 2, 2, 1, true, false),
      INST(HF, F, HF, 2, 1, 2, true, false),
      INST(HF, F, HF, 1, 1, 2, true, false),
      INST(HF, F, HF, 2, 1, 1, false, false),
      INST(HF, HF, F, 2, 1, 1, false, false),
      INST(HF, HF, F, 1, 1, 1, false, false),
      INST(HF, HF, F, 2, 1, 1, false, false),
      INST( F, HF, F, 1, 1, 1, false, false),
      INST( F, F, HF, 1, 1, 2, true, false),
      INST( F, HF, HF, 1, 2, 1, false, false),
      INST( F, HF, HF, 1, 2, 2, true, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      gfx6_math(p, retype(g0, inst[i].dst_type),
                   BRW_MATH_FUNCTION_POW,
                   retype(g0, inst[i].src0_type),
                   retype(g0, inst[i].src1_type));

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);

      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_eu_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].expected_result_gfx125 :
                            inst[i].expected_result;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1719
/* Table-driven check of mixed F/HF ADDs at various dst hstrides and dst
 * subregister numbers.
 *
 * Each row gives the exec size, the dst/src0/src1 types, the dst hstride,
 * the dst subregister number, and the validate() result expected before
 * verx10 125 and from verx10 125 onwards.  Both sources always use a
 * <4;4,1> region.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
{
   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
             expected_result_skl, expected_result_gfx125)                      \
      {                                                                        \
         BRW_EXECUTE_##exec_size,                                              \
         BRW_TYPE_##dst_type,                                                  \
         BRW_TYPE_##src0_type,                                                 \
         BRW_TYPE_##src1_type,                                                 \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
         dst_subnr,                                                            \
         expected_result_skl,                                                  \
         expected_result_gfx125                                                \
      }

      /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
       * oword-aligned
       */
      INST( 8, HF, HF, F, 1, 0, true, false),
      INST( 8, HF, HF, F, 1, 2, false, false),
      INST( 8, HF, HF, F, 1, 4, false, false),
      INST( 8, HF, HF, F, 1, 8, false, false),
      INST( 8, HF, HF, F, 1, 16, true, false),

      /* SIMD16 packed fp16 always crosses oword boundaries */
      INST(16, HF, HF, F, 1, 0, false, false),
      INST(16, HF, HF, F, 1, 2, false, false),
      INST(16, HF, HF, F, 1, 4, false, false),
      INST(16, HF, HF, F, 1, 8, false, false),
      INST(16, HF, HF, F, 1, 16, false, false),

      /* If destination is not packed (or not fp16) we can cross oword
       * boundaries
       */
      INST( 8, HF, HF, F, 2, 0, true, false),
      INST( 8, F, HF, F, 1, 0, true, false),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_eu_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_eu_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      const bool expected = devinfo.verx10 >= 125 ?
                            inst[i].expected_result_gfx125 :
                            inst[i].expected_result_skl;

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1796
/* Table-driven check of Align16 ADDs mixing F and HF sources with different
 * source vertical strides.
 *
 * Each row gives the dst/src0/src1 types, the src0 and src1 vstrides, and
 * the expected validate() result.  The test body is a no-op when
 * devinfo.ver >= 11.
 */
TEST_P(validation_test, mixed_float_align16_packed_data)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type,                              \
             src0_vstride, src1_vstride, expected_result)                 \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_VERTICAL_STRIDE_##src0_vstride,                              \
         BRW_VERTICAL_STRIDE_##src1_vstride,                              \
         expected_result,                                                 \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST(F, F, HF, 4, 4, true),
      INST(F, F, HF, 2, 4, false),
      INST(F, F, HF, 4, 2, false),
      INST(F, F, HF, 0, 4, false),
      INST(F, F, HF, 4, 0, false),
      INST(F, HF, F, 4, 4, true),
      INST(F, HF, F, 4, 2, false),
      INST(F, HF, F, 2, 4, false),
      INST(F, HF, F, 0, 4, false),
      INST(F, HF, F, 4, 0, false),

#undef INST
   };

   /* Nothing is checked on ver 11 and later. */
   if (devinfo.ver >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1854
/* Align16 mixed-float ADD at different execution sizes: the SIMD8 rows are
 * expected to validate, the SIMD16 rows are expected to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   /* Nothing is emitted or checked on Gfx11+. */
   if (devinfo.ver >= 11)
      return;

   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool expected_result;
   } cases[] = {
#define INST(exec, dst_t, src0_t, src1_t, expect) \
   { \
      BRW_EXECUTE_##exec, \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##src0_t, \
      BRW_TYPE_##src1_t, \
      expect, \
   }

      /* Only F destinations are exercised: F->HF conversions are restricted
       * to DWord-aligned destinations, while Align16 also requires the
       * destination horizontal stride to be 1.
       *
       * NOTE(review): the first and third row of each group are identical;
       * possibly a different type combination was intended -- confirm.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST( 8,  F,  F, HF, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),
      INST(16,  F,  F, HF, false),

#undef INST
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const auto &tc : cases) {
      brw_ADD(p, retype(g0, tc.dst_type),
                 retype(g0, tc.src0_type),
                 retype(g0, tc.src1_type));

      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      /* Both sources always use a vertical stride of 4 here. */
      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1907
/* Align16 mixed-float ADD whose first source is either g0 or acc0: rows that
 * read the accumulator are expected to be rejected, the others to validate.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   /* Nothing is emitted or checked on Gfx11+. */
   if (devinfo.ver >= 11)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } cases[] = {
#define INST(dst_t, src0_t, src1_t, from_acc, expect) \
   { \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##src0_t, \
      BRW_TYPE_##src1_t, \
      from_acc, \
      expect, \
   }

      /* Only F destinations are exercised: F->HF conversions are restricted
       * to DWord-aligned destinations, while Align16 also requires the
       * destination horizontal stride to be 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),

#undef INST
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const auto &tc : cases) {
      /* src0 comes from the accumulator only when the row asks for it. */
      const struct brw_reg src0_base = tc.read_acc ? acc0 : g0;

      brw_ADD(p, retype(g0, tc.dst_type),
                 retype(src0_base, tc.src0_type),
                 retype(g0, tc.src1_type));

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1956
/* Align16 mixed-float POW math instruction: sweeps the vertical stride of
 * both sources.  In this table only the HF/HF row with <4>/<4> strides is
 * expected to validate.
 */
TEST_P(validation_test, mixed_float_align16_math_packed_format)
{
   /* Align16 Math for mixed float mode is not supported in Gfx11+ */
   if (devinfo.ver >= 11)
      return;

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } cases[] = {
#define INST(dst_t, src0_t, src1_t, src0_vs, src1_vs, expect) \
   { \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##src0_t, \
      BRW_TYPE_##src1_t, \
      BRW_VERTICAL_STRIDE_##src0_vs, \
      BRW_VERTICAL_STRIDE_##src1_vs, \
      expect, \
   }

      /* Only F destinations are exercised: F->HF conversions are restricted
       * to DWord-aligned destinations, while Align16 also requires the
       * destination horizontal stride to be 1.
       */
      INST( F, HF,  F, 4, 0, false),
      INST( F, HF, HF, 4, 4, true),
      INST( F,  F, HF, 4, 0, false),
      INST( F,  F, HF, 2, 4, false),
      INST( F,  F, HF, 4, 2, false),
      INST( F, HF, HF, 0, 4, false),

#undef INST
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const auto &tc : cases) {
      gfx6_math(p, retype(g0, tc.dst_type),
                   BRW_MATH_FUNCTION_POW,
                   retype(g0, tc.src0_type),
                   retype(g0, tc.src1_type));

      /* Override the source vertical strides with the ones under test. */
      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, tc.src0_vstride);
      brw_eu_inst_set_src1_vstride(&devinfo, last_inst, tc.src1_vstride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2012
/* MOV of a vector immediate (VF, V, UV) into g0 at several destination
 * subregister offsets.  Offsets 0 and 16 are expected to validate, offset 1
 * is expected to be rejected.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   } cases[] = {
#define INST(dst_t, imm_t, dst_subnr, exec, expect) \
   { \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##imm_t, \
      dst_subnr, \
      BRW_EXECUTE_##exec, \
      expect, \
   }

      INST(F, VF,  0, 4, true),
      INST(F, VF, 16, 4, true),
      INST(F, VF,  1, 4, false),

      INST(W,  V,  0, 8, true),
      INST(W,  V, 16, 8, true),
      INST(W,  V,  1, 8, false),

      INST(W, UV,  0, 8, true),
      INST(W, UV, 16, 8, true),
      INST(W, UV,  1, 8, false),

#undef INST
   };

   for (const auto &tc : cases) {
      const auto dst_reg = retype(g0, tc.dst_type);
      const auto imm_reg = retype(zero, tc.src_type);

      brw_MOV(p, dst_reg, imm_reg);

      /* Patch in the destination offset and execution size under test. */
      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, tc.subnr);
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2045
/* MOV of a vector immediate (VF, V, UV) into g0 with various destination
 * types and horizontal strides; each row records whether the validator is
 * expected to accept that type/stride combination.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned stride;
      bool expected_result;
   } cases[] = {
#define INST(dst_t, imm_t, dst_hs, expect) \
   { \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##imm_t, \
      BRW_HORIZONTAL_STRIDE_##dst_hs, \
      expect, \
   }

      INST(F, VF, 1, true),
      INST(F, VF, 2, false),
      INST(D, VF, 1, true),
      INST(D, VF, 2, false),
      INST(W, VF, 2, true),
      INST(B, VF, 4, true),

      INST(W,  V, 1, true),
      INST(W,  V, 2, false),
      INST(W,  V, 4, false),
      INST(B,  V, 2, true),

      INST(W, UV, 1, true),
      INST(W, UV, 2, false),
      INST(W, UV, 4, false),
      INST(B, UV, 2, true),

#undef INST
   };

   for (const auto &tc : cases) {
      const auto dst_reg = retype(g0, tc.dst_type);
      const auto imm_reg = retype(zero, tc.src_type);

      brw_MOV(p, dst_reg, imm_reg);

      /* Patch in the destination horizontal stride under test. */
      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, tc.stride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2081
/* Align1 regioning rules for 64-bit types and integer DWord multiplies.
 *
 * Each row describes a MOV or MUL on g0 with an explicit destination
 * subregister/stride and source 0 subregister/region.  The per-row
 * expectation is only enforced when intel_device_info_is_9lp() is true; on
 * every other device all rows must validate.
 */
TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
{
   /* Not run on Gfx12+.
    *
    * NOTE(review): the historical justification given here was that
    * NoDDChk/NoDDClr does not exist on Gfx12+, but this test never programs
    * those bits -- confirm the actual reason before relaxing this guard.
    */
   if (devinfo.ver >= 12)
      return;

   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      unsigned dst_subreg;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      unsigned src_subreg;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   } cases[] = {
#define INST(op, exec, dst_t, dst_sub, dst_hs, src_t, \
             src_sub, src_vs, src_w, src_hs, expect) \
   { \
      BRW_OPCODE_##op, \
      BRW_EXECUTE_##exec, \
      BRW_TYPE_##dst_t, \
      dst_sub, \
      BRW_HORIZONTAL_STRIDE_##dst_hs, \
      BRW_TYPE_##src_t, \
      src_sub, \
      BRW_VERTICAL_STRIDE_##src_vs, \
      BRW_WIDTH_##src_w, \
      BRW_HORIZONTAL_STRIDE_##src_hs, \
      expect, \
   }

      /* Control group: instructions that break none of the rules below. */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
      INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      /* Control group with non-zero subregister numbers. */
      INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
      INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),

      INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
      INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       */
      INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
      INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
      INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),

      /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
      INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),

      INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),

      INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
      INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
      INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),

      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),

      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),

      /*    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       */
      INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
      INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),

      INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
      INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
      INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),

      INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
      INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),

      INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
      INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),

      INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),

      INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),

      INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
      INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),

      INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
      INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),

#undef INST
   };

   for (const auto &tc : cases) {
      /* Rows using types the device lacks are skipped entirely. */
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = tc.dst_type == BRW_TYPE_Q ||
                                  tc.dst_type == BRW_TYPE_UQ ||
                                  tc.src_type == BRW_TYPE_Q ||
                                  tc.src_type == BRW_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      const auto dst_reg = retype(g0, tc.dst_type);
      const auto src0_reg = retype(g0, tc.src_type);

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, dst_reg, src0_reg, retype(zero, tc.src_type));
      } else {
         brw_MOV(p, dst_reg, src0_reg);
      }
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      brw_eu_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, tc.dst_subreg);
      brw_eu_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, tc.src_subreg);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Outside of 9lp devices every row is expected to validate. */
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }
}
2265
/* Indirect addressing combined with 64-bit types or integer DWord multiply.
 *
 * Each row describes a MOV or MUL on g0 and selects, independently for the
 * destination and source 0, whether the address mode field is set.  The
 * per-row expectation is only enforced when intel_device_info_is_9lp() is
 * true; on every other device all rows must validate.
 */
TEST_P(validation_test, qword_low_power_no_indirect_addressing)
{
   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      bool dst_is_indirect;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      bool src_is_indirect;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   } cases[] = {
#define INST(op, exec, dst_t, dst_indirect, dst_hs, \
             src_t, src_indirect, src_vs, src_w, src_hs, \
             expect) \
   { \
      BRW_OPCODE_##op, \
      BRW_EXECUTE_##exec, \
      BRW_TYPE_##dst_t, \
      dst_indirect, \
      BRW_HORIZONTAL_STRIDE_##dst_hs, \
      BRW_TYPE_##src_t, \
      src_indirect, \
      BRW_VERTICAL_STRIDE_##src_vs, \
      BRW_WIDTH_##src_w, \
      BRW_HORIZONTAL_STRIDE_##src_hs, \
      expect, \
   }

      /* Control group: instructions that break no restriction. */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
      INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
      INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       */
      INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),

      INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),

      INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),

      INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
      INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),

      INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),

#undef INST
   };

   for (const auto &tc : cases) {
      /* Rows using types the device lacks are skipped entirely. */
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = tc.dst_type == BRW_TYPE_Q ||
                                  tc.dst_type == BRW_TYPE_UQ ||
                                  tc.src_type == BRW_TYPE_Q ||
                                  tc.src_type == BRW_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      const auto dst_reg = retype(g0, tc.dst_type);
      const auto src0_reg = retype(g0, tc.src_type);

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, dst_reg, src0_reg, retype(zero, tc.src_type));
      } else {
         brw_MOV(p, dst_reg, src0_reg);
      }
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      /* The bool flags are written straight into the address mode fields. */
      brw_eu_inst_set_dst_address_mode(&devinfo, last_inst, tc.dst_is_indirect);
      brw_eu_inst_set_src0_address_mode(&devinfo, last_inst, tc.src_is_indirect);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Outside of 9lp devices every row is expected to validate. */
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }
}
2393
/* ARF registers combined with 64-bit types or integer DWord multiply.
 *
 * Each row describes a MOV or MUL with explicit destination and source 0
 * registers (g0, acc0 or null), plus an optional accumulator write control
 * bit.  The per-row expectation is only enforced when
 * intel_device_info_is_9lp() is true; on every other device all rows must
 * validate.  A final DF MAC is checked the same way.
 */
TEST_P(validation_test, qword_low_power_no_64bit_arf)
{
   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      struct brw_reg dst;
      enum brw_reg_type dst_type;
      unsigned dst_stride;

      struct brw_reg src;
      enum brw_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool acc_wr;
      bool expected_result;
   } cases[] = {
#define INST(op, exec, dst_reg, dst_t, dst_hs, \
             src_reg, src_t, src_vs, src_w, src_hs, \
             acc_wr_en, expect) \
   { \
      BRW_OPCODE_##op, \
      BRW_EXECUTE_##exec, \
      dst_reg, \
      BRW_TYPE_##dst_t, \
      BRW_HORIZONTAL_STRIDE_##dst_hs, \
      src_reg, \
      BRW_TYPE_##src_t, \
      BRW_VERTICAL_STRIDE_##src_vs, \
      BRW_WIDTH_##src_w, \
      BRW_HORIZONTAL_STRIDE_##src_hs, \
      acc_wr_en, \
      expect, \
   }

      /* Control group: instructions that break no restriction. */
      INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),

      INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),

      INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
      INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       */
      INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
      INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),

      INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),

      INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
      INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),

      INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),

      INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
      INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
      /* MUL cannot have integer accumulator sources, so don't test that */

      /* The restriction is assumed not to cover the null register. */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),

      /* Implicit accumulator write via the write-control bit. */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
      INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),

#undef INST
   };

   for (const auto &tc : cases) {
      /* Rows using types the device lacks are skipped entirely. */
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = tc.dst_type == BRW_TYPE_Q ||
                                  tc.dst_type == BRW_TYPE_UQ ||
                                  tc.src_type == BRW_TYPE_Q ||
                                  tc.src_type == BRW_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      const auto dst_reg = retype(tc.dst, tc.dst_type);
      const auto src0_reg = retype(tc.src, tc.src_type);

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, dst_reg, src0_reg, retype(zero, tc.src_type));
         brw_eu_inst_set_opcode(&isa, last_inst, tc.opcode);
      } else {
         brw_MOV(p, dst_reg, src0_reg);
      }
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      /* The accumulator write control bit is only programmed below ver 20. */
      if (devinfo.ver < 20)
         brw_eu_inst_set_acc_wr_control(&devinfo, last_inst, tc.acc_wr);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Outside of 9lp devices every row is expected to validate. */
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }

   /* The trailing MAC check needs DF support. */
   if (devinfo.has_64bit_float) {
      /* MAC implicitly reads the accumulator */
      const auto df_src = retype(stride(g0, 4, 4, 1), BRW_TYPE_DF);

      brw_MAC(p, retype(g0, BRW_TYPE_DF), df_src, df_src);
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_FALSE(validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }
   }
}
2550
/* Align16 MOV/ADD with a 64-bit destination and 32-bit sources: execution
 * size 2 is expected to validate, execution size 4 to be rejected.
 */
TEST_P(validation_test, align16_64_bit_integer)
{
   /* Align16 does not exist on Gfx11+ */
   if (devinfo.ver >= 11)
      return;

   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;

      bool expected_result;
   } cases[] = {
#define INST(op, exec, dst_t, src_t, expect) \
   { \
      BRW_OPCODE_##op, \
      BRW_EXECUTE_##exec, \
      BRW_TYPE_##dst_t, \
      BRW_TYPE_##src_t, \
      expect, \
   }

      /* Control group: instructions that break no restriction. */
      INST(MOV, 2, Q,  D,  true ),
      INST(MOV, 2, UQ, UD, true ),
      INST(MOV, 2, DF, F,  true ),

      INST(ADD, 2, Q,  D,  true ),
      INST(ADD, 2, UQ, UD, true ),
      INST(ADD, 2, DF, F,  true ),

      /* The PRMs say that for BDW, SKL:
       *
       *    If Align16 is required for an operation with QW destination and non-QW
       *    source datatypes, the execution size cannot exceed 2.
       */

      INST(MOV, 4, Q,  D,  false),
      INST(MOV, 4, UQ, UD, false),
      INST(MOV, 4, DF, F,  false),

      INST(ADD, 4, Q,  D,  false),
      INST(ADD, 4, UQ, UD, false),
      INST(ADD, 4, DF, F,  false),

#undef INST
   };

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const auto &tc : cases) {
      const auto dst_reg = retype(g0, tc.dst_type);
      const auto src_reg = retype(g0, tc.src_type);

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_ADD);
         /* ADD uses the same retyped g0 for both sources. */
         brw_ADD(p, dst_reg, src_reg, src_reg);
      } else {
         brw_MOV(p, dst_reg, src_reg);
      }
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2620
/* NoDDChk / NoDDClr combined with 64-bit types or integer DWord multiply.
 *
 * Each row describes a MOV or MUL on g0 together with the two dependency
 * control bits.  The per-row expectation is only enforced when
 * intel_device_info_is_9lp() is true; on every other device all rows must
 * validate.
 */
TEST_P(validation_test, qword_low_power_no_depctrl)
{
   /* NoDDChk/NoDDClr does not exist on Gfx12+ */
   if (devinfo.ver >= 12)
      return;

   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool no_dd_check;
      bool no_dd_clear;

      bool expected_result;
   } cases[] = {
#define INST(op, exec, dst_t, dst_hs, \
             src_t, src_vs, src_w, src_hs, \
             dd_check, dd_clear, expect) \
   { \
      BRW_OPCODE_##op, \
      BRW_EXECUTE_##exec, \
      BRW_TYPE_##dst_t, \
      BRW_HORIZONTAL_STRIDE_##dst_hs, \
      BRW_TYPE_##src_t, \
      BRW_VERTICAL_STRIDE_##src_vs, \
      BRW_WIDTH_##src_w, \
      BRW_HORIZONTAL_STRIDE_##src_hs, \
      dd_check, \
      dd_clear, \
      expect, \
   }

      /* Control group: instructions that break no restriction. */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, DepCtrl must not be used.
       */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),

      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),

#undef INST
   };

   for (const auto &tc : cases) {
      /* Rows using types the device lacks are skipped entirely. */
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = tc.dst_type == BRW_TYPE_Q ||
                                  tc.dst_type == BRW_TYPE_UQ ||
                                  tc.src_type == BRW_TYPE_Q ||
                                  tc.src_type == BRW_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      const auto dst_reg = retype(g0, tc.dst_type);
      const auto src0_reg = retype(g0, tc.src_type);

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, dst_reg, src0_reg, retype(zero, tc.src_type));
      } else {
         brw_MOV(p, dst_reg, src0_reg);
      }
      brw_eu_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_eu_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_eu_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_eu_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Dependency control bits under test. */
      brw_eu_inst_set_no_dd_check(&devinfo, last_inst, tc.no_dd_check);
      brw_eu_inst_set_no_dd_clear(&devinfo, last_inst, tc.no_dd_clear);

      /* Outside of 9lp devices every row is expected to validate. */
      if (intel_device_info_is_9lp(&devinfo)) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }
}
2747
/*
 * Runs validate() on MOV/ADD/MAD instructions that place byte-typed operands
 * in different source slots (judging by the test name, this targets the
 * Gfx11 restriction on byte types in src1/src2).
 *
 * Each table row names the generation it is written for.  Rows whose gfx_ver
 * differs from devinfo.ver are skipped, so only rows matching the device
 * under test are assembled and compared against expected_result.
 */
TEST_P(validation_test, gfx11_no_byte_src_1_2)
{
   static const struct {
      enum opcode opcode;
      unsigned access_mode;

      enum brw_reg_type dst_type;
      struct {
         enum brw_reg_type type;
         unsigned vstride;
         unsigned width;
         unsigned hstride;
      } srcs[3];

      int gfx_ver;
      bool expected_result;
   } inst[] = {
#define INST(opcode, access_mode, dst_type,                             \
             src0_type, src0_vstride, src0_width, src0_hstride,         \
             src1_type, src1_vstride, src1_width, src1_hstride,         \
             src2_type,                                                 \
             gfx_ver, expected_result)                                  \
      {                                                                 \
         BRW_OPCODE_##opcode,                                           \
         BRW_ALIGN_##access_mode,                                       \
         BRW_TYPE_##dst_type,                                           \
         {                                                              \
            {                                                           \
               BRW_TYPE_##src0_type,                                    \
               BRW_VERTICAL_STRIDE_##src0_vstride,                      \
               BRW_WIDTH_##src0_width,                                  \
               BRW_HORIZONTAL_STRIDE_##src0_hstride,                    \
            },                                                          \
            {                                                           \
               BRW_TYPE_##src1_type,                                    \
               BRW_VERTICAL_STRIDE_##src1_vstride,                      \
               BRW_WIDTH_##src1_width,                                  \
               BRW_HORIZONTAL_STRIDE_##src1_hstride,                    \
            },                                                          \
            {                                                           \
               BRW_TYPE_##src2_type,                                    \
            },                                                          \
         },                                                             \
         gfx_ver,                                                       \
         expected_result,                                               \
      }

      /* Passes on < 11 */
      INST(MOV, 16,  F,  B, 2, 4, 0, UD, 0, 4, 0,  D,  8, true ),
      INST(ADD, 16, UD,  F, 0, 4, 0, UB, 0, 1, 0,  D,  7, true ),
      INST(MAD, 16,  D,  B, 0, 4, 0, UB, 0, 1, 0,  B, 10, true ),

      /* Fails on 11+ */
      INST(MAD,  1, UB,  W, 1, 1, 0,  D, 0, 4, 0,  B, 11, false ),
      INST(MAD,  1, UB,  W, 1, 1, 1, UB, 1, 1, 0,  W, 11, false ),
      INST(ADD,  1,  W,  W, 1, 4, 1,  B, 1, 1, 0,  D, 11, false ),

      /* Passes on 11+ */
      INST(MOV,  1,  W,  B, 8, 8, 1,  D, 8, 8, 1,  D, 11, true ),
      INST(ADD,  1, UD,  B, 8, 8, 1,  W, 8, 8, 1,  D, 11, true ),
      INST(MAD,  1,  B,  B, 0, 1, 0,  D, 0, 4, 0,  W, 11, true ),

#undef INST
   };


   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Skip instruction not meant for this gfx_ver. */
      if (devinfo.ver != inst[i].gfx_ver)
         continue;

      /* The exec size / access mode defaults are scoped to this row. */
      brw_push_insn_state(p);

      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_access_mode(p, inst[i].access_mode);

      switch (inst[i].opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type));
         brw_eu_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_eu_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type),
                    retype(g0, inst[i].srcs[1].type));
         brw_eu_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_eu_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
         brw_eu_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         brw_eu_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
         brw_eu_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
         brw_eu_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
         break;
      case BRW_OPCODE_MAD:
         brw_MAD(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type),
                    retype(g0, inst[i].srcs[1].type),
                    retype(g0, inst[i].srcs[2].type));
         brw_eu_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_eu_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         /* src1 takes its region from the src1 column of the table, not
          * from src0.
          */
         brw_eu_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
         brw_eu_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
         break;
      default:
         unreachable("invalid opcode");
      }

      brw_eu_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      /* Applied regardless of opcode.
       * NOTE(review): for MOV/MAD these two-source width setters are also
       * executed; confirm that is intended.
       */
      brw_eu_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
      brw_eu_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);

      brw_pop_insn_state(p);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2868
/*
 * Builds ADD3 instructions with a range of destination/source type
 * combinations and checks that validate() agrees with the verdict recorded
 * in each row.  Nothing is tested when devinfo.verx10 < 125.
 */
TEST_P(validation_test, add3_source_types)
{
   if (devinfo.verx10 < 125)
      return;

   struct add3_types {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      enum brw_reg_type src2_type;
      bool expected_result;
   };

#define ADD3_TYPES(d, s0, s1, s2, verdict)                              \
   { BRW_TYPE_##d, BRW_TYPE_##s0, BRW_TYPE_##s1, BRW_TYPE_##s2, verdict }

   static const add3_types cases[] = {
      /* Float and byte types: expected to be rejected. */
      ADD3_TYPES( F,  F,  F,  F, false),
      ADD3_TYPES(HF, HF, HF, HF, false),
      ADD3_TYPES( B,  B,  B,  B, false),
      ADD3_TYPES(UB, UB, UB, UB, false),

      /* Uniform word / dword types: expected to be accepted. */
      ADD3_TYPES( W,  W,  W,  W, true),
      ADD3_TYPES(UW, UW, UW, UW, true),
      ADD3_TYPES( D,  D,  D,  D, true),
      ADD3_TYPES(UD, UD, UD, UD, true),

      /* Mixed word / dword sources: expected to be accepted. */
      ADD3_TYPES( W,  D,  W,  W, true),
      ADD3_TYPES(UW, UW, UD, UW, true),
      ADD3_TYPES( D,  D,  W,  D, true),
      ADD3_TYPES(UD, UD, UD, UW, true),
   };

#undef ADD3_TYPES

   for (const add3_types &tc : cases) {
      const struct brw_reg dst  = retype(g0, tc.dst_type);
      const struct brw_reg src0 = retype(g0, tc.src0_type);
      const struct brw_reg src1 = retype(g0, tc.src1_type);
      const struct brw_reg src2 = retype(g0, tc.src2_type);

      brw_ADD3(p, dst, src0, src1, src2);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2920
/*
 * Checks which immediate types validate() accepts on ADD3.
 *
 * For every row, imm_src (0 or 2) selects the source slot that is replaced
 * by an immediate retyped to imm_type; the destination and the remaining
 * sources are g0 retyped to reg_type.  Nothing is tested when
 * devinfo.verx10 < 125.
 */
TEST_P(validation_test, add3_immediate_types)
{
   static const struct {
      enum brw_reg_type reg_type;
      enum brw_reg_type imm_type;
      unsigned imm_src;
      bool expected_result;
   } inst[] = {
#define INST(reg_type, imm_type, imm_src, expected_result)              \
      {                                                                 \
         BRW_TYPE_##reg_type,                                           \
         BRW_TYPE_##imm_type,                                           \
         imm_src,                                                       \
         expected_result,                                               \
      }

      /* 16-bit immediates. */
      INST( W,  W, 0, true),
      INST( W,  W, 2, true),
      INST(UW, UW, 0, true),
      INST(UW, UW, 2, true),
      INST( D,  W, 0, true),
      INST(UD,  W, 2, true),
      INST( D, UW, 0, true),
      /* Was a second copy of INST(UW, UW, 2, true); the UD register with a
       * UW immediate on src2 combination was otherwise never exercised.
       */
      INST(UD, UW, 2, true),

      /* 32-bit immediates. */
      INST( W,  D, 0, false),
      INST( W,  D, 2, false),
      INST(UW, UD, 0, false),
      INST(UW, UD, 2, false),
      INST( D,  D, 0, false),
      INST(UD,  D, 2, false),
      INST( D, UD, 0, false),
      /* Was a second copy of INST(UW, UD, 2, false); see above. */
      INST(UD, UD, 2, false),
#undef INST
   };


   if (devinfo.verx10 < 125)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD3(p,
               retype(g0, inst[i].reg_type),
               inst[i].imm_src == 0 ? retype(brw_imm_d(0x1234), inst[i].imm_type)
                                    : retype(g0, inst[i].reg_type),
               retype(g0, inst[i].reg_type),
               inst[i].imm_src == 2 ? retype(brw_imm_d(0x2143), inst[i].imm_type)
                                    : retype(g0, inst[i].reg_type));

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2975
/*
 * Emits a DPAS for each systolic depth encoding and expects validate() to
 * accept only BRW_SYSTOLIC_DEPTH_8.  Nothing is tested when
 * devinfo.verx10 < 125.
 */
TEST_P(validation_test, dpas_sdepth)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum gfx12_systolic_depth candidates[] = {
      BRW_SYSTOLIC_DEPTH_16,
      BRW_SYSTOLIC_DEPTH_2,
      BRW_SYSTOLIC_DEPTH_4,
      BRW_SYSTOLIC_DEPTH_8,
   };

   /* Default exec size is 16 on ver >= 20 and 8 otherwise. */
   if (devinfo.ver >= 20)
      brw_set_default_exec_size(p, BRW_EXECUTE_16);
   else
      brw_set_default_exec_size(p, BRW_EXECUTE_8);

   for (const enum gfx12_systolic_depth sdepth : candidates) {
      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
      const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
      const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

      brw_DPAS(p, sdepth, 8, dst, null, src1, src2);

      const bool expected_result = (sdepth == BRW_SYSTOLIC_DEPTH_8);

      EXPECT_EQ(expected_result, validate(p)) <<
         "Encoded systolic depth value is: " << sdepth;

      clear_instructions(p);
   }
}
3008
/*
 * Emits the same DPAS at every execution size and expects validate() to
 * accept only one of them: BRW_EXECUTE_16 when devinfo.ver >= 20,
 * BRW_EXECUTE_8 otherwise.  Nothing is tested when devinfo.verx10 < 125.
 */
TEST_P(validation_test, dpas_exec_size)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum brw_execution_size all_sizes[] = {
      BRW_EXECUTE_1,
      BRW_EXECUTE_2,
      BRW_EXECUTE_4,
      BRW_EXECUTE_8,
      BRW_EXECUTE_16,
      BRW_EXECUTE_32,
   };

   const brw_execution_size valid_exec_size =
      devinfo.ver >= 20 ? BRW_EXECUTE_16 : BRW_EXECUTE_8;

   for (const enum brw_execution_size exec_size : all_sizes) {
      brw_set_default_exec_size(p, exec_size);

      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
      const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
      const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, null, src1, src2);

      const bool expected_result = (exec_size == valid_exec_size);

      EXPECT_EQ(expected_result, validate(p)) <<
         "Exec size = " << (1u << exec_size);

      clear_instructions(p);
   }

   /* Leave the default exec size at 8 once the sweep is done. */
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
}
3047
/*
 * Emits DPAS instructions, then overrides the src1/src2 sub-byte precision
 * fields with the values from each test vector and compares validate()
 * against the recorded verdict.  Nothing is tested when
 * devinfo.verx10 < 125.
 */
TEST_P(validation_test, dpas_sub_byte_precision)
{
   if (devinfo.verx10 < 125)
      return;

   struct subbyte_vector {
      brw_reg_type dst_type;
      brw_reg_type src0_type;
      brw_reg_type src1_type;
      enum gfx12_sub_byte_precision src1_prec;
      brw_reg_type src2_type;
      enum gfx12_sub_byte_precision src2_prec;
      bool expected_result;
   };

#define SUBBYTE_TV(d, t0, t1, prec1, t2, prec2, verdict)                \
   {                                                                    \
      BRW_TYPE_##d,                                                     \
      BRW_TYPE_##t0,                                                    \
      BRW_TYPE_##t1, prec1,                                             \
      BRW_TYPE_##t2, prec2,                                             \
      verdict,                                                          \
   }

   static const subbyte_vector test_vectors[] = {
      /* Half-float sources: only "no sub-byte precision" is expected to
       * validate.
       */
      SUBBYTE_TV(F, F, HF, BRW_SUB_BYTE_PRECISION_NONE,
                       HF, BRW_SUB_BYTE_PRECISION_NONE, true),
      SUBBYTE_TV(F, F, HF, BRW_SUB_BYTE_PRECISION_NONE,
                       HF, BRW_SUB_BYTE_PRECISION_4BIT, false),
      SUBBYTE_TV(F, F, HF, BRW_SUB_BYTE_PRECISION_NONE,
                       HF, BRW_SUB_BYTE_PRECISION_2BIT, false),
      SUBBYTE_TV(F, F, HF, BRW_SUB_BYTE_PRECISION_4BIT,
                       HF, BRW_SUB_BYTE_PRECISION_NONE, false),
      SUBBYTE_TV(F, F, HF, BRW_SUB_BYTE_PRECISION_2BIT,
                       HF, BRW_SUB_BYTE_PRECISION_NONE, false),

      /* Unsigned byte sources: the named precisions are expected to
       * validate, the raw encoding 3 is not.
       */
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_NONE,
                         UB, BRW_SUB_BYTE_PRECISION_NONE, true),
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_NONE,
                         UB, BRW_SUB_BYTE_PRECISION_4BIT, true),
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_NONE,
                         UB, BRW_SUB_BYTE_PRECISION_2BIT, true),
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_NONE,
                         UB, (enum gfx12_sub_byte_precision) 3, false),
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_4BIT,
                         UB, BRW_SUB_BYTE_PRECISION_NONE, true),
      SUBBYTE_TV(UD, UD, UB, BRW_SUB_BYTE_PRECISION_2BIT,
                         UB, BRW_SUB_BYTE_PRECISION_NONE, true),
      SUBBYTE_TV(UD, UD, UB, (enum gfx12_sub_byte_precision) 3,
                         UB, BRW_SUB_BYTE_PRECISION_NONE, false),
   };

#undef SUBBYTE_TV

   if (devinfo.ver >= 20)
      brw_set_default_exec_size(p, BRW_EXECUTE_16);
   else
      brw_set_default_exec_size(p, BRW_EXECUTE_8);

   for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
      const subbyte_vector &tv = test_vectors[i];

      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), tv.dst_type);
      const struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0_type);
      const struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src1_type);
      const struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src2_type);

      brw_eu_inst *dpas =
         brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);

      /* Patch the precision fields directly on the emitted instruction. */
      brw_eu_inst_set_dpas_3src_src1_subbyte(&devinfo, dpas, tv.src1_prec);
      brw_eu_inst_set_dpas_3src_src2_subbyte(&devinfo, dpas, tv.src2_prec);

      EXPECT_EQ(tv.expected_result, validate(p)) <<
         "test vector index = " << i;

      clear_instructions(p);
   }
}
3173
/*
 * Emits DPAS instructions with many destination/source type combinations
 * and compares validate() against the verdict stored with each
 * combination.  Nothing is tested when devinfo.verx10 < 125.
 */
TEST_P(validation_test, dpas_types)
{
   if (devinfo.verx10 < 125)
      return;

   struct dpas_type_vector {
      brw_reg_type dst_type;
      brw_reg_type src0_type;
      brw_reg_type src1_type;
      brw_reg_type src2_type;
      bool expected_result;
   };

#define TV(a, b, c, d, r) \
   { BRW_TYPE_ ## a, BRW_TYPE_ ## b, BRW_TYPE_ ## c, BRW_TYPE_ ## d, r }

   static const dpas_type_vector test_vectors[] = {
      TV( F,  F, HF, HF, true),
      TV( F, HF, HF, HF, false),
      TV(HF,  F, HF, HF, false),
      TV( F,  F,  F, HF, false),
      TV( F,  F, HF,  F, false),

      TV(DF, DF, DF, DF, false),
      TV(DF, DF, DF,  F, false),
      TV(DF, DF,  F, DF, false),
      TV(DF,  F, DF, DF, false),
      TV(DF, DF, DF, HF, false),
      TV(DF, DF, HF, DF, false),
      TV(DF, HF, DF, DF, false),

      TV(UD, UD, UB, UB, true),
      TV(UD, UD, UB, UD, false),
      TV(UD, UD, UD, UB, false),
      TV(UD, UD, UB, UW, false),
      TV(UD, UD, UW, UB, false),

      TV(UD, UB, UB, UB, false),
      TV(UD, UW, UB, UB, false),

      TV(UQ, UQ, UB, UB, false),
      TV(UQ, UQ, UB, UQ, false),
      TV(UQ, UQ, UQ, UB, false),
      TV(UQ, UQ, UB, UW, false),
      TV(UQ, UQ, UW, UB, false),

      TV( D,  D,  B,  B, true),
      TV( D,  D,  B, UB, true),
      TV( D,  D, UB,  B, true),
      TV( D, UD,  B,  B, true),

      TV( D,  D,  B,  D, false),
      TV( D,  D,  D,  B, false),
      TV( D,  D,  B,  W, false),
      TV( D,  D,  W,  B, false),

      TV( D,  B,  B,  B, false),
      TV( D,  W,  B,  B, false),

      TV( Q,  Q,  B,  B, false),
      TV( Q,  Q,  B,  Q, false),
      TV( Q,  Q,  Q,  B, false),
      TV( Q,  Q,  B,  W, false),
      TV( Q,  Q,  W,  B, false),

      TV(UD, UD, UB,  B, false),
      TV(UD, UD,  B, UB, false),
      TV(UD,  D, UB, UB, false),
   };

#undef TV

   if (devinfo.ver >= 20)
      brw_set_default_exec_size(p, BRW_EXECUTE_16);
   else
      brw_set_default_exec_size(p, BRW_EXECUTE_8);

   for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
      const dpas_type_vector &tv = test_vectors[i];

      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), tv.dst_type);
      const struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0_type);
      const struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src1_type);
      const struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src2_type);

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);

      EXPECT_EQ(tv.expected_result, validate(p)) <<
         "test vector index = " << i;

      clear_instructions(p);
   }
}
3262
/*
 * Emits DPAS instructions whose destination and sources carry various
 * sub-register numbers and compares validate() against the verdict stored
 * with each vector.  Nothing is tested when devinfo.verx10 < 125.
 */
TEST_P(validation_test, dpas_src_subreg_nr)
{
   if (devinfo.verx10 < 125)
      return;

   struct subnr_vector {
      brw_reg_type dst_type;
      unsigned dst_subnr;
      brw_reg_type src0_type;
      unsigned src0_subnr;
      brw_reg_type src1_src2_type;
      unsigned src1_subnr;
      unsigned src2_subnr;
      bool expected_result;
   };

#define TV(dt, od, t0, o0, t1, o1, o2, r) \
   { BRW_TYPE_ ## dt, od, BRW_TYPE_ ## t0, o0, BRW_TYPE_ ## t1, o1, o2, r }

   static const subnr_vector test_vectors[] = {
      /* All sub-register numbers zero. */
      TV( F,  0,  F,  0, HF,  0,  0, true),
      TV( D,  0,  D,  0,  B,  0,  0, true),
      TV( D,  0,  D,  0, UB,  0,  0, true),
      TV( D,  0, UD,  0,  B,  0,  0, true),

      /* Non-zero destination subnr. */
      TV( F,  1,  F,  0, HF,  0,  0, false),
      TV( F,  2,  F,  0, HF,  0,  0, false),
      TV( F,  3,  F,  0, HF,  0,  0, false),
      TV( F,  4,  F,  0, HF,  0,  0, false),
      TV( F,  5,  F,  0, HF,  0,  0, false),
      TV( F,  6,  F,  0, HF,  0,  0, false),
      TV( F,  7,  F,  0, HF,  0,  0, false),

      /* Non-zero src0 subnr. */
      TV( F,  0,  F,  1, HF,  0,  0, false),
      TV( F,  0,  F,  2, HF,  0,  0, false),
      TV( F,  0,  F,  3, HF,  0,  0, false),
      TV( F,  0,  F,  4, HF,  0,  0, false),
      TV( F,  0,  F,  5, HF,  0,  0, false),
      TV( F,  0,  F,  6, HF,  0,  0, false),
      TV( F,  0,  F,  7, HF,  0,  0, false),

      /* Non-zero src1 subnr. */
      TV( F,  0,  F,  0, HF,  1,  0, false),
      TV( F,  0,  F,  0, HF,  2,  0, false),
      TV( F,  0,  F,  0, HF,  3,  0, false),
      TV( F,  0,  F,  0, HF,  4,  0, false),
      TV( F,  0,  F,  0, HF,  5,  0, false),
      TV( F,  0,  F,  0, HF,  6,  0, false),
      TV( F,  0,  F,  0, HF,  7,  0, false),
      TV( F,  0,  F,  0, HF,  8,  0, false),
      TV( F,  0,  F,  0, HF,  9,  0, false),
      TV( F,  0,  F,  0, HF, 10,  0, false),
      TV( F,  0,  F,  0, HF, 11,  0, false),
      TV( F,  0,  F,  0, HF, 12,  0, false),
      TV( F,  0,  F,  0, HF, 13,  0, false),
      TV( F,  0,  F,  0, HF, 14,  0, false),
      TV( F,  0,  F,  0, HF, 15,  0, false),

      /* Non-zero src2 subnr. */
      TV( F,  0,  F,  0, HF,  0,  1, false),
      TV( F,  0,  F,  0, HF,  0,  2, false),
      TV( F,  0,  F,  0, HF,  0,  3, false),
      TV( F,  0,  F,  0, HF,  0,  4, false),
      TV( F,  0,  F,  0, HF,  0,  5, false),
      TV( F,  0,  F,  0, HF,  0,  6, false),
      TV( F,  0,  F,  0, HF,  0,  7, false),
      TV( F,  0,  F,  0, HF,  0,  8, false),
      TV( F,  0,  F,  0, HF,  0,  9, false),
      TV( F,  0,  F,  0, HF,  0, 10, false),
      TV( F,  0,  F,  0, HF,  0, 11, false),
      TV( F,  0,  F,  0, HF,  0, 12, false),
      TV( F,  0,  F,  0, HF,  0, 13, false),
      TV( F,  0,  F,  0, HF,  0, 14, false),
      TV( F,  0,  F,  0, HF,  0, 15, false),

      /* These meet the requirements, but they specify a subnr that is part of
       * the next register. It is currently not possible to specify a subnr of
       * 32 for the B and UB values because brw_reg::subnr is only 5 bits.
       */
      TV( F, 16,  F,  0, HF,  0,  0, false),
      TV( F,  0,  F, 16, HF,  0,  0, false),
      TV( F,  0,  F,  0, HF,  0, 16, false),

      TV( D, 16,  D,  0,  B,  0,  0, false),
      TV( D,  0,  D, 16,  B,  0,  0, false),
   };

#undef TV

   if (devinfo.ver >= 20)
      brw_set_default_exec_size(p, BRW_EXECUTE_16);
   else
      brw_set_default_exec_size(p, BRW_EXECUTE_8);

   for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
      const subnr_vector &tv = test_vectors[i];

      struct brw_reg dst  = retype(brw_vec8_grf( 0, 0), tv.dst_type);
      struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0_type);
      struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src1_src2_type);
      struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src1_src2_type);

      /* DPAS expresses subnr in units of datatype precision rather than in
       * bytes like every other instruction, so the field is written
       * directly instead of going through byte_offset() or similar.
       */
      dst.subnr  = tv.dst_subnr;
      src0.subnr = tv.src0_subnr;
      src1.subnr = tv.src1_subnr;
      src2.subnr = tv.src2_subnr;

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);

      EXPECT_EQ(tv.expected_result, validate(p)) <<
         "test vector index = " << i;

      clear_instructions(p);
   }
}
3378
/*
 * Builds ADD instructions from explicit destination/source region
 * descriptions and compares validate() against the verdict stored with
 * each vector.  Nothing is tested when devinfo.verx10 < 200.
 */
TEST_P(validation_test, xe2_register_region_special_restrictions_for_src0_and_src1)
{
   if (devinfo.verx10 < 200)
      return;

   /* See "Src0 Restrictions" and "Src1 Restrictions" in "Special Restrictions"
    * in Bspec 56640 (r57070).
    */

   /* Marker used in the table's vertical-stride column; it is passed to
    * brw_make_reg() as 0xF instead of going through cvt().
    */
   const unsigned V = 0xF;

#define DST(t, s, h)           { BRW_TYPE_ ## t, s, h }
#define SRC(t, s, v, w, h, ...) { BRW_TYPE_ ## t, s, v, w, h, __VA_ARGS__ }
#define INDIRECT               true

   static const struct {
      struct {
         brw_reg_type type;
         unsigned subnr;
         unsigned h;
      } dst;

      struct {
         brw_reg_type type;
         unsigned subnr;
         unsigned v;
         unsigned w;
         unsigned h;
         bool indirect;
      } src0, src1;

      bool expected_result;
   } test_vectors[] = {
      /* Source 0.  One element per dword channel. */
      { DST( D, 0, 1 ), SRC( D, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( D, 0, 1 ), SRC( W, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( D, 0, 1 ), SRC( B, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },

      { DST( W, 0, 2 ), SRC( D, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( W, 0, 2 ), SRC( W, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( W, 0, 2 ), SRC( B, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },

      { DST( B, 0, 4 ), SRC( D, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( B, 0, 4 ), SRC( W, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( B, 0, 4 ), SRC( B, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },

      { DST( D, 0, 1 ), SRC( D, 0,  V,8,1, INDIRECT ), SRC( D, 0,  1,1,0 ), true },
      { DST( D, 0, 1 ), SRC( D, 0,  V,1,0, INDIRECT ), SRC( D, 0,  1,1,0 ), true },

      /* Source 0.  Uniform stride W->W cases. */
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 2,  1,1,0 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 0,  2,1,0 ), SRC( W, 0,  1,1,0 ), false },
      { DST( W, 1, 1 ), SRC( W, 2,  2,1,0 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 0,  4,1,0 ), SRC( W, 0,  1,1,0 ), false },
      { DST( W, 1, 1 ), SRC( W, 2,  4,1,0 ), SRC( W, 0,  1,1,0 ), false },

      /* Source 0.  Dword aligned W->W cases. */
      { DST( W, 2, 1 ), SRC( W, 0,  8,4,1 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 2, 1 ), SRC( W, 4,  8,4,1 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 2, 1 ), SRC( W, 0,  8,4,2 ), SRC( W, 0,  1,1,0 ), false },
      { DST( W, 2, 1 ), SRC( W, 4,  8,4,2 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 2, 1 ), SRC( W, 0, 16,2,4 ), SRC( W, 0,  1,1,0 ), false },
      { DST( W, 2, 1 ), SRC( W, 4, 16,2,4 ), SRC( W, 0,  1,1,0 ), false },

      /* Source 0.  Uniform stride W->B cases. */
      { DST( B, 2, 2 ), SRC( W, 0,  1,1,0), SRC( W, 0,  1,1,0 ), true },
      { DST( B, 2, 2 ), SRC( W, 1,  1,1,0), SRC( W, 0,  1,1,0 ), true },
      { DST( B, 2, 2 ), SRC( W, 0,  2,1,0), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 2, 2 ), SRC( W, 1,  2,1,0), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 2, 2 ), SRC( W, 0,  4,1,0), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 2, 2 ), SRC( W, 1,  4,1,0), SRC( W, 0,  1,1,0 ), false },

      /* Source 0.  Dword aligned W->B cases. */
      { DST( B, 4, 2 ), SRC( W, 0,  8,4,1 ), SRC( W, 0,  1,1,0 ), true },
      { DST( B, 4, 2 ), SRC( W, 2,  8,4,1 ), SRC( W, 0,  1,1,0 ), true },
      { DST( B, 4, 2 ), SRC( W, 0,  8,4,2 ), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 4, 2 ), SRC( W, 2,  8,4,2 ), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 4, 2 ), SRC( W, 0, 16,2,4 ), SRC( W, 0,  1,1,0 ), false },
      { DST( B, 4, 2 ), SRC( W, 2, 16,2,4 ), SRC( W, 0,  1,1,0 ), false },

      /* TODO: Add B->W and B->B cases. */

      /* Source 1.  One element per dword channel. */
      { DST( D, 0, 1 ), SRC( D, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( D, 0, 1 ), SRC( D, 0,  1,1,0 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 0, 2 ), SRC( D, 0,  1,1,0 ), SRC( D, 0,  1,1,0 ), true },
      { DST( W, 0, 2 ), SRC( D, 0,  1,1,0 ), SRC( W, 0,  1,1,0 ), true },

      /* Source 1.  Uniform stride W->W cases. */
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  1,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 2,  1,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  2,1,0 ), false },
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 2,  2,1,0 ), true },
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  4,1,0 ), false },
      { DST( W, 1, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 2,  4,1,0 ), false },

      /* Source 1.  Dword aligned W->W cases. */
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  8,4,1 ), true },
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 4,  8,4,1 ), true },
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  8,4,2 ), false },
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 4,  8,4,2 ), true },
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 0, 16,2,4 ), false },
      { DST( W, 2, 1 ), SRC( W, 0,  1,1,0 ), SRC( W, 4, 16,2,4 ), false },

      /* Source 1.  Uniform stride W->B cases. */
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 0,  1,1,0), true },
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 1,  1,1,0), true },
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 0,  2,1,0), false },
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 1,  2,1,0), false },
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 0,  4,1,0), false },
      { DST( B, 2, 2 ), SRC( B, 0,  1,1,0 ), SRC( W, 1,  4,1,0), false },

      /* Source 1.  Dword aligned W->B cases. */
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  8,4,1 ), true },
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 2,  8,4,1 ), true },
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 0,  8,4,2 ), false },
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 2,  8,4,2 ), false },
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 0, 16,2,4 ), false },
      { DST( B, 4, 2 ), SRC( W, 0,  1,1,0 ), SRC( W, 2, 16,2,4 ), false },
   };

#undef DST
#undef SRC
#undef SOME
#undef INDIRECT

   /* Turns one source description into a FIXED_GRF register at number nr,
    * switching to register-indirect addressing when the row asks for it.
    */
   auto make_src = [&](unsigned nr, const auto &desc) {
      struct brw_reg reg =
         brw_make_reg(FIXED_GRF,
                      nr,
                      desc.subnr,
                      0,
                      0,
                      desc.type,
                      desc.v == V ? 0xF : cvt(desc.v),
                      cvt(desc.w) - 1,
                      cvt(desc.h),
                      BRW_SWIZZLE_XYZW,
                      WRITEMASK_XYZW);
      if (desc.indirect)
         reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
      return reg;
   };

   for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
      const auto &tv = test_vectors[i];

      /* The destination uses the same stride value for both its vertical
       * and horizontal stride, with a width of 1.
       */
      struct brw_reg dst =
         brw_make_reg(FIXED_GRF,
                      0,
                      tv.dst.subnr,
                      0,
                      0,
                      tv.dst.type,
                      cvt(tv.dst.h),
                      BRW_WIDTH_1,
                      cvt(tv.dst.h),
                      BRW_SWIZZLE_XYZW,
                      WRITEMASK_XYZW);

      struct brw_reg src0 = make_src(2, tv.src0);
      struct brw_reg src1 = make_src(4, tv.src1);

      brw_ADD(p, dst, src0, src1);

      EXPECT_EQ(tv.expected_result, validate(p)) <<
         "test vector index = " << i;

      clear_instructions(p);
   }
}
3558
3559 static brw_reg
brw_s0(enum brw_reg_type type,unsigned subnr)3560 brw_s0(enum brw_reg_type type, unsigned subnr)
3561 {
3562 return brw_make_reg(ARF,
3563 BRW_ARF_SCALAR,
3564 subnr,
3565 0,
3566 0,
3567 type,
3568 BRW_VERTICAL_STRIDE_0,
3569 BRW_WIDTH_1,
3570 BRW_HORIZONTAL_STRIDE_0,
3571 BRW_SWIZZLE_XYZW,
3572 WRITEMASK_XYZW);
3573 }
3574
3575 static brw_reg
brw_s0_with_region(enum brw_reg_type type,unsigned subnr,unsigned v,unsigned w,unsigned h)3576 brw_s0_with_region(enum brw_reg_type type, unsigned subnr, unsigned v, unsigned w, unsigned h)
3577 {
3578 return brw_make_reg(ARF,
3579 BRW_ARF_SCALAR,
3580 subnr,
3581 0,
3582 0,
3583 type,
3584 cvt(v),
3585 cvt(w)-1,
3586 cvt(h),
3587 BRW_SWIZZLE_XYZW,
3588 WRITEMASK_XYZW);
3589 }
3590
3591 static brw_reg
brw_grf(enum brw_reg_type type,unsigned nr,unsigned subnr,unsigned v,unsigned w,unsigned h)3592 brw_grf(enum brw_reg_type type, unsigned nr, unsigned subnr, unsigned v, unsigned w, unsigned h)
3593 {
3594 return brw_make_reg(FIXED_GRF,
3595 nr,
3596 subnr,
3597 0,
3598 0,
3599 type,
3600 cvt(v),
3601 cvt(w)-1,
3602 cvt(h),
3603 BRW_SWIZZLE_XYZW,
3604 WRITEMASK_XYZW);
3605 }
3606
/*
 * Assembles MOV/NOT/ADD/SEND/SENDC instructions that read or write the
 * scalar register and compares validate() against the verdict stored with
 * each case.  Nothing is tested when devinfo.ver < 30.
 */
TEST_P(validation_test, scalar_register_restrictions)
{
   /* Restrictions from BSpec 71168 (r55736). */

   if (devinfo.ver < 30)
      return;

   const brw_reg null_ud = retype(brw_null_reg(), BRW_TYPE_UD);

   struct scalar_case {
      enum opcode opcode;
      unsigned exec_size;
      struct {
         enum brw_conditional_mod cmod;
      } opts;
      brw_reg dst, src0, src1;
      bool expected_result;
   };

   static const struct scalar_case cases[] = {
      { BRW_OPCODE_MOV, 8, {}, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 1,1,0), {}, true },

      /* As a destination, the only allowed opcode is MOV. */
      { BRW_OPCODE_NOT, 8, {}, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 1,1,0), {}, false },
      { BRW_OPCODE_ADD, 8, {}, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 1,1,0), brw_imm_ud(1), false },

      /* Source and destination types must be the same. */
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UQ, 0), brw_imm_uq(0x000036161836341E), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UQ, 0), brw_imm_ud(0x1836341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UQ, 0), brw_imm_uw(0x341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UD, 0), brw_imm_uq(0x000036161836341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UD, 0), brw_imm_ud(0x1836341E), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UD, 0), brw_imm_uw(0x341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UW, 0), brw_imm_uq(0x000036161836341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UW, 0), brw_imm_ud(0x1836341E), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UW, 0), brw_imm_uw(0x341E), {}, true },

      /* As a destination, the type must be a 16-, 32- or 64-bit integer. */
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_B, 0), brw_grf(BRW_TYPE_B, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UB, 0), brw_grf(BRW_TYPE_UB, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_W, 0), brw_grf(BRW_TYPE_W, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UW, 0), brw_grf(BRW_TYPE_UW, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_D, 0), brw_grf(BRW_TYPE_D, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_Q, 0), brw_grf(BRW_TYPE_Q, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UQ, 0), brw_grf(BRW_TYPE_UQ, 1, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_F, 0), brw_grf(BRW_TYPE_F, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_HF, 0), brw_grf(BRW_TYPE_HF, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_DF, 0), brw_grf(BRW_TYPE_DF, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_V, 0), brw_grf(BRW_TYPE_V, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UV, 0), brw_grf(BRW_TYPE_UV, 1, 0, 0,1,0), {}, false },
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_VF, 0), brw_grf(BRW_TYPE_VF, 1, 0, 0,1,0), {}, false },

      /* As a destination with an immediate source, execution size must be 1. */
      { BRW_OPCODE_MOV, 8, {}, brw_s0(BRW_TYPE_UW, 0), brw_imm_uw(0x1234), {}, false },

      /* As a destination with an immediate source, a conditional modifier
       * cannot be used.
       */
      { BRW_OPCODE_MOV, 1, {.cmod = BRW_CONDITIONAL_Z}, brw_s0(BRW_TYPE_UW, 0), brw_imm_uw(0x341E), {}, false },
      { BRW_OPCODE_MOV, 1, {.cmod = BRW_CONDITIONAL_Z}, brw_s0(BRW_TYPE_UW, 0), brw_grf(BRW_TYPE_UW, 1, 0, 0,1,0), {}, true },

      /* A scalar source cannot be paired with a scalar destination. */
      { BRW_OPCODE_MOV, 1, {}, brw_s0(BRW_TYPE_UW, 0), brw_s0(BRW_TYPE_UW, 4), {}, false },

      /* A scalar source of MOV must be a broadcast. */
      { BRW_OPCODE_MOV, 8, {}, brw_grf(BRW_TYPE_UW, 1, 0, 1,1,0), brw_s0_with_region(BRW_TYPE_UW, 0, 0,1,0), {}, true },
      { BRW_OPCODE_MOV, 8, {}, brw_grf(BRW_TYPE_UW, 1, 0, 1,1,0), brw_s0_with_region(BRW_TYPE_UW, 0, 1,1,0), {}, false },
      { BRW_OPCODE_MOV, 8, {}, brw_grf(BRW_TYPE_UW, 1, 0, 1,1,0), brw_s0_with_region(BRW_TYPE_UW, 0, 8,8,1), {}, false },

      /* With a scalar source 0 on SEND/SENDC, source 1 must be null. */
      { BRW_OPCODE_SEND, 16, {}, null_ud, brw_s0(BRW_TYPE_UD, 0), null_ud, true },
      { BRW_OPCODE_SENDC, 16, {}, null_ud, brw_s0(BRW_TYPE_UD, 0), null_ud, true },
      { BRW_OPCODE_SEND, 16, {}, null_ud, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 0,1,0), false },
      { BRW_OPCODE_SENDC, 16, {}, null_ud, brw_s0(BRW_TYPE_UD, 0), brw_grf(BRW_TYPE_UD, 1, 0, 0,1,0), false },

      /* A scalar register source is only allowed as source 0. */
      { BRW_OPCODE_SEND, 16, {}, null_ud, brw_grf(BRW_TYPE_UD, 0, 0, 0,1,0), brw_grf(BRW_TYPE_UD, 2, 0, 0,1,0), true },
      { BRW_OPCODE_SEND, 16, {}, null_ud, brw_grf(BRW_TYPE_UD, 0, 0, 0,1,0), brw_s0(BRW_TYPE_UD, 0), false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const struct scalar_case &c = cases[i];

      switch (c.opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, c.dst, c.src0);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, c.dst, c.src0);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, c.dst, c.src0, c.src1);
         break;
      case BRW_OPCODE_SEND:
      case BRW_OPCODE_SENDC: {
         /* The send is assembled by hand: allocate the instruction, then
          * fill in its destination and both sources.
          */
         brw_eu_inst *send_inst = brw_next_insn(p, c.opcode);
         brw_set_dest(p, send_inst, c.dst);
         brw_set_src0(p, send_inst, c.src0);
         brw_set_src1(p, send_inst, c.src1);
         break;
      }
      default:
         unreachable("unexpected opcode in tests");
      }

      /* Override exec size and conditional modifier on the emitted
       * instruction with the values from the table.
       */
      brw_eu_inst_set_exec_size(&devinfo, last_inst, cvt(c.exec_size) - 1);
      brw_eu_inst_set_cond_modifier(&devinfo, last_inst, c.opts.cmod);

      EXPECT_EQ(c.expected_result, validate(p)) <<
         "test vector index = " << i;

      clear_instructions(p);
   }
}
3720