• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <gtest/gtest.h>
25 #include "elk_disasm_info.h"
26 #include "elk_eu.h"
27 #include "elk_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30 
/* Short platform names used as the parameter set for every test in this
 * file. validation_test::SetUp() turns each name into a PCI device ID.
 */
static const struct intel_gfx_info {
   const char *name;
} gfx_names[] = {
   { "brw" }, { "g4x" }, { "ilk" },
   { "snb" }, { "ivb" }, { "hsw" },
   { "byt" }, { "bdw" }, { "chv" },
};
44 
45 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
46    virtual void SetUp();
47 
48 public:
49    validation_test();
50    virtual ~validation_test();
51 
52    struct elk_isa_info isa;
53    struct elk_codegen *p;
54    struct intel_device_info devinfo;
55 };
56 
validation_test()57 validation_test::validation_test()
58 {
59    p = rzalloc(NULL, struct elk_codegen);
60    memset(&devinfo, 0, sizeof(devinfo));
61 }
62 
~validation_test()63 validation_test::~validation_test()
64 {
65    ralloc_free(p);
66 }
67 
SetUp()68 void validation_test::SetUp()
69 {
70    struct intel_gfx_info info = GetParam();
71    int devid = intel_device_name_to_pci_device_id(info.name);
72 
73    intel_get_device_info_from_pci_id(devid, &devinfo);
74 
75    elk_init_isa_info(&isa, &devinfo);
76 
77    elk_init_codegen(&isa, p, p);
78 }
79 
80 struct gfx_name {
81    template <class ParamType>
82    std::string
operator ()gfx_name83    operator()(const ::testing::TestParamInfo<ParamType>& info) const {
84       return info.param.name;
85    }
86 };
87 
88 INSTANTIATE_TEST_SUITE_P(
89    eu_assembly, validation_test,
90    ::testing::ValuesIn(gfx_names),
91    gfx_name()
92 );
93 
94 static bool
validate(struct elk_codegen * p)95 validate(struct elk_codegen *p)
96 {
97    const bool print = getenv("TEST_DEBUG");
98    struct elk_disasm_info *disasm = elk_disasm_initialize(p->isa, NULL);
99 
100    if (print) {
101       elk_disasm_new_inst_group(disasm, 0);
102       elk_disasm_new_inst_group(disasm, p->next_insn_offset);
103    }
104 
105    bool ret = elk_validate_instructions(p->isa, p->store, 0,
106                                         p->next_insn_offset, disasm);
107 
108    if (print) {
109       elk_dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
110    }
111    ralloc_free(disasm);
112 
113    return ret;
114 }
115 
/* Shorthands used by the tests below. They expand against a variable named
 * `p` in the enclosing scope (the fixture's codegen context).
 */
#define last_inst    (p->store + (p->nr_insn - 1)) /* most recently emitted */
#define g0           elk_vec8_grf(0, 0)
#define acc0         elk_acc_reg(8)
#define null         elk_null_reg()
#define zero         elk_imm_f(0.0f)
121 
122 static void
clear_instructions(struct elk_codegen * p)123 clear_instructions(struct elk_codegen *p)
124 {
125    p->next_insn_offset = 0;
126    p->nr_insn = 0;
127 }
128 
/* Baseline: a plain ADD on g0 must be accepted by the validator. */
TEST_P(validation_test, sanity)
{
   elk_ADD(p, g0, g0, g0);

   EXPECT_TRUE(validate(p));
}
135 
/* A MOV whose src0 is the null register must be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   elk_MOV(p, g0, null);

   EXPECT_FALSE(validate(p));
}
142 
/* An ADD whose src1 is the null register must be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   elk_ADD(p, g0, g0, null);

   EXPECT_FALSE(validate(p));
}
149 
/* A math instruction with a null src0 must be rejected, whichever of the
 * two math emitters applies to the platform.
 */
TEST_P(validation_test, math_src0_null_reg)
{
   if (devinfo.ver < 6) {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, null, ELK_MATH_PRECISION_FULL);
   } else {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, null, null);
   }

   EXPECT_FALSE(validate(p));
}
160 
/* A two-source math instruction (POW) with a null src1 must be rejected. */
TEST_P(validation_test, math_src1_null_reg)
{
   /* Math instructions on Gfx4/5 are actually SEND messages with payloads.
    * src1 is an immediate message descriptor set by elk_gfx4_math, so there
    * is nothing to exercise on those platforms.
    */
   if (devinfo.ver < 6)
      return;

   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_POW, g0, null);
   EXPECT_FALSE(validate(p));
}
172 
/* Hardware opcode 46 is only invalid on the generation that reserves it. */
TEST_P(validation_test, opcode46)
{
   /* opcode 46 is "push" on Gen 4 and 5
    *              "fork" on Gen 6
    *              reserved on Gen 7
    *              "goto" on Gfx8+
    */
   elk_next_insn(p, elk_opcode_decode(&isa, 46));

   if (devinfo.ver != 7) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
188 
/* Every defined ExecSize encoding must validate; the two raw values just
 * past ELK_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum elk_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { ELK_EXECUTE_1,      true  },
      { ELK_EXECUTE_2,      true  },
      { ELK_EXECUTE_4,      true  },
      { ELK_EXECUTE_8,      true  },
      { ELK_EXECUTE_16,     true  },
      { ELK_EXECUTE_32,     true  },

      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 1), false },
      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 2), false },
   };

   for (const auto &tc : test_case) {
      elk_MOV(p, g0, g0);
      auto *const inst = last_inst;

      elk_inst_set_exec_size(&devinfo, inst, tc.exec_size);
      elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

      /* Use a scalar <0;1,0> source for ExecSize 1 and <2;2,1> otherwise. */
      const bool scalar = tc.exec_size == ELK_EXECUTE_1;
      if (scalar) {
         elk_inst_set_src0_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_0);
         elk_inst_set_src0_width(&devinfo, inst, ELK_WIDTH_1);
         elk_inst_set_src0_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_0);
      } else {
         elk_inst_set_src0_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_2);
         elk_inst_set_src0_width(&devinfo, inst, ELK_WIDTH_2);
         elk_inst_set_src0_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_1);
      }

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
228 
/* The message register file encoding is expected to be accepted up to
 * version 6 and rejected afterwards, both as a MOV destination and as a
 * math src0.
 */
TEST_P(validation_test, invalid_file_encoding)
{
   const bool mrf_accepted = devinfo.ver <= 6;

   /* Case 1: MRF as destination of a MOV. */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   if (mrf_accepted) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* Case 2: MRF as src0 of a math instruction. */
   if (devinfo.ver >= 6) {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   } else {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, g0, ELK_MATH_PRECISION_FULL);
   }
   elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   if (mrf_accepted) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
255 
TEST_P(validation_test,invalid_type_encoding)256 TEST_P(validation_test, invalid_type_encoding)
257 {
258    enum elk_reg_file files[2] = {
259       ELK_GENERAL_REGISTER_FILE,
260       ELK_IMMEDIATE_VALUE,
261    };
262 
263    for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
264       const enum elk_reg_file file = files[i];
265       const int num_bits = devinfo.ver >= 8 ? 4 : 3;
266       const int num_encodings = 1 << num_bits;
267 
268       /* The data types are encoded into <num_bits> bits to be used in hardware
269        * instructions, so keep a record in a bitset the invalid patterns so
270        * they can be verified to be invalid when used.
271        */
272       const int max_bits = 4;
273       assert(max_bits >= num_bits);
274       BITSET_DECLARE(invalid_encodings, 1 << max_bits);
275 
276       const struct {
277          enum elk_reg_type type;
278          bool expected_result;
279       } test_case[] = {
280          { ELK_REGISTER_TYPE_NF, devinfo.ver == 11 && file != IMM },
281          { ELK_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.ver >= 8 || file != IMM) },
282          { ELK_REGISTER_TYPE_F,  true },
283          { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8 },
284          { ELK_REGISTER_TYPE_VF, file == IMM },
285          { ELK_REGISTER_TYPE_Q,  devinfo.has_64bit_int },
286          { ELK_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
287          { ELK_REGISTER_TYPE_D,  true },
288          { ELK_REGISTER_TYPE_UD, true },
289          { ELK_REGISTER_TYPE_W,  true },
290          { ELK_REGISTER_TYPE_UW, true },
291          { ELK_REGISTER_TYPE_B,  file == FIXED_GRF },
292          { ELK_REGISTER_TYPE_UB, file == FIXED_GRF },
293          { ELK_REGISTER_TYPE_V,  file == IMM },
294          { ELK_REGISTER_TYPE_UV, devinfo.ver >= 6 && file == IMM },
295       };
296 
297       /* Initially assume all hardware encodings are invalid */
298       BITSET_ONES(invalid_encodings);
299 
300       elk_set_default_exec_size(p, ELK_EXECUTE_4);
301 
302       for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
303          if (test_case[i].expected_result) {
304             unsigned hw_type = elk_reg_type_to_hw_type(&devinfo, file, test_case[i].type);
305             if (hw_type != INVALID_REG_TYPE) {
306                /* ... and remove valid encodings from the set */
307                assert(BITSET_TEST(invalid_encodings, hw_type));
308                BITSET_CLEAR(invalid_encodings, hw_type);
309             }
310 
311             if (file == FIXED_GRF) {
312                struct elk_reg g = retype(g0, test_case[i].type);
313                elk_MOV(p, g, g);
314                elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
315                elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
316                elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
317             } else {
318                enum elk_reg_type t;
319 
320                switch (test_case[i].type) {
321                case ELK_REGISTER_TYPE_V:
322                   t = ELK_REGISTER_TYPE_W;
323                   break;
324                case ELK_REGISTER_TYPE_UV:
325                   t = ELK_REGISTER_TYPE_UW;
326                   break;
327                case ELK_REGISTER_TYPE_VF:
328                   t = ELK_REGISTER_TYPE_F;
329                   break;
330                default:
331                   t = test_case[i].type;
332                   break;
333                }
334 
335                struct elk_reg g = retype(g0, t);
336                elk_MOV(p, g, retype(elk_imm_w(0), test_case[i].type));
337             }
338 
339             EXPECT_TRUE(validate(p));
340 
341             clear_instructions(p);
342          }
343       }
344 
345       /* The remaining encodings in invalid_encodings do not have a mapping
346        * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
347        * encodings are rejected by the validator.
348        */
349       int e;
350       BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
351          if (file == FIXED_GRF) {
352             elk_MOV(p, g0, g0);
353             elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
354             elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
355             elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
356          } else {
357             elk_MOV(p, g0, elk_imm_w(0));
358          }
359          elk_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
360          elk_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
361 
362          EXPECT_FALSE(validate(p));
363 
364          clear_instructions(p);
365       }
366    }
367 }
368 
TEST_P(validation_test,invalid_type_encoding_3src_a16)369 TEST_P(validation_test, invalid_type_encoding_3src_a16)
370 {
371    /* 3-src instructions in align16 mode only supported on Gfx6-10 */
372    if (devinfo.ver < 6)
373       return;
374 
375    const int num_bits = devinfo.ver >= 8 ? 3 : 2;
376    const int num_encodings = 1 << num_bits;
377 
378    /* The data types are encoded into <num_bits> bits to be used in hardware
379     * instructions, so keep a record in a bitset the invalid patterns so
380     * they can be verified to be invalid when used.
381     */
382    const int max_bits = 3;
383    assert(max_bits >= num_bits);
384    BITSET_DECLARE(invalid_encodings, 1 << max_bits);
385 
386    const struct {
387       enum elk_reg_type type;
388       bool expected_result;
389    } test_case[] = {
390       { ELK_REGISTER_TYPE_DF, devinfo.ver >= 7  },
391       { ELK_REGISTER_TYPE_F,  true },
392       { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8  },
393       { ELK_REGISTER_TYPE_D,  devinfo.ver >= 7  },
394       { ELK_REGISTER_TYPE_UD, devinfo.ver >= 7  },
395    };
396 
397    /* Initially assume all hardware encodings are invalid */
398    BITSET_ONES(invalid_encodings);
399 
400    elk_set_default_access_mode(p, ELK_ALIGN_16);
401    elk_set_default_exec_size(p, ELK_EXECUTE_4);
402 
403    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
404       if (test_case[i].expected_result) {
405          unsigned hw_type = elk_reg_type_to_a16_hw_3src_type(&devinfo, test_case[i].type);
406          if (hw_type != INVALID_HW_REG_TYPE) {
407             /* ... and remove valid encodings from the set */
408             assert(BITSET_TEST(invalid_encodings, hw_type));
409             BITSET_CLEAR(invalid_encodings, hw_type);
410          }
411 
412          struct elk_reg g = retype(g0, test_case[i].type);
413          if (!elk_reg_type_is_integer(test_case[i].type)) {
414             elk_MAD(p, g, g, g, g);
415          } else {
416             elk_BFE(p, g, g, g, g);
417          }
418 
419          EXPECT_TRUE(validate(p));
420 
421          clear_instructions(p);
422       }
423    }
424 
425    /* The remaining encodings in invalid_encodings do not have a mapping
426     * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
427     * encodings are rejected by the validator.
428     */
429    int e;
430    BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
431       for (unsigned i = 0; i < 2; i++) {
432          if (i == 0) {
433             elk_MAD(p, g0, g0, g0, g0);
434          } else {
435             elk_BFE(p, g0, g0, g0, g0);
436          }
437 
438          elk_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
439          elk_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
440 
441          EXPECT_FALSE(validate(p));
442 
443          clear_instructions(p);
444 
445          if (devinfo.ver == 6)
446             break;
447       }
448    }
449 }
450 
451 TEST_P(validation_test, 3src_inst_access_mode)
452 {
453    /* 3-src instructions only supported on Gfx6+ */
454    if (devinfo.ver < 6)
455       return;
456 
457    const struct {
458       unsigned mode;
459       bool expected_result;
460    } test_case[] = {
461       { ELK_ALIGN_1,  false},
462       { ELK_ALIGN_16, true },
463    };
464 
465    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
466       elk_set_default_access_mode(p, ELK_ALIGN_16);
467 
468       elk_MAD(p, g0, g0, g0, g0);
469       elk_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
470 
471       EXPECT_EQ(test_case[i].expected_result, validate(p));
472 
473       clear_instructions(p);
474    }
475 }
476 
477 /* When the Execution Data Type is wider than the destination data type, the
478  * destination must [...] specify a HorzStride equal to the ratio in sizes of
479  * the two data types.
480  */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)481 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
482 {
483    elk_ADD(p, g0, g0, g0);
484    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
485    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
486    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
487 
488    EXPECT_FALSE(validate(p));
489 
490    clear_instructions(p);
491 
492    elk_ADD(p, g0, g0, g0);
493    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
494    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
495    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
496    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
497 
498    EXPECT_TRUE(validate(p));
499 }
500 
501 /* When the Execution Data Type is wider than the destination data type, the
502  * destination must be aligned as required by the wider execution data type
503  * [...]
504  */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)505 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
506 {
507    elk_ADD(p, g0, g0, g0);
508    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
509    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
510    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
511    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
512    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
513 
514    EXPECT_FALSE(validate(p));
515 
516    clear_instructions(p);
517 
518    elk_ADD(p, g0, g0, g0);
519    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
520    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
521    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
522    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
523    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
524    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
525    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
526    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
527    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
528    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
529    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
530    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
531 
532    EXPECT_TRUE(validate(p));
533 }
534 
535 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)536 TEST_P(validation_test, exec_size_less_than_width)
537 {
538    elk_ADD(p, g0, g0, g0);
539    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_16);
540 
541    EXPECT_FALSE(validate(p));
542 
543    clear_instructions(p);
544 
545    elk_ADD(p, g0, g0, g0);
546    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_16);
547 
548    EXPECT_FALSE(validate(p));
549 }
550 
551 /* If ExecSize = Width and HorzStride ≠ 0,
552  * VertStride must be set to Width * HorzStride.
553  */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)554 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
555 {
556    elk_ADD(p, g0, g0, g0);
557    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
558 
559    EXPECT_FALSE(validate(p));
560 
561    clear_instructions(p);
562 
563    elk_ADD(p, g0, g0, g0);
564    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
565 
566    EXPECT_FALSE(validate(p));
567 }
568 
569 /* If Width = 1, HorzStride must be 0 regardless of the values
570  * of ExecSize and VertStride.
571  */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)572 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
573 {
574    elk_ADD(p, g0, g0, g0);
575    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
576    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
577    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
578 
579    EXPECT_FALSE(validate(p));
580 
581    clear_instructions(p);
582 
583    elk_ADD(p, g0, g0, g0);
584    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
585    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
586    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
587 
588    EXPECT_FALSE(validate(p));
589 }
590 
591 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)592 TEST_P(validation_test, scalar_region_must_be_0_1_0)
593 {
594    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
595 
596    elk_ADD(p, g0, g0, g0_0);
597    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
598    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
599    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
600    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
601 
602    EXPECT_FALSE(validate(p));
603 
604    clear_instructions(p);
605 
606    elk_ADD(p, g0, g0_0, g0);
607    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
608    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
609    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
610    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
611 
612    EXPECT_FALSE(validate(p));
613 }
614 
615 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
616  * of ExecSize.
617  */
TEST_P(validation_test,zero_stride_implies_0_1_0)618 TEST_P(validation_test, zero_stride_implies_0_1_0)
619 {
620    elk_ADD(p, g0, g0, g0);
621    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
622    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
623    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
624 
625    EXPECT_FALSE(validate(p));
626 
627    clear_instructions(p);
628 
629    elk_ADD(p, g0, g0, g0);
630    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
631    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
632    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
633 
634    EXPECT_FALSE(validate(p));
635 }
636 
637 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)638 TEST_P(validation_test, dst_horizontal_stride_0)
639 {
640    elk_ADD(p, g0, g0, g0);
641    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
642 
643    EXPECT_FALSE(validate(p));
644 
645    clear_instructions(p);
646 
647    elk_set_default_access_mode(p, ELK_ALIGN_16);
648 
649    elk_ADD(p, g0, g0, g0);
650    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
651 
652    EXPECT_FALSE(validate(p));
653 }
654 
655 /* VertStride must be used to cross ELK_GENERAL_REGISTER_FILE register boundaries. This rule implies
656  * that elements within a 'Width' cannot cross ELK_GENERAL_REGISTER_FILE boundaries.
657  */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)658 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
659 {
660    elk_ADD(p, g0, g0, g0);
661    elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
662 
663    EXPECT_FALSE(validate(p));
664 
665    clear_instructions(p);
666 
667    elk_ADD(p, g0, g0, g0);
668    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
669 
670    EXPECT_FALSE(validate(p));
671 
672    clear_instructions(p);
673 
674    elk_ADD(p, g0, g0, g0);
675    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
676    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
677    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
678 
679    EXPECT_FALSE(validate(p));
680 
681    clear_instructions(p);
682 
683    elk_ADD(p, g0, g0, g0);
684    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
685    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
686    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
687 
688    EXPECT_FALSE(validate(p));
689 }
690 
691 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)692 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
693 {
694    elk_set_default_access_mode(p, ELK_ALIGN_16);
695 
696    elk_ADD(p, g0, g0, g0);
697    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
698 
699    EXPECT_FALSE(validate(p));
700 
701    clear_instructions(p);
702 
703    elk_ADD(p, g0, g0, g0);
704    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
705 
706    EXPECT_TRUE(validate(p));
707 }
708 
709 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)710 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
711 {
712    const struct {
713       enum elk_vertical_stride vstride;
714       bool expected_result;
715    } vstride[] = {
716       { ELK_VERTICAL_STRIDE_0, true },
717       { ELK_VERTICAL_STRIDE_1, false },
718       { ELK_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
719       { ELK_VERTICAL_STRIDE_4, true },
720       { ELK_VERTICAL_STRIDE_8, false },
721       { ELK_VERTICAL_STRIDE_16, false },
722       { ELK_VERTICAL_STRIDE_32, false },
723       { ELK_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
724    };
725 
726    elk_set_default_access_mode(p, ELK_ALIGN_16);
727 
728    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
729       elk_ADD(p, g0, g0, g0);
730       elk_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
731 
732       EXPECT_EQ(vstride[i].expected_result, validate(p));
733 
734       clear_instructions(p);
735    }
736 
737    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
738       elk_ADD(p, g0, g0, g0);
739       elk_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
740 
741       EXPECT_EQ(vstride[i].expected_result, validate(p));
742 
743       clear_instructions(p);
744    }
745 }
746 
747 /* In Direct Addressing mode, a source cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE
748  * registers.
749  */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)750 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
751 {
752    elk_ADD(p, g0, g0, g0);
753    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
754    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
755    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
756    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
757    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
758    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
759    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
760 
761    EXPECT_FALSE(validate(p));
762 
763    clear_instructions(p);
764 
765    elk_ADD(p, g0, g0, g0);
766    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
767    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
768    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
769    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
770    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
771    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
772    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
773    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
774 
775    EXPECT_TRUE(validate(p));
776 
777    clear_instructions(p);
778 
779    elk_ADD(p, g0, g0, g0);
780    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
781 
782    EXPECT_TRUE(validate(p));
783 }
784 
785 /* A destination cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)786 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
787 {
788    elk_ADD(p, g0, g0, g0);
789    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
790    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
791    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
792    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
793    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
794 
795    EXPECT_FALSE(validate(p));
796 
797    clear_instructions(p);
798 
799    elk_ADD(p, g0, g0, g0);
800    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_8);
801    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
802    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
803    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
804    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
805    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
806    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
807    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
808    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
809    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
810    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
811    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
812 
813    EXPECT_TRUE(validate(p));
814 }
815 
TEST_P(validation_test,src_region_spans_two_regs_dst_region_spans_one)816 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
817 {
818    /* Writes to dest are to the lower OWord */
819    elk_ADD(p, g0, g0, g0);
820    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
821    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
822    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
823    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
824    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
825    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
826 
827    EXPECT_TRUE(validate(p));
828 
829    clear_instructions(p);
830 
831    /* Writes to dest are to the upper OWord */
832    elk_ADD(p, g0, g0, g0);
833    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
834    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
835    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
836    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
837    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
838    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
839    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
840 
841    EXPECT_TRUE(validate(p));
842 
843    clear_instructions(p);
844 
845    /* Writes to dest are evenly split between OWords */
846    elk_ADD(p, g0, g0, g0);
847    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
848    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
849    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
850    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
851    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
852    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
853    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
854 
855    EXPECT_TRUE(validate(p));
856 
857    clear_instructions(p);
858 
859    /* Writes to dest are uneven between OWords */
860    elk_ADD(p, g0, g0, g0);
861    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
862    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10);
863    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
864    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
865    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
866    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
867    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
868    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
869    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
870    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
871    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
872 
873    EXPECT_FALSE(validate(p));
874 }
875 
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD with the destination moved to subregister 4: expected invalid. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));

   clear_instructions(p);

   /* SIMD16 ADD with the default destination offset: expected valid. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* The remaining cases use elk_gfx6_math and only run when
    * devinfo.ver >= 6.
    */
   if (devinfo.ver < 6)
      return;

   /* SIN math instruction with the default destination: expected valid. */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Same math instruction with the destination at subregister 4:
    * expected invalid.
    */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));
}
905 
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* Program the <vstride; width, hstride> region of one source of
    * last_inst, in that order.
    */
   const auto src0_region = [&](auto vstride, auto width, auto hstride) {
      elk_inst_set_src0_vstride(&devinfo, last_inst, vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, hstride);
   };
   const auto src1_region = [&](auto vstride, auto width, auto hstride) {
      elk_inst_set_src1_vstride(&devinfo, last_inst, vstride);
      elk_inst_set_src1_width(&devinfo, last_inst, width);
      elk_inst_set_src1_hstride(&devinfo, last_inst, hstride);
   };

   /* Case 1: SIMD4 ADD, dst hstride 4, src0 at subregister 16 while src1
    * keeps its default offset.  Expected to validate only when
    * devinfo.ver >= 8.
    */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
   src0_region(ELK_VERTICAL_STRIDE_2, ELK_WIDTH_1, ELK_HORIZONTAL_STRIDE_0);
   src1_region(ELK_VERTICAL_STRIDE_4, ELK_WIDTH_4, ELK_HORIZONTAL_STRIDE_1);

   if (devinfo.ver >= 8) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* Case 2: SIMD4 ADD, dst hstride 4, no subregister offset on either
    * source.  Expected to validate everywhere.
    */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
   src0_region(ELK_VERTICAL_STRIDE_4, ELK_WIDTH_1, ELK_HORIZONTAL_STRIDE_0);
   src1_region(ELK_VERTICAL_STRIDE_8, ELK_WIDTH_2, ELK_HORIZONTAL_STRIDE_1);

   EXPECT_TRUE(validate(p));
}
939 
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* Both cases below are expected to validate only when devinfo.ver >= 8. */
   const bool gfx8_or_later = devinfo.ver >= 8;

   /* Program the <vstride; width, hstride> region of src0 of last_inst, in
    * that order.
    */
   const auto src0_region = [&](auto vstride, auto width, auto hstride) {
      elk_inst_set_src0_vstride(&devinfo, last_inst, vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, hstride);
   };

   /* Case 1: SIMD16 word MOV, dst hstride 2, src0 at subregister 8 with a
    * <4;4,1> region.
    */
   elk_MOV(p, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE,
                              ELK_REGISTER_TYPE_W);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE,
                               ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   src0_region(ELK_VERTICAL_STRIDE_4, ELK_WIDTH_4, ELK_HORIZONTAL_STRIDE_1);

   if (gfx8_or_later) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* Case 2: MOV with dst at subregister 16 and src0 at subregister 8 with
    * a <2;2,1> region.
    */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   src0_region(ELK_VERTICAL_STRIDE_2, ELK_WIDTH_2, ELK_HORIZONTAL_STRIDE_1);

   if (gfx8_or_later) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
973 
TEST_P(validation_test,one_src_two_dst)974 TEST_P(validation_test, one_src_two_dst)
975 {
976    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
977 
978    elk_ADD(p, g0, g0_0, g0_0);
979    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
980 
981    EXPECT_TRUE(validate(p));
982 
983    clear_instructions(p);
984 
985    elk_ADD(p, g0, g0, g0);
986    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
987    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
988    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
989    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
990 
991    EXPECT_TRUE(validate(p));
992 
993    clear_instructions(p);
994 
995    elk_ADD(p, g0, g0, g0);
996    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
997    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
998    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
999    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1000 
1001    if (devinfo.ver >= 8) {
1002       EXPECT_TRUE(validate(p));
1003    } else {
1004       EXPECT_FALSE(validate(p));
1005    }
1006 
1007    clear_instructions(p);
1008 
1009    elk_ADD(p, g0, g0, g0);
1010    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1011    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
1012    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1013    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1014 
1015    if (devinfo.ver >= 8) {
1016       EXPECT_TRUE(validate(p));
1017    } else {
1018       EXPECT_FALSE(validate(p));
1019    }
1020 
1021    clear_instructions(p);
1022 
1023    elk_ADD(p, g0, g0, g0);
1024    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1025    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1026    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1027    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1028    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1029    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1030    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
1031    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1032 
1033    if (devinfo.ver >= 8) {
1034       EXPECT_TRUE(validate(p));
1035    } else {
1036       EXPECT_FALSE(validate(p));
1037    }
1038 
1039    clear_instructions(p);
1040 
1041    elk_ADD(p, g0, g0, g0);
1042    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1043    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1044    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1045    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1046    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1047    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
1048    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1049    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1050 
1051    if (devinfo.ver >= 8) {
1052       EXPECT_TRUE(validate(p));
1053    } else {
1054       EXPECT_FALSE(validate(p));
1055    }
1056 }
1057 
TEST_P(validation_test,packed_byte_destination)1058 TEST_P(validation_test, packed_byte_destination)
1059 {
1060    static const struct {
1061       enum elk_reg_type dst_type;
1062       enum elk_reg_type src_type;
1063       bool neg, abs, sat;
1064       bool expected_result;
1065    } move[] = {
1066       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1067       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1068       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1069       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1070 
1071       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1072       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1073       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1074       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1075 
1076       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1077       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1078       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1079       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1080 
1081       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1082       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1083       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1084       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1085 
1086       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UW, 0, 0, 0, false },
1087       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_W , 0, 0, 0, false },
1088       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UD, 0, 0, 0, false },
1089       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_D , 0, 0, 0, false },
1090    };
1091 
1092    for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1093       elk_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
1094       elk_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
1095       elk_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
1096       elk_inst_set_saturate(&devinfo, last_inst, move[i].sat);
1097 
1098       EXPECT_EQ(move[i].expected_result, validate(p));
1099 
1100       clear_instructions(p);
1101    }
1102 
1103    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_UB),
1104               retype(g0, ELK_REGISTER_TYPE_UB),
1105               retype(g0, ELK_REGISTER_TYPE_UB));
1106    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1107 
1108    EXPECT_FALSE(validate(p));
1109 
1110    clear_instructions(p);
1111 
1112    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
1113               retype(g0, ELK_REGISTER_TYPE_B),
1114               retype(g0, ELK_REGISTER_TYPE_B));
1115    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1116 
1117    EXPECT_FALSE(validate(p));
1118 }
1119 
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Operands shared by both cases: a signed-byte destination and
    * signed-word sources, all based on g0.
    */
   const auto byte_dst = retype(g0, ELK_REGISTER_TYPE_B);
   const auto word_src = retype(g0, ELK_REGISTER_TYPE_W);

   /* Case 1: predicated SEL with dst hstride 2 at the default subregister.
    * Expected valid.
    */
   elk_SEL(p, byte_dst, word_src, word_src);
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Case 2: same instruction with the destination moved to subregister 1.
    * Expected to validate only when devinfo.verx10 >= 45.
    */
   elk_SEL(p, byte_dst, word_src, word_src);
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1);

   if (devinfo.verx10 < 45) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }
}
1145 
TEST_P(validation_test,byte_64bit_conversion)1146 TEST_P(validation_test, byte_64bit_conversion)
1147 {
1148    static const struct {
1149       enum elk_reg_type dst_type;
1150       enum elk_reg_type src_type;
1151       unsigned dst_stride;
1152       bool expected_result;
1153    } inst[] = {
1154 #define INST(dst_type, src_type, dst_stride, expected_result)             \
1155       {                                                                   \
1156          ELK_REGISTER_TYPE_##dst_type,                                    \
1157          ELK_REGISTER_TYPE_##src_type,                                    \
1158          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1159          expected_result,                                                 \
1160       }
1161 
1162       INST(B,   Q, 1, false),
1163       INST(B,  UQ, 1, false),
1164       INST(B,  DF, 1, false),
1165       INST(UB,  Q, 1, false),
1166       INST(UB, UQ, 1, false),
1167       INST(UB, DF, 1, false),
1168 
1169       INST(B,   Q, 2, false),
1170       INST(B,  UQ, 2, false),
1171       INST(B , DF, 2, false),
1172       INST(UB,  Q, 2, false),
1173       INST(UB, UQ, 2, false),
1174       INST(UB, DF, 2, false),
1175 
1176       INST(B,   Q, 4, false),
1177       INST(B,  UQ, 4, false),
1178       INST(B,  DF, 4, false),
1179       INST(UB,  Q, 4, false),
1180       INST(UB, UQ, 4, false),
1181       INST(UB, DF, 4, false),
1182 
1183 #undef INST
1184    };
1185 
1186    if (devinfo.ver < 8)
1187       return;
1188 
1189    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1190       if (!devinfo.has_64bit_float &&
1191           inst[i].src_type == ELK_REGISTER_TYPE_DF)
1192          continue;
1193 
1194       if (!devinfo.has_64bit_int &&
1195           (inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1196            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1197          continue;
1198 
1199       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1200       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1201       EXPECT_EQ(inst[i].expected_result, validate(p));
1202 
1203       clear_instructions(p);
1204    }
1205 }
1206 
TEST_P(validation_test,half_float_conversion)1207 TEST_P(validation_test, half_float_conversion)
1208 {
1209    static const struct {
1210       enum elk_reg_type dst_type;
1211       enum elk_reg_type src_type;
1212       unsigned dst_stride;
1213       unsigned dst_subnr;
1214       bool expected_result_bdw;
1215       bool expected_result_chv;
1216    } inst[] = {
1217 #define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
1218              expected_result_bdw, expected_result_chv)                      \
1219       {                                                                     \
1220          ELK_REGISTER_TYPE_##dst_type,                                      \
1221          ELK_REGISTER_TYPE_##src_type,                                      \
1222          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1223          dst_subnr,                                                         \
1224          expected_result_bdw,                                               \
1225          expected_result_chv,                                               \
1226       }
1227 
1228       /* MOV to half-float destination */
1229       INST(HF,  B, 1, 0, false, false), /* 0 */
1230       INST(HF,  W, 1, 0, false, false),
1231       INST(HF, HF, 1, 0, true,  true),
1232       INST(HF, HF, 1, 2, true,  true),
1233       INST(HF,  D, 1, 0, false, false),
1234       INST(HF,  F, 1, 0, false, true),
1235       INST(HF,  Q, 1, 0, false, false),
1236       INST(HF,  B, 2, 0, true,  true),
1237       INST(HF,  B, 2, 2, false, false),
1238       INST(HF,  W, 2, 0, true,  true),
1239       INST(HF,  W, 2, 2, false, false), /* 10 */
1240       INST(HF, HF, 2, 0, true,  true),
1241       INST(HF, HF, 2, 2, true,  true),
1242       INST(HF,  D, 2, 0, true,  true),
1243       INST(HF,  D, 2, 2, false, false),
1244       INST(HF,  F, 2, 0, true,  true),
1245       INST(HF,  F, 2, 2, false, true),
1246       INST(HF,  Q, 2, 0, false, false),
1247       INST(HF, DF, 2, 0, false, false),
1248       INST(HF,  B, 4, 0, false, false),
1249       INST(HF,  W, 4, 0, false, false), /* 20 */
1250       INST(HF, HF, 4, 0, true,  true),
1251       INST(HF, HF, 4, 2, true,  true),
1252       INST(HF,  D, 4, 0, false, false),
1253       INST(HF,  F, 4, 0, false, false),
1254       INST(HF,  Q, 4, 0, false, false),
1255       INST(HF, DF, 4, 0, false, false),
1256 
1257       /* MOV from half-float source */
1258       INST( B, HF, 1, 0, false, false),
1259       INST( W, HF, 1, 0, false, false),
1260       INST( D, HF, 1, 0, true,  true),
1261       INST( D, HF, 1, 4, true,  true),  /* 30 */
1262       INST( F, HF, 1, 0, true,  true),
1263       INST( F, HF, 1, 4, true,  true),
1264       INST( Q, HF, 1, 0, false, false),
1265       INST(DF, HF, 1, 0, false, false),
1266       INST( B, HF, 2, 0, false, false),
1267       INST( W, HF, 2, 0, true,  true),
1268       INST( W, HF, 2, 2, false, false),
1269       INST( D, HF, 2, 0, false, false),
1270       INST( F, HF, 2, 0, true,  true),
1271       INST( B, HF, 4, 0, true,  true),  /* 40 */
1272       INST( B, HF, 4, 1, false, false),
1273       INST( W, HF, 4, 0, false, false),
1274 
1275 #undef INST
1276    };
1277 
1278    if (devinfo.ver < 8)
1279       return;
1280 
1281    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1282       if (!devinfo.has_64bit_float &&
1283           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
1284            inst[i].src_type == ELK_REGISTER_TYPE_DF))
1285          continue;
1286 
1287       if (!devinfo.has_64bit_int &&
1288           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
1289            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
1290            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1291            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1292          continue;
1293 
1294       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1295 
1296       elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
1297 
1298       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1299       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1300 
1301       if (inst[i].src_type == ELK_REGISTER_TYPE_B) {
1302          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1303          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
1304          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1305       } else {
1306          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1307          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1308          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1309       }
1310 
1311       if (devinfo.platform == INTEL_PLATFORM_CHV) {
1312          EXPECT_EQ(inst[i].expected_result_chv, validate(p)) <<
1313             "Failing test is: " << i;
1314       } else {
1315          EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
1316             "Failing test is: " << i;
1317       }
1318 
1319       clear_instructions(p);
1320    }
1321 }
1322 
TEST_P(validation_test,mixed_float_source_indirect_addressing)1323 TEST_P(validation_test, mixed_float_source_indirect_addressing)
1324 {
1325    static const struct {
1326       enum elk_reg_type dst_type;
1327       enum elk_reg_type src0_type;
1328       enum elk_reg_type src1_type;
1329       unsigned dst_stride;
1330       bool dst_indirect;
1331       bool src0_indirect;
1332       bool expected_result;
1333       bool gfx125_expected_result;
1334    } inst[] = {
1335 #define INST(dst_type, src0_type, src1_type,                              \
1336              dst_stride, dst_indirect, src0_indirect, expected_result,    \
1337              gfx125_expected_result)                                      \
1338       {                                                                   \
1339          ELK_REGISTER_TYPE_##dst_type,                                    \
1340          ELK_REGISTER_TYPE_##src0_type,                                   \
1341          ELK_REGISTER_TYPE_##src1_type,                                   \
1342          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1343          dst_indirect,                                                    \
1344          src0_indirect,                                                   \
1345          expected_result,                                                 \
1346          gfx125_expected_result,                                          \
1347       }
1348 
1349       /* Source and dest are mixed float: indirect src addressing not allowed */
1350       INST(HF,  F,  F, 2, false, false, true,  true),
1351       INST(HF,  F,  F, 2, true,  false, true,  true),
1352       INST(HF,  F,  F, 2, false, true,  false, false),
1353       INST(HF,  F,  F, 2, true,  true,  false, false),
1354       INST( F, HF,  F, 1, false, false, true,  false),
1355       INST( F, HF,  F, 1, true,  false, true,  false),
1356       INST( F, HF,  F, 1, false, true,  false, false),
1357       INST( F, HF,  F, 1, true,  true,  false, false),
1358 
1359       INST(HF, HF,  F, 2, false, false, true,  false),
1360       INST(HF, HF,  F, 2, true,  false, true,  false),
1361       INST(HF, HF,  F, 2, false, true,  false, false),
1362       INST(HF, HF,  F, 2, true,  true,  false, false),
1363       INST( F,  F, HF, 1, false, false, true,  false),
1364       INST( F,  F, HF, 1, true,  false, true,  false),
1365       INST( F,  F, HF, 1, false, true,  false, false),
1366       INST( F,  F, HF, 1, true,  true,  false, false),
1367 
1368 #undef INST
1369    };
1370 
1371    if (devinfo.ver < 8)
1372       return;
1373 
1374    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1375       elk_ADD(p, retype(g0, inst[i].dst_type),
1376                  retype(g0, inst[i].src0_type),
1377                  retype(g0, inst[i].src1_type));
1378 
1379       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
1380       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1381       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
1382 
1383       EXPECT_EQ(inst[i].expected_result, validate(p));
1384 
1385       clear_instructions(p);
1386    }
1387 }
1388 
TEST_P(validation_test,mixed_float_align1_simd16)1389 TEST_P(validation_test, mixed_float_align1_simd16)
1390 {
1391    static const struct {
1392       unsigned exec_size;
1393       enum elk_reg_type dst_type;
1394       enum elk_reg_type src0_type;
1395       enum elk_reg_type src1_type;
1396       unsigned dst_stride;
1397       bool expected_result;
1398       bool gfx125_expected_result;
1399    } inst[] = {
1400 #define INST(exec_size, dst_type, src0_type, src1_type,                   \
1401              dst_stride, expected_result, gfx125_expected_result)         \
1402       {                                                                   \
1403          ELK_EXECUTE_##exec_size,                                         \
1404          ELK_REGISTER_TYPE_##dst_type,                                    \
1405          ELK_REGISTER_TYPE_##src0_type,                                   \
1406          ELK_REGISTER_TYPE_##src1_type,                                   \
1407          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1408          expected_result,                                                 \
1409          gfx125_expected_result,                                          \
1410       }
1411 
1412       /* No SIMD16 in mixed mode when destination is packed f16 */
1413       INST( 8, HF,  F, HF, 2, true,  false),
1414       INST(16, HF, HF,  F, 2, true,  false),
1415       INST(16, HF, HF,  F, 1, false, false),
1416       INST(16, HF,  F, HF, 1, false, false),
1417 
1418       /* No SIMD16 in mixed mode when destination is f32 */
1419       INST( 8,  F, HF,  F, 1, true,  false),
1420       INST( 8,  F,  F, HF, 1, true,  false),
1421       INST(16,  F, HF,  F, 1, false, false),
1422       INST(16,  F,  F, HF, 1, false, false),
1423 
1424 #undef INST
1425    };
1426 
1427    if (devinfo.ver < 8)
1428       return;
1429 
1430    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1431       elk_ADD(p, retype(g0, inst[i].dst_type),
1432                  retype(g0, inst[i].src0_type),
1433                  retype(g0, inst[i].src1_type));
1434 
1435       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1436 
1437       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1438 
1439       EXPECT_EQ(inst[i].expected_result, validate(p));
1440 
1441       clear_instructions(p);
1442    }
1443 }
1444 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst_acc_read_offset_0)1445 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
1446 {
1447    static const struct {
1448       enum elk_reg_type dst_type;
1449       enum elk_reg_type src0_type;
1450       enum elk_reg_type src1_type;
1451       unsigned dst_stride;
1452       bool read_acc;
1453       unsigned subnr;
1454       bool expected_result_bdw;
1455       bool expected_result_chv_skl;
1456       bool expected_result_gfx125;
1457    } inst[] = {
1458 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
1459              expected_result_bdw, expected_result_chv_skl,                  \
1460              expected_result_gfx125)                                        \
1461       {                                                                     \
1462          ELK_REGISTER_TYPE_##dst_type,                                      \
1463          ELK_REGISTER_TYPE_##src0_type,                                     \
1464          ELK_REGISTER_TYPE_##src1_type,                                     \
1465          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1466          read_acc,                                                          \
1467          subnr,                                                             \
1468          expected_result_bdw,                                               \
1469          expected_result_chv_skl,                                           \
1470          expected_result_gfx125,                                            \
1471       }
1472 
1473       /* Destination is not packed */
1474       INST(HF, HF,  F, 2, true,  0, true, true, false),
1475       INST(HF, HF,  F, 2, true,  2, true, true, false),
1476       INST(HF, HF,  F, 2, true,  4, true, true, false),
1477       INST(HF, HF,  F, 2, true,  8, true, true, false),
1478       INST(HF, HF,  F, 2, true, 16, true, true, false),
1479 
1480       /* Destination is packed, we don't read acc */
1481       INST(HF, HF,  F, 1, false,  0, false, true, false),
1482       INST(HF, HF,  F, 1, false,  2, false, true, false),
1483       INST(HF, HF,  F, 1, false,  4, false, true, false),
1484       INST(HF, HF,  F, 1, false,  8, false, true, false),
1485       INST(HF, HF,  F, 1, false, 16, false, true, false),
1486 
1487       /* Destination is packed, we read acc */
1488       INST(HF, HF,  F, 1, true,  0, false, false, false),
1489       INST(HF, HF,  F, 1, true,  2, false, false, false),
1490       INST(HF, HF,  F, 1, true,  4, false, false, false),
1491       INST(HF, HF,  F, 1, true,  8, false, false, false),
1492       INST(HF, HF,  F, 1, true, 16, false, false, false),
1493 
1494 #undef INST
1495    };
1496 
1497    if (devinfo.ver < 8)
1498       return;
1499 
1500    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1501       elk_ADD(p, retype(g0, inst[i].dst_type),
1502                  retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1503                  retype(g0, inst[i].src1_type));
1504 
1505       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1506 
1507       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
1508 
1509       if (devinfo.verx10 >= 125)
1510          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1511       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1512          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1513       else
1514          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1515 
1516       clear_instructions(p);
1517    }
1518 }
1519 
TEST_P(validation_test,mixed_float_fp16_dest_with_acc)1520 TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
1521 {
1522    static const struct {
1523       unsigned exec_size;
1524       unsigned opcode;
1525       enum elk_reg_type dst_type;
1526       enum elk_reg_type src0_type;
1527       enum elk_reg_type src1_type;
1528       unsigned dst_stride;
1529       bool read_acc;
1530       bool expected_result_bdw;
1531       bool expected_result_chv_skl;
1532       bool expected_result_gfx125;
1533    } inst[] = {
1534 #define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
1535              dst_stride, read_acc,expected_result_bdw,                    \
1536              expected_result_chv_skl, expected_result_gfx125)             \
1537       {                                                                   \
1538          ELK_EXECUTE_##exec_size,                                         \
1539          ELK_OPCODE_##opcode,                                             \
1540          ELK_REGISTER_TYPE_##dst_type,                                    \
1541          ELK_REGISTER_TYPE_##src0_type,                                   \
1542          ELK_REGISTER_TYPE_##src1_type,                                   \
1543          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1544          read_acc,                                                        \
1545          expected_result_bdw,                                             \
1546          expected_result_chv_skl,                                         \
1547          expected_result_gfx125,                                          \
1548       }
1549 
1550       /* Packed fp16 dest with implicit acc needs hstride=2 */
1551       INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
1552       INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
1553       INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
1554       INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),
1555 
1556       /* Packed fp16 dest with explicit acc needs hstride=2 */
1557       INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
1558       INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
1559       INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
1560       INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),
1561 
1562       /* If destination is not fp16, restriction doesn't apply */
1563       INST(8, MAC,  F, HF,  F, 1, false, true, true, false),
1564       INST(8, MAC,  F, HF,  F, 2, false, true, true, false),
1565 
1566       /* If there is no implicit/explicit acc, restriction doesn't apply */
1567       INST(8, ADD, HF, HF,  F, 1, false, false, true, false),
1568       INST(8, ADD, HF, HF,  F, 2, false, true,  true, false),
1569       INST(8, ADD, HF,  F, HF, 1, false, false, true, false),
1570       INST(8, ADD, HF,  F, HF, 2, false, true,  true, false),
1571       INST(8, ADD,  F, HF,  F, 1, false, true,  true, false),
1572       INST(8, ADD,  F, HF,  F, 2, false, true,  true, false),
1573 
1574 #undef INST
1575    };
1576 
1577    if (devinfo.ver < 8)
1578       return;
1579 
1580    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1581       if (inst[i].opcode == ELK_OPCODE_MAC) {
1582          elk_MAC(p, retype(g0, inst[i].dst_type),
1583                     retype(g0, inst[i].src0_type),
1584                     retype(g0, inst[i].src1_type));
1585       } else {
1586          assert(inst[i].opcode == ELK_OPCODE_ADD);
1587          elk_ADD(p, retype(g0, inst[i].dst_type),
1588                     retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
1589                     retype(g0, inst[i].src1_type));
1590       }
1591 
1592       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1593 
1594       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1595 
1596       if (devinfo.verx10 >= 125)
1597          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1598       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1599          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1600       else
1601          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1602 
1603       clear_instructions(p);
1604    }
1605 }
1606 
TEST_P(validation_test,mixed_float_align1_math_strided_fp16_inputs)1607 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
1608 {
1609    static const struct {
1610       enum elk_reg_type dst_type;
1611       enum elk_reg_type src0_type;
1612       enum elk_reg_type src1_type;
1613       unsigned dst_stride;
1614       unsigned src0_stride;
1615       unsigned src1_stride;
1616       bool expected_result;
1617       bool expected_result_gfx125;
1618    } inst[] = {
1619 #define INST(dst_type, src0_type, src1_type,                              \
1620              dst_stride, src0_stride, src1_stride, expected_result,       \
1621              expected_result_125)                                         \
1622       {                                                                   \
1623          ELK_REGISTER_TYPE_##dst_type,                                    \
1624          ELK_REGISTER_TYPE_##src0_type,                                   \
1625          ELK_REGISTER_TYPE_##src1_type,                                   \
1626          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1627          ELK_HORIZONTAL_STRIDE_##src0_stride,                             \
1628          ELK_HORIZONTAL_STRIDE_##src1_stride,                             \
1629          expected_result,                                                 \
1630          expected_result_125,                                             \
1631       }
1632 
1633       INST(HF, HF,  F, 2, 2, 1, true,  false),
1634       INST(HF,  F, HF, 2, 1, 2, true,  false),
1635       INST(HF,  F, HF, 1, 1, 2, true,  false),
1636       INST(HF,  F, HF, 2, 1, 1, false, false),
1637       INST(HF, HF,  F, 2, 1, 1, false, false),
1638       INST(HF, HF,  F, 1, 1, 1, false, false),
1639       INST(HF, HF,  F, 2, 1, 1, false, false),
1640       INST( F, HF,  F, 1, 1, 1, false, false),
1641       INST( F,  F, HF, 1, 1, 2, true,  false),
1642       INST( F, HF, HF, 1, 2, 1, false, false),
1643       INST( F, HF, HF, 1, 2, 2, true,  false),
1644 
1645 #undef INST
1646    };
1647 
1648    /* No half-float math in gfx8 */
1649    if (devinfo.ver < 9)
1650       return;
1651 
1652    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1653       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1654                    ELK_MATH_FUNCTION_POW,
1655                    retype(g0, inst[i].src0_type),
1656                    retype(g0, inst[i].src1_type));
1657 
1658       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1659 
1660       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1661       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1662       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
1663 
1664       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1665       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1666       elk_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
1667 
1668       if (devinfo.verx10 >= 125)
1669          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1670       else
1671          EXPECT_EQ(inst[i].expected_result, validate(p));
1672 
1673       clear_instructions(p);
1674    }
1675 }
1676 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst)1677 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
1678 {
1679    static const struct {
1680       unsigned exec_size;
1681       enum elk_reg_type dst_type;
1682       enum elk_reg_type src0_type;
1683       enum elk_reg_type src1_type;
1684       unsigned dst_stride;
1685       unsigned dst_subnr;
1686       bool expected_result_bdw;
1687       bool expected_result_chv_skl;
1688       bool expected_result_gfx125;
1689    } inst[] = {
1690 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
1691              expected_result_bdw, expected_result_chv_skl,                     \
1692              expected_result_gfx125)                                           \
1693       {                                                                        \
1694          ELK_EXECUTE_##exec_size,                                              \
1695          ELK_REGISTER_TYPE_##dst_type,                                         \
1696          ELK_REGISTER_TYPE_##src0_type,                                        \
1697          ELK_REGISTER_TYPE_##src1_type,                                        \
1698          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
1699          dst_subnr,                                                            \
1700          expected_result_bdw,                                                  \
1701          expected_result_chv_skl,                                              \
1702          expected_result_gfx125                                                \
1703       }
1704 
1705       /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
1706        * oword-aligned
1707        */
1708       INST( 8, HF, HF,  F, 1,  0, false, true,  false),
1709       INST( 8, HF, HF,  F, 1,  2, false, false, false),
1710       INST( 8, HF, HF,  F, 1,  4, false, false, false),
1711       INST( 8, HF, HF,  F, 1,  8, false, false, false),
1712       INST( 8, HF, HF,  F, 1, 16, false, true,  false),
1713 
1714       /* SIMD16 packed fp16 always crosses oword boundaries */
1715       INST(16, HF, HF,  F, 1,  0, false, false, false),
1716       INST(16, HF, HF,  F, 1,  2, false, false, false),
1717       INST(16, HF, HF,  F, 1,  4, false, false, false),
1718       INST(16, HF, HF,  F, 1,  8, false, false, false),
1719       INST(16, HF, HF,  F, 1, 16, false, false, false),
1720 
1721       /* If destination is not packed (or not fp16) we can cross oword
1722        * boundaries
1723        */
1724       INST( 8, HF, HF,  F, 2,  0, true, true, false),
1725       INST( 8,  F, HF,  F, 1,  0, true, true, false),
1726 
1727 #undef INST
1728    };
1729 
1730    if (devinfo.ver < 8)
1731       return;
1732 
1733    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1734       elk_ADD(p, retype(g0, inst[i].dst_type),
1735                  retype(g0, inst[i].src0_type),
1736                  retype(g0, inst[i].src1_type));
1737 
1738       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1739       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1740 
1741       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1742       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1743       elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1744 
1745       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1746       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1747       elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1748 
1749       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1750 
1751       if (devinfo.verx10 >= 125)
1752          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1753       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1754          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1755       else
1756          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1757 
1758       clear_instructions(p);
1759    }
1760 }
1761 
TEST_P(validation_test,mixed_float_align16_packed_data)1762 TEST_P(validation_test, mixed_float_align16_packed_data)
1763 {
1764    static const struct {
1765       enum elk_reg_type dst_type;
1766       enum elk_reg_type src0_type;
1767       enum elk_reg_type src1_type;
1768       unsigned src0_vstride;
1769       unsigned src1_vstride;
1770       bool expected_result;
1771    } inst[] = {
1772 #define INST(dst_type, src0_type, src1_type,                              \
1773              src0_vstride, src1_vstride, expected_result)                 \
1774       {                                                                   \
1775          ELK_REGISTER_TYPE_##dst_type,                                    \
1776          ELK_REGISTER_TYPE_##src0_type,                                   \
1777          ELK_REGISTER_TYPE_##src1_type,                                   \
1778          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1779          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1780          expected_result,                                                 \
1781       }
1782 
1783       /* We only test with F destination because there is a restriction
1784        * by which F->HF conversions need to be DWord aligned but Align16 also
1785        * requires that destination horizontal stride is 1.
1786        */
1787       INST(F,  F, HF, 4, 4, true),
1788       INST(F,  F, HF, 2, 4, false),
1789       INST(F,  F, HF, 4, 2, false),
1790       INST(F,  F, HF, 0, 4, false),
1791       INST(F,  F, HF, 4, 0, false),
1792       INST(F, HF,  F, 4, 4, true),
1793       INST(F, HF,  F, 4, 2, false),
1794       INST(F, HF,  F, 2, 4, false),
1795       INST(F, HF,  F, 0, 4, false),
1796       INST(F, HF,  F, 4, 0, false),
1797 
1798 #undef INST
1799    };
1800 
1801    if (devinfo.ver < 8 || devinfo.ver >= 11)
1802       return;
1803 
1804    elk_set_default_access_mode(p, ELK_ALIGN_16);
1805 
1806    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1807       elk_ADD(p, retype(g0, inst[i].dst_type),
1808                  retype(g0, inst[i].src0_type),
1809                  retype(g0, inst[i].src1_type));
1810 
1811       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1812       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1813 
1814       EXPECT_EQ(inst[i].expected_result, validate(p));
1815 
1816       clear_instructions(p);
1817    }
1818 }
1819 
/* With the default access mode switched to Align16, emits an ADD per row at
 * the row's execution size (8 or 16) with both source vertical strides set
 * to 4, and compares validate() with the row's expected result.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   /* Only devices with 8 <= ver < 11 are exercised. */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)  \
      {                                                                   \
         ELK_EXECUTE_##exec_size,                                         \
         ELK_REGISTER_TYPE_##dst_type,                                    \
         ELK_REGISTER_TYPE_##src0_type,                                   \
         ELK_REGISTER_TYPE_##src1_type,                                   \
         expected_result,                                                 \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST( 8,  F,  F, HF, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),
      INST(16,  F,  F, HF, false),

#undef INST
   };

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &t : inst) {
      elk_ADD(p, retype(g0, t.dst_type),
                 retype(g0, t.src0_type),
                 retype(g0, t.src1_type));

      elk_inst_set_exec_size(&devinfo, last_inst, t.exec_size);

      /* Same vertical stride on both sources for every row. */
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      const bool expected = t.expected_result;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
1872 
/* With the default access mode switched to Align16, emits an ADD per row
 * whose src0 is acc0 when read_acc is set (g0 otherwise), with both source
 * vertical strides set to 4, and compares validate() with the row's expected
 * result.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   /* Only devices with 8 <= ver < 11 are exercised. */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)   \
      {                                                                   \
         ELK_REGISTER_TYPE_##dst_type,                                    \
         ELK_REGISTER_TYPE_##src0_type,                                   \
         ELK_REGISTER_TYPE_##src1_type,                                   \
         read_acc,                                                        \
         expected_result,                                                 \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),

#undef INST
   };

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &t : inst) {
      elk_ADD(p, retype(g0, t.dst_type),
                 retype(t.read_acc ? acc0 : g0, t.src0_type),
                 retype(g0, t.src1_type));

      /* Same vertical stride on both sources for every row. */
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      const bool expected = t.expected_result;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
1921 
TEST_P(validation_test,mixed_float_align16_math_packed_format)1922 TEST_P(validation_test, mixed_float_align16_math_packed_format)
1923 {
1924    static const struct {
1925       enum elk_reg_type dst_type;
1926       enum elk_reg_type src0_type;
1927       enum elk_reg_type src1_type;
1928       unsigned src0_vstride;
1929       unsigned src1_vstride;
1930       bool expected_result;
1931    } inst[] = {
1932 #define INST(dst_type, src0_type, src1_type,                              \
1933              src0_vstride, src1_vstride, expected_result)                 \
1934       {                                                                   \
1935          ELK_REGISTER_TYPE_##dst_type,                                    \
1936          ELK_REGISTER_TYPE_##src0_type,                                   \
1937          ELK_REGISTER_TYPE_##src1_type,                                   \
1938          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1939          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1940          expected_result,                                                 \
1941       }
1942 
1943       /* We only test with F destination because there is a restriction
1944        * by which F->HF conversions need to be DWord aligned but Align16 also
1945        * requires that destination horizontal stride is 1.
1946        */
1947       INST( F, HF,  F, 4, 0, false),
1948       INST( F, HF, HF, 4, 4, true),
1949       INST( F,  F, HF, 4, 0, false),
1950       INST( F,  F, HF, 2, 4, false),
1951       INST( F,  F, HF, 4, 2, false),
1952       INST( F, HF, HF, 0, 4, false),
1953 
1954 #undef INST
1955    };
1956 
1957    /* Align16 Math for mixed float mode is not supported in gfx8 */
1958    if (devinfo.ver < 9 || devinfo.ver >= 11)
1959       return;
1960 
1961    elk_set_default_access_mode(p, ELK_ALIGN_16);
1962 
1963    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1964       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1965                    ELK_MATH_FUNCTION_POW,
1966                    retype(g0, inst[i].src0_type),
1967                    retype(g0, inst[i].src1_type));
1968 
1969       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1970       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1971 
1972       EXPECT_EQ(inst[i].expected_result, validate(p));
1973 
1974       clear_instructions(p);
1975    }
1976 }
1977 
/* Emits a MOV from a vector-immediate typed source (VF, V or UV) per row,
 * sets the destination subregister number and the execution size, and
 * compares validate() with the row's expected result.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   } move[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  0, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, 16, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  1, ELK_EXECUTE_4, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   1, ELK_EXECUTE_8, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, 16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  1, ELK_EXECUTE_8, false },
   };

   for (const auto &m : move) {
      /* UV type is Gfx6+ */
      const bool uv_unavailable =
         m.src_type == ELK_REGISTER_TYPE_UV && devinfo.ver < 6;
      if (uv_unavailable)
         continue;

      elk_MOV(p, retype(g0, m.dst_type), retype(zero, m.src_type));
      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, m.subnr);
      elk_inst_set_exec_size(&devinfo, last_inst, m.exec_size);

      const bool expected = m.expected_result;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
2015 
/* Emits a MOV from a vector-immediate typed source (VF, V or UV) per row,
 * overrides the destination horizontal stride, and compares validate() with
 * the row's expected result.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned stride;
      bool expected_result;
   } move[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, true  },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_4, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, true  },
   };

   for (const auto &m : move) {
      /* UV type is Gfx6+ */
      const bool uv_unavailable =
         m.src_type == ELK_REGISTER_TYPE_UV && devinfo.ver < 6;
      if (uv_unavailable)
         continue;

      elk_MOV(p, retype(g0, m.dst_type), retype(zero, m.src_type));
      elk_inst_set_dst_hstride(&devinfo, last_inst, m.stride);

      const bool expected = m.expected_result;
      EXPECT_EQ(expected, validate(p));

      clear_instructions(p);
   }
}
2056 
TEST_P(validation_test,qword_low_power_align1_regioning_restrictions)2057 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
2058 {
2059    static const struct {
2060       enum elk_opcode opcode;
2061       unsigned exec_size;
2062 
2063       enum elk_reg_type dst_type;
2064       unsigned dst_subreg;
2065       unsigned dst_stride;
2066 
2067       enum elk_reg_type src_type;
2068       unsigned src_subreg;
2069       unsigned src_vstride;
2070       unsigned src_width;
2071       unsigned src_hstride;
2072 
2073       bool expected_result;
2074    } inst[] = {
2075 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
2076              src_subreg, src_vstride, src_width, src_hstride, expected_result) \
2077       {                                                                        \
2078          ELK_OPCODE_##opcode,                                                  \
2079          ELK_EXECUTE_##exec_size,                                              \
2080          ELK_REGISTER_TYPE_##dst_type,                                         \
2081          dst_subreg,                                                           \
2082          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2083          ELK_REGISTER_TYPE_##src_type,                                         \
2084          src_subreg,                                                           \
2085          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2086          ELK_WIDTH_##src_width,                                                \
2087          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2088          expected_result,                                                      \
2089       }
2090 
2091       /* Some instruction that violate no restrictions, as a control */
2092       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2093       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2094       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2095 
2096       INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
2097       INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
2098       INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
2099 
2100       INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
2101       INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
2102       INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
2103 
2104       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2105       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2106 
2107       /* Something with subreg nrs */
2108       INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
2109       INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
2110       INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
2111 
2112       INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
2113       INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
2114 
2115       /* The PRMs say that for CHV, BXT:
2116        *
2117        *    When source or destination datatype is 64b or operation is integer
2118        *    DWord multiply, regioning in Align1 must follow these rules:
2119        *
2120        *    1. Source and Destination horizontal stride must be aligned to the
2121        *       same qword.
2122        */
2123       INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
2124       INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
2125       INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
2126 
2127       INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
2128       INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
2129       INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
2130 
2131       INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
2132       INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
2133       INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
2134 
2135       INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
2136       INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
2137 
2138       INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
2139       INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
2140 
2141       /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
2142       INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
2143       INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
2144       INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
2145 
2146       INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
2147       INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
2148       INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
2149 
2150       INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
2151       INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
2152       INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
2153 
2154       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2155       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2156 
2157       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2158       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2159 
2160       /*    3. Source and Destination offset must be the same, except the case
2161        *       of scalar source.
2162        */
2163       INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
2164       INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
2165       INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
2166 
2167       INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
2168       INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
2169       INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
2170 
2171       INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
2172       INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
2173 
2174       INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
2175       INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
2176 
2177       INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
2178       INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
2179       INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
2180 
2181       INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
2182       INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
2183       INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
2184 
2185       INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
2186       INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
2187 
2188       INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
2189       INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
2190 
2191 #undef INST
2192    };
2193 
2194    /* These restrictions only apply to Gfx8+ */
2195    if (devinfo.ver < 8)
2196       return;
2197 
2198    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2199    if (devinfo.ver >= 12)
2200       return;
2201 
2202    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2203       if (!devinfo.has_64bit_float &&
2204           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2205            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2206          continue;
2207 
2208       if (!devinfo.has_64bit_int &&
2209           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2210            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2211            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2212            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2213          continue;
2214 
2215       if (inst[i].opcode == ELK_OPCODE_MOV) {
2216          elk_MOV(p, retype(g0, inst[i].dst_type),
2217                     retype(g0, inst[i].src_type));
2218       } else {
2219          assert(inst[i].opcode == ELK_OPCODE_MUL);
2220          elk_MUL(p, retype(g0, inst[i].dst_type),
2221                     retype(g0, inst[i].src_type),
2222                     retype(zero, inst[i].src_type));
2223       }
2224       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2225 
2226       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
2227       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
2228 
2229       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2230 
2231       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2232       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2233       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2234 
2235       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2236          EXPECT_EQ(inst[i].expected_result, validate(p));
2237       } else {
2238          EXPECT_TRUE(validate(p));
2239       }
2240 
2241       clear_instructions(p);
2242    }
2243 }
2244 
TEST_P(validation_test,qword_low_power_no_indirect_addressing)2245 TEST_P(validation_test, qword_low_power_no_indirect_addressing)
2246 {
2247    static const struct {
2248       enum elk_opcode opcode;
2249       unsigned exec_size;
2250 
2251       enum elk_reg_type dst_type;
2252       bool dst_is_indirect;
2253       unsigned dst_stride;
2254 
2255       enum elk_reg_type src_type;
2256       bool src_is_indirect;
2257       unsigned src_vstride;
2258       unsigned src_width;
2259       unsigned src_hstride;
2260 
2261       bool expected_result;
2262    } inst[] = {
2263 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,         \
2264              src_type, src_is_indirect, src_vstride, src_width, src_hstride,   \
2265              expected_result)                                                  \
2266       {                                                                        \
2267          ELK_OPCODE_##opcode,                                                  \
2268          ELK_EXECUTE_##exec_size,                                              \
2269          ELK_REGISTER_TYPE_##dst_type,                                         \
2270          dst_is_indirect,                                                      \
2271          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2272          ELK_REGISTER_TYPE_##src_type,                                         \
2273          src_is_indirect,                                                      \
2274          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2275          ELK_WIDTH_##src_width,                                                \
2276          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2277          expected_result,                                                      \
2278       }
2279 
2280       /* Some instruction that violate no restrictions, as a control */
2281       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2282       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2283       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2284 
2285       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2286       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2287 
2288       INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
2289       INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
2290       INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),
2291 
2292       /* The PRMs say that for CHV, BXT:
2293        *
2294        *    When source or destination datatype is 64b or operation is integer
2295        *    DWord multiply, indirect addressing must not be used.
2296        */
2297       INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
2298       INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
2299       INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
2300 
2301       INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
2302       INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
2303       INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
2304 
2305       INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
2306       INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
2307       INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
2308 
2309       INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
2310       INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
2311       INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
2312 
2313       INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
2314       INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
2315       INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
2316 
2317       INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
2318       INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
2319       INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
2320 
2321       INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
2322       INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
2323 
2324       INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
2325       INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
2326 
2327 #undef INST
2328    };
2329 
2330    /* These restrictions only apply to Gfx8+ */
2331    if (devinfo.ver < 8)
2332       return;
2333 
2334    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2335       if (!devinfo.has_64bit_float &&
2336           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2337            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2338          continue;
2339 
2340       if (!devinfo.has_64bit_int &&
2341           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2342            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2343            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2344            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2345          continue;
2346 
2347       if (inst[i].opcode == ELK_OPCODE_MOV) {
2348          elk_MOV(p, retype(g0, inst[i].dst_type),
2349                     retype(g0, inst[i].src_type));
2350       } else {
2351          assert(inst[i].opcode == ELK_OPCODE_MUL);
2352          elk_MUL(p, retype(g0, inst[i].dst_type),
2353                     retype(g0, inst[i].src_type),
2354                     retype(zero, inst[i].src_type));
2355       }
2356       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2357 
2358       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
2359       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
2360 
2361       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2362 
2363       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2364       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2365       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2366 
2367       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2368          EXPECT_EQ(inst[i].expected_result, validate(p));
2369       } else {
2370          EXPECT_TRUE(validate(p));
2371       }
2372 
2373       clear_instructions(p);
2374    }
2375 }
2376 
TEST_P(validation_test,qword_low_power_no_64bit_arf)2377 TEST_P(validation_test, qword_low_power_no_64bit_arf)
2378 {
2379    static const struct {
2380       enum elk_opcode opcode;
2381       unsigned exec_size;
2382 
2383       struct elk_reg dst;
2384       enum elk_reg_type dst_type;
2385       unsigned dst_stride;
2386 
2387       struct elk_reg src;
2388       enum elk_reg_type src_type;
2389       unsigned src_vstride;
2390       unsigned src_width;
2391       unsigned src_hstride;
2392 
2393       bool acc_wr;
2394       bool expected_result;
2395    } inst[] = {
2396 #define INST(opcode, exec_size, dst, dst_type, dst_stride,                     \
2397              src, src_type, src_vstride, src_width, src_hstride,               \
2398              acc_wr, expected_result)                                          \
2399       {                                                                        \
2400          ELK_OPCODE_##opcode,                                                  \
2401          ELK_EXECUTE_##exec_size,                                              \
2402          dst,                                                                  \
2403          ELK_REGISTER_TYPE_##dst_type,                                         \
2404          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2405          src,                                                                  \
2406          ELK_REGISTER_TYPE_##src_type,                                         \
2407          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2408          ELK_WIDTH_##src_width,                                                \
2409          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2410          acc_wr,                                                               \
2411          expected_result,                                                      \
2412       }
2413 
2414       /* Some instruction that violate no restrictions, as a control */
2415       INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
2416       INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),
2417 
2418       INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
2419       INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),
2420 
2421       INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2422       INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),
2423 
2424       INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
2425       INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
2426       INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),
2427 
2428       INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
2429       INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
2430       INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),
2431 
2432       INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2433       INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2434       INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),
2435 
2436       INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
2437       INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),
2438 
2439       /* The PRMs say that for CHV, BXT:
2440        *
2441        *    ARF registers must never be used with 64b datatype or when
2442        *    operation is integer DWord multiply.
2443        */
2444       INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
2445       INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),
2446 
2447       INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
2448       INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),
2449 
2450       INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
2451       INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),
2452 
2453       INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
2454       INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),
2455 
2456       INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
2457       INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),
2458 
2459       INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
2460       INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),
2461 
2462       INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
2463       INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
2464       /* MUL cannot have integer accumulator sources, so don't test that */
2465 
2466       /* We assume that the restriction does not apply to the null register */
2467       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
2468       INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
2469       INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2470 
2471       /* Check implicit accumulator write control */
2472       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2473       INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2474 
2475 #undef INST
2476    };
2477 
2478    /* These restrictions only apply to Gfx8+ */
2479    if (devinfo.ver < 8)
2480       return;
2481 
2482    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2483       if (!devinfo.has_64bit_float &&
2484           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2485            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2486          continue;
2487 
2488       if (!devinfo.has_64bit_int &&
2489           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2490            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2491            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2492            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2493          continue;
2494 
2495       if (inst[i].opcode == ELK_OPCODE_MOV) {
2496          elk_MOV(p, retype(inst[i].dst, inst[i].dst_type),
2497                     retype(inst[i].src, inst[i].src_type));
2498       } else {
2499          assert(inst[i].opcode == ELK_OPCODE_MUL);
2500          elk_MUL(p, retype(inst[i].dst, inst[i].dst_type),
2501                     retype(inst[i].src, inst[i].src_type),
2502                     retype(zero, inst[i].src_type));
2503          elk_inst_set_opcode(&isa, last_inst, inst[i].opcode);
2504       }
2505       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2506       elk_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
2507 
2508       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2509 
2510       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2511       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2512       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2513 
2514       /* Note: The Broadwell PRM also lists the restriction that destination
2515        * of DWord multiplication cannot be the accumulator.
2516        */
2517       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2518           (devinfo.ver == 8 &&
2519            inst[i].opcode == ELK_OPCODE_MUL &&
2520            elk_inst_dst_reg_file(&devinfo, last_inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
2521            elk_inst_dst_da_reg_nr(&devinfo, last_inst) != ELK_ARF_NULL)) {
2522          EXPECT_EQ(inst[i].expected_result, validate(p));
2523       } else {
2524          EXPECT_TRUE(validate(p));
2525       }
2526 
2527       clear_instructions(p);
2528    }
2529 
2530    if (!devinfo.has_64bit_float)
2531       return;
2532 
2533    /* MAC implicitly reads the accumulator */
2534    elk_MAC(p, retype(g0, ELK_REGISTER_TYPE_DF),
2535               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF),
2536               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF));
2537    if (devinfo.platform == INTEL_PLATFORM_CHV) {
2538       EXPECT_FALSE(validate(p));
2539    } else {
2540       EXPECT_TRUE(validate(p));
2541    }
2542 }
2543 
TEST_P(validation_test,align16_64_bit_integer)2544 TEST_P(validation_test, align16_64_bit_integer)
2545 {
2546    static const struct {
2547       enum elk_opcode opcode;
2548       unsigned exec_size;
2549 
2550       enum elk_reg_type dst_type;
2551       enum elk_reg_type src_type;
2552 
2553       bool expected_result;
2554    } inst[] = {
2555 #define INST(opcode, exec_size, dst_type, src_type, expected_result)           \
2556       {                                                                        \
2557          ELK_OPCODE_##opcode,                                                  \
2558          ELK_EXECUTE_##exec_size,                                              \
2559          ELK_REGISTER_TYPE_##dst_type,                                         \
2560          ELK_REGISTER_TYPE_##src_type,                                         \
2561          expected_result,                                                      \
2562       }
2563 
2564       /* Some instruction that violate no restrictions, as a control */
2565       INST(MOV, 2, Q,  D,  true ),
2566       INST(MOV, 2, UQ, UD, true ),
2567       INST(MOV, 2, DF, F,  true ),
2568 
2569       INST(ADD, 2, Q,  D,  true ),
2570       INST(ADD, 2, UQ, UD, true ),
2571       INST(ADD, 2, DF, F,  true ),
2572 
2573       /* The PRMs say that for BDW, SKL:
2574        *
2575        *    If Align16 is required for an operation with QW destination and non-QW
2576        *    source datatypes, the execution size cannot exceed 2.
2577        */
2578 
2579       INST(MOV, 4, Q,  D,  false),
2580       INST(MOV, 4, UQ, UD, false),
2581       INST(MOV, 4, DF, F,  false),
2582 
2583       INST(ADD, 4, Q,  D,  false),
2584       INST(ADD, 4, UQ, UD, false),
2585       INST(ADD, 4, DF, F,  false),
2586 
2587 #undef INST
2588    };
2589 
2590    /* 64-bit integer types exist on Gfx8+ */
2591    if (devinfo.ver < 8)
2592       return;
2593 
2594    /* Align16 does not exist on Gfx11+ */
2595    if (devinfo.ver >= 11)
2596       return;
2597 
2598    elk_set_default_access_mode(p, ELK_ALIGN_16);
2599 
2600    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2601       if (inst[i].opcode == ELK_OPCODE_MOV) {
2602          elk_MOV(p, retype(g0, inst[i].dst_type),
2603                     retype(g0, inst[i].src_type));
2604       } else {
2605          assert(inst[i].opcode == ELK_OPCODE_ADD);
2606          elk_ADD(p, retype(g0, inst[i].dst_type),
2607                     retype(g0, inst[i].src_type),
2608                     retype(g0, inst[i].src_type));
2609       }
2610       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2611 
2612       EXPECT_EQ(inst[i].expected_result, validate(p));
2613 
2614       clear_instructions(p);
2615    }
2616 }
2617 
TEST_P(validation_test,qword_low_power_no_depctrl)2618 TEST_P(validation_test, qword_low_power_no_depctrl)
2619 {
2620    static const struct {
2621       enum elk_opcode opcode;
2622       unsigned exec_size;
2623 
2624       enum elk_reg_type dst_type;
2625       unsigned dst_stride;
2626 
2627       enum elk_reg_type src_type;
2628       unsigned src_vstride;
2629       unsigned src_width;
2630       unsigned src_hstride;
2631 
2632       bool no_dd_check;
2633       bool no_dd_clear;
2634 
2635       bool expected_result;
2636    } inst[] = {
2637 #define INST(opcode, exec_size, dst_type, dst_stride,                          \
2638              src_type, src_vstride, src_width, src_hstride,                    \
2639              no_dd_check, no_dd_clear, expected_result)                        \
2640       {                                                                        \
2641          ELK_OPCODE_##opcode,                                                  \
2642          ELK_EXECUTE_##exec_size,                                              \
2643          ELK_REGISTER_TYPE_##dst_type,                                         \
2644          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2645          ELK_REGISTER_TYPE_##src_type,                                         \
2646          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2647          ELK_WIDTH_##src_width,                                                \
2648          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2649          no_dd_check,                                                          \
2650          no_dd_clear,                                                          \
2651          expected_result,                                                      \
2652       }
2653 
2654       /* Some instruction that violate no restrictions, as a control */
2655       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
2656       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
2657       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
2658 
2659       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
2660       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
2661       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
2662 
2663       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
2664       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
2665 
2666       INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),
2667 
2668       /* The PRMs say that for CHV, BXT:
2669        *
2670        *    When source or destination datatype is 64b or operation is integer
2671        *    DWord multiply, DepCtrl must not be used.
2672        */
2673       INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
2674       INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
2675       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
2676 
2677       INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
2678       INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
2679       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
2680 
2681       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
2682       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
2683       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
2684 
2685       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
2686       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
2687       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
2688 
2689       INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
2690       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
2691 
2692       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
2693       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
2694 
2695 #undef INST
2696    };
2697 
2698    /* These restrictions only apply to Gfx8+ */
2699    if (devinfo.ver < 8)
2700       return;
2701 
2702    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2703    if (devinfo.ver >= 12)
2704       return;
2705 
2706    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2707       if (!devinfo.has_64bit_float &&
2708           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2709            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2710          continue;
2711 
2712       if (!devinfo.has_64bit_int &&
2713           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2714            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2715            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2716            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2717          continue;
2718 
2719       if (inst[i].opcode == ELK_OPCODE_MOV) {
2720          elk_MOV(p, retype(g0, inst[i].dst_type),
2721                     retype(g0, inst[i].src_type));
2722       } else {
2723          assert(inst[i].opcode == ELK_OPCODE_MUL);
2724          elk_MUL(p, retype(g0, inst[i].dst_type),
2725                     retype(g0, inst[i].src_type),
2726                     retype(zero, inst[i].src_type));
2727       }
2728       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2729 
2730       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2731 
2732       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2733       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2734       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2735 
2736       elk_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
2737       elk_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
2738 
2739       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2740          EXPECT_EQ(inst[i].expected_result, validate(p));
2741       } else {
2742          EXPECT_TRUE(validate(p));
2743       }
2744 
2745       clear_instructions(p);
2746    }
2747 }
2748