• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <gtest/gtest.h>
25 #include "elk_disasm_info.h"
26 #include "elk_eu.h"
27 #include "elk_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30 
/* Parameter type for the suite: one short platform name per entry.  The
 * name is handed to intel_device_name_to_pci_device_id() by the fixture
 * and is also what the gfx_name functor returns as the test-name suffix.
 */
struct intel_gfx_info {
   const char *name;
};

static const struct intel_gfx_info gfx_names[] = {
   { "brw" },
   { "g4x" },
   { "ilk" },
   { "snb" },
   { "ivb" },
   { "hsw" },
   { "byt" },
   { "bdw" },
   { "chv" },
};
44 
45 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
46    virtual void SetUp();
47 
48 public:
49    validation_test();
50    virtual ~validation_test();
51 
52    struct elk_isa_info isa;
53    struct elk_codegen *p;
54    struct intel_device_info devinfo;
55 };
56 
validation_test()57 validation_test::validation_test()
58 {
59    p = rzalloc(NULL, struct elk_codegen);
60    memset(&devinfo, 0, sizeof(devinfo));
61 }
62 
~validation_test()63 validation_test::~validation_test()
64 {
65    ralloc_free(p);
66 }
67 
SetUp()68 void validation_test::SetUp()
69 {
70    struct intel_gfx_info info = GetParam();
71    int devid = intel_device_name_to_pci_device_id(info.name);
72 
73    intel_get_device_info_from_pci_id(devid, &devinfo);
74 
75    elk_init_isa_info(&isa, &devinfo);
76 
77    elk_init_codegen(&isa, p, p);
78 }
79 
80 struct gfx_name {
81    template <class ParamType>
82    std::string
operator ()gfx_name83    operator()(const ::testing::TestParamInfo<ParamType>& info) const {
84       return info.param.name;
85    }
86 };
87 
88 INSTANTIATE_TEST_SUITE_P(
89    eu_assembly, validation_test,
90    ::testing::ValuesIn(gfx_names),
91    gfx_name()
92 );
93 
94 static bool
validate(struct elk_codegen * p)95 validate(struct elk_codegen *p)
96 {
97    const bool print = getenv("TEST_DEBUG");
98    struct elk_disasm_info *disasm = elk_disasm_initialize(p->isa, NULL);
99 
100    if (print) {
101       elk_disasm_new_inst_group(disasm, 0);
102       elk_disasm_new_inst_group(disasm, p->next_insn_offset);
103    }
104 
105    bool ret = elk_validate_instructions(p->isa, p->store, 0,
106                                         p->next_insn_offset, disasm);
107 
108    if (print) {
109       elk_dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
110    }
111    ralloc_free(disasm);
112 
113    return ret;
114 }
115 
/* Shorthand used throughout the tests.
 *
 * last_inst requires a `p` (struct elk_codegen *) in scope and yields the
 * address of the final entry in p->store, i.e. index nr_insn - 1.  The
 * remaining macros are fixed operands built with the elk_* constructors.
 */
#define last_inst    (&p->store[p->nr_insn - 1])
#define g0           (elk_vec8_grf(0, 0))
#define acc0         (elk_acc_reg(8))
#define null         (elk_null_reg())
#define zero         (elk_imm_f(0.0f))
121 
122 static void
clear_instructions(struct elk_codegen * p)123 clear_instructions(struct elk_codegen *p)
124 {
125    p->next_insn_offset = 0;
126    p->nr_insn = 0;
127 }
128 
/* Baseline: an unmodified ADD of g0 with itself must validate. */
TEST_P(validation_test, sanity)
{
   elk_ADD(p, g0, g0, g0);
   EXPECT_TRUE(validate(p));
}
135 
/* A MOV whose src0 is the null register must be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   elk_MOV(p, g0, null);
   EXPECT_FALSE(validate(p));
}
142 
/* An ADD whose src1 is the null register must be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   elk_ADD(p, g0, g0, null);
   EXPECT_FALSE(validate(p));
}
149 
/* A SIN math operation with a null src0 must be rejected, whichever of the
 * two math emitters applies to the platform.
 */
TEST_P(validation_test, math_src0_null_reg)
{
   if (devinfo.ver < 6) {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, null, ELK_MATH_PRECISION_FULL);
   } else {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, null, null);
   }

   EXPECT_FALSE(validate(p));
}
160 
/* A two-source math operation (POW) with a null src1 must be rejected. */
TEST_P(validation_test, math_src1_null_reg)
{
   /* Math instructions on Gfx4/5 are actually SEND messages with payloads;
    * src1 is an immediate message descriptor set by elk_gfx4_math, so
    * there is nothing to check on those platforms.
    */
   if (devinfo.ver < 6)
      return;

   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_POW, g0, null);
   EXPECT_FALSE(validate(p));
}
172 
/* Hardware opcode 46 changes meaning across generations:
 *
 *    Gen 4 and 5: "push"
 *    Gen 6:       "fork"
 *    Gen 7:       reserved
 *    Gfx8+:       "goto"
 *
 * so the validator must reject it only where it is reserved.
 */
TEST_P(validation_test, opcode46)
{
   elk_next_insn(p, elk_opcode_decode(&isa, 46));

   if (devinfo.ver != 7) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
188 
/* Every defined execution-size encoding must validate, and the two
 * encodings just past ELK_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum elk_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { ELK_EXECUTE_1,      true  },
      { ELK_EXECUTE_2,      true  },
      { ELK_EXECUTE_4,      true  },
      { ELK_EXECUTE_8,      true  },
      { ELK_EXECUTE_16,     true  },
      { ELK_EXECUTE_32,     true  },

      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 1), false },
      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 2), false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
      elk_MOV(p, g0, g0);
      auto *insn = last_inst;

      elk_inst_set_exec_size(&devinfo, insn, test_case[i].exec_size);
      elk_inst_set_src0_file_type(&devinfo, insn, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_file_type(&devinfo, insn, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

      /* Use a <0;1,0> source region for the scalar case and <2;2,1> for
       * everything else.
       */
      if (test_case[i].exec_size != ELK_EXECUTE_1) {
         elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_2);
         elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_2);
         elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_1);
      } else {
         elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_0);
         elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_1);
         elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_0);
      }

      EXPECT_EQ(test_case[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
228 
/* The message register file encoding is accepted as a destination, and as
 * a math src0, up to and including ver 6, and rejected on later versions.
 */
TEST_P(validation_test, invalid_file_encoding)
{
   /* Case 1: MOV with its destination forced to the message register file. */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   if (devinfo.ver <= 6) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* Case 2: math instruction with src0 forced to the message register file. */
   if (devinfo.ver >= 6) {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   } else {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, g0, ELK_MATH_PRECISION_FULL);
   }
   elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   if (devinfo.ver <= 6) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }
}
255 
TEST_P(validation_test,invalid_type_encoding)256 TEST_P(validation_test, invalid_type_encoding)
257 {
258    enum elk_reg_file files[2] = {
259       ELK_GENERAL_REGISTER_FILE,
260       ELK_IMMEDIATE_VALUE,
261    };
262 
263    for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
264       const enum elk_reg_file file = files[i];
265       const int num_bits = devinfo.ver >= 8 ? 4 : 3;
266       const int num_encodings = 1 << num_bits;
267 
268       /* The data types are encoded into <num_bits> bits to be used in hardware
269        * instructions, so keep a record in a bitset the invalid patterns so
270        * they can be verified to be invalid when used.
271        */
272       BITSET_DECLARE(invalid_encodings, num_encodings);
273 
274       const struct {
275          enum elk_reg_type type;
276          bool expected_result;
277       } test_case[] = {
278          { ELK_REGISTER_TYPE_NF, devinfo.ver == 11 && file != IMM },
279          { ELK_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.ver >= 8 || file != IMM) },
280          { ELK_REGISTER_TYPE_F,  true },
281          { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8 },
282          { ELK_REGISTER_TYPE_VF, file == IMM },
283          { ELK_REGISTER_TYPE_Q,  devinfo.has_64bit_int },
284          { ELK_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
285          { ELK_REGISTER_TYPE_D,  true },
286          { ELK_REGISTER_TYPE_UD, true },
287          { ELK_REGISTER_TYPE_W,  true },
288          { ELK_REGISTER_TYPE_UW, true },
289          { ELK_REGISTER_TYPE_B,  file == FIXED_GRF },
290          { ELK_REGISTER_TYPE_UB, file == FIXED_GRF },
291          { ELK_REGISTER_TYPE_V,  file == IMM },
292          { ELK_REGISTER_TYPE_UV, devinfo.ver >= 6 && file == IMM },
293       };
294 
295       /* Initially assume all hardware encodings are invalid */
296       BITSET_ONES(invalid_encodings);
297 
298       elk_set_default_exec_size(p, ELK_EXECUTE_4);
299 
300       for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
301          if (test_case[i].expected_result) {
302             unsigned hw_type = elk_reg_type_to_hw_type(&devinfo, file, test_case[i].type);
303             if (hw_type != INVALID_REG_TYPE) {
304                /* ... and remove valid encodings from the set */
305                assert(BITSET_TEST(invalid_encodings, hw_type));
306                BITSET_CLEAR(invalid_encodings, hw_type);
307             }
308 
309             if (file == FIXED_GRF) {
310                struct elk_reg g = retype(g0, test_case[i].type);
311                elk_MOV(p, g, g);
312                elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
313                elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
314                elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
315             } else {
316                enum elk_reg_type t;
317 
318                switch (test_case[i].type) {
319                case ELK_REGISTER_TYPE_V:
320                   t = ELK_REGISTER_TYPE_W;
321                   break;
322                case ELK_REGISTER_TYPE_UV:
323                   t = ELK_REGISTER_TYPE_UW;
324                   break;
325                case ELK_REGISTER_TYPE_VF:
326                   t = ELK_REGISTER_TYPE_F;
327                   break;
328                default:
329                   t = test_case[i].type;
330                   break;
331                }
332 
333                struct elk_reg g = retype(g0, t);
334                elk_MOV(p, g, retype(elk_imm_w(0), test_case[i].type));
335             }
336 
337             EXPECT_TRUE(validate(p));
338 
339             clear_instructions(p);
340          }
341       }
342 
343       /* The remaining encodings in invalid_encodings do not have a mapping
344        * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
345        * encodings are rejected by the validator.
346        */
347       int e;
348       BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
349          if (file == FIXED_GRF) {
350             elk_MOV(p, g0, g0);
351             elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
352             elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
353             elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
354          } else {
355             elk_MOV(p, g0, elk_imm_w(0));
356          }
357          elk_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
358          elk_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
359 
360          EXPECT_FALSE(validate(p));
361 
362          clear_instructions(p);
363       }
364    }
365 }
366 
TEST_P(validation_test,invalid_type_encoding_3src_a16)367 TEST_P(validation_test, invalid_type_encoding_3src_a16)
368 {
369    /* 3-src instructions in align16 mode only supported on Gfx6-10 */
370    if (devinfo.ver < 6)
371       return;
372 
373    const int num_bits = devinfo.ver >= 8 ? 3 : 2;
374    const int num_encodings = 1 << num_bits;
375 
376    /* The data types are encoded into <num_bits> bits to be used in hardware
377     * instructions, so keep a record in a bitset the invalid patterns so
378     * they can be verified to be invalid when used.
379     */
380    BITSET_DECLARE(invalid_encodings, num_encodings);
381 
382    const struct {
383       enum elk_reg_type type;
384       bool expected_result;
385    } test_case[] = {
386       { ELK_REGISTER_TYPE_DF, devinfo.ver >= 7  },
387       { ELK_REGISTER_TYPE_F,  true },
388       { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8  },
389       { ELK_REGISTER_TYPE_D,  devinfo.ver >= 7  },
390       { ELK_REGISTER_TYPE_UD, devinfo.ver >= 7  },
391    };
392 
393    /* Initially assume all hardware encodings are invalid */
394    BITSET_ONES(invalid_encodings);
395 
396    elk_set_default_access_mode(p, ELK_ALIGN_16);
397    elk_set_default_exec_size(p, ELK_EXECUTE_4);
398 
399    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
400       if (test_case[i].expected_result) {
401          unsigned hw_type = elk_reg_type_to_a16_hw_3src_type(&devinfo, test_case[i].type);
402          if (hw_type != INVALID_HW_REG_TYPE) {
403             /* ... and remove valid encodings from the set */
404             assert(BITSET_TEST(invalid_encodings, hw_type));
405             BITSET_CLEAR(invalid_encodings, hw_type);
406          }
407 
408          struct elk_reg g = retype(g0, test_case[i].type);
409          if (!elk_reg_type_is_integer(test_case[i].type)) {
410             elk_MAD(p, g, g, g, g);
411          } else {
412             elk_BFE(p, g, g, g, g);
413          }
414 
415          EXPECT_TRUE(validate(p));
416 
417          clear_instructions(p);
418       }
419    }
420 
421    /* The remaining encodings in invalid_encodings do not have a mapping
422     * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
423     * encodings are rejected by the validator.
424     */
425    int e;
426    BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
427       for (unsigned i = 0; i < 2; i++) {
428          if (i == 0) {
429             elk_MAD(p, g0, g0, g0, g0);
430          } else {
431             elk_BFE(p, g0, g0, g0, g0);
432          }
433 
434          elk_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
435          elk_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
436 
437          EXPECT_FALSE(validate(p));
438 
439          clear_instructions(p);
440 
441          if (devinfo.ver == 6)
442             break;
443       }
444    }
445 }
446 
447 TEST_P(validation_test, 3src_inst_access_mode)
448 {
449    /* 3-src instructions only supported on Gfx6+ */
450    if (devinfo.ver < 6)
451       return;
452 
453    const struct {
454       unsigned mode;
455       bool expected_result;
456    } test_case[] = {
457       { ELK_ALIGN_1,  false},
458       { ELK_ALIGN_16, true },
459    };
460 
461    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
462       elk_set_default_access_mode(p, ELK_ALIGN_16);
463 
464       elk_MAD(p, g0, g0, g0, g0);
465       elk_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
466 
467       EXPECT_EQ(test_case[i].expected_result, validate(p));
468 
469       clear_instructions(p);
470    }
471 }
472 
473 /* When the Execution Data Type is wider than the destination data type, the
474  * destination must [...] specify a HorzStride equal to the ratio in sizes of
475  * the two data types.
476  */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)477 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
478 {
479    elk_ADD(p, g0, g0, g0);
480    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
481    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
482    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
483 
484    EXPECT_FALSE(validate(p));
485 
486    clear_instructions(p);
487 
488    elk_ADD(p, g0, g0, g0);
489    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
490    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
491    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
492    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
493 
494    EXPECT_TRUE(validate(p));
495 }
496 
497 /* When the Execution Data Type is wider than the destination data type, the
498  * destination must be aligned as required by the wider execution data type
499  * [...]
500  */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)501 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
502 {
503    elk_ADD(p, g0, g0, g0);
504    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
505    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
506    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
507    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
508    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
509 
510    EXPECT_FALSE(validate(p));
511 
512    clear_instructions(p);
513 
514    elk_ADD(p, g0, g0, g0);
515    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
516    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
517    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
518    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
519    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
520    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
521    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
522    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
523    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
524    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
525    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
526    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
527 
528    EXPECT_TRUE(validate(p));
529 }
530 
531 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)532 TEST_P(validation_test, exec_size_less_than_width)
533 {
534    elk_ADD(p, g0, g0, g0);
535    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_16);
536 
537    EXPECT_FALSE(validate(p));
538 
539    clear_instructions(p);
540 
541    elk_ADD(p, g0, g0, g0);
542    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_16);
543 
544    EXPECT_FALSE(validate(p));
545 }
546 
547 /* If ExecSize = Width and HorzStride ≠ 0,
548  * VertStride must be set to Width * HorzStride.
549  */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)550 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
551 {
552    elk_ADD(p, g0, g0, g0);
553    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
554 
555    EXPECT_FALSE(validate(p));
556 
557    clear_instructions(p);
558 
559    elk_ADD(p, g0, g0, g0);
560    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
561 
562    EXPECT_FALSE(validate(p));
563 }
564 
565 /* If Width = 1, HorzStride must be 0 regardless of the values
566  * of ExecSize and VertStride.
567  */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)568 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
569 {
570    elk_ADD(p, g0, g0, g0);
571    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
572    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
573    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
574 
575    EXPECT_FALSE(validate(p));
576 
577    clear_instructions(p);
578 
579    elk_ADD(p, g0, g0, g0);
580    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
581    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
582    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
583 
584    EXPECT_FALSE(validate(p));
585 }
586 
587 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)588 TEST_P(validation_test, scalar_region_must_be_0_1_0)
589 {
590    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
591 
592    elk_ADD(p, g0, g0, g0_0);
593    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
594    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
595    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
596    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
597 
598    EXPECT_FALSE(validate(p));
599 
600    clear_instructions(p);
601 
602    elk_ADD(p, g0, g0_0, g0);
603    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
604    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
605    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
606    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
607 
608    EXPECT_FALSE(validate(p));
609 }
610 
611 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
612  * of ExecSize.
613  */
TEST_P(validation_test,zero_stride_implies_0_1_0)614 TEST_P(validation_test, zero_stride_implies_0_1_0)
615 {
616    elk_ADD(p, g0, g0, g0);
617    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
618    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
619    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
620 
621    EXPECT_FALSE(validate(p));
622 
623    clear_instructions(p);
624 
625    elk_ADD(p, g0, g0, g0);
626    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
627    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
628    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
629 
630    EXPECT_FALSE(validate(p));
631 }
632 
633 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)634 TEST_P(validation_test, dst_horizontal_stride_0)
635 {
636    elk_ADD(p, g0, g0, g0);
637    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
638 
639    EXPECT_FALSE(validate(p));
640 
641    clear_instructions(p);
642 
643    elk_set_default_access_mode(p, ELK_ALIGN_16);
644 
645    elk_ADD(p, g0, g0, g0);
646    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
647 
648    EXPECT_FALSE(validate(p));
649 }
650 
651 /* VertStride must be used to cross ELK_GENERAL_REGISTER_FILE register boundaries. This rule implies
652  * that elements within a 'Width' cannot cross ELK_GENERAL_REGISTER_FILE boundaries.
653  */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)654 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
655 {
656    elk_ADD(p, g0, g0, g0);
657    elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
658 
659    EXPECT_FALSE(validate(p));
660 
661    clear_instructions(p);
662 
663    elk_ADD(p, g0, g0, g0);
664    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
665 
666    EXPECT_FALSE(validate(p));
667 
668    clear_instructions(p);
669 
670    elk_ADD(p, g0, g0, g0);
671    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
672    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
673    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
674 
675    EXPECT_FALSE(validate(p));
676 
677    clear_instructions(p);
678 
679    elk_ADD(p, g0, g0, g0);
680    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
681    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
682    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
683 
684    EXPECT_FALSE(validate(p));
685 }
686 
687 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)688 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
689 {
690    elk_set_default_access_mode(p, ELK_ALIGN_16);
691 
692    elk_ADD(p, g0, g0, g0);
693    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
694 
695    EXPECT_FALSE(validate(p));
696 
697    clear_instructions(p);
698 
699    elk_ADD(p, g0, g0, g0);
700    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
701 
702    EXPECT_TRUE(validate(p));
703 }
704 
705 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)706 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
707 {
708    const struct {
709       enum elk_vertical_stride vstride;
710       bool expected_result;
711    } vstride[] = {
712       { ELK_VERTICAL_STRIDE_0, true },
713       { ELK_VERTICAL_STRIDE_1, false },
714       { ELK_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
715       { ELK_VERTICAL_STRIDE_4, true },
716       { ELK_VERTICAL_STRIDE_8, false },
717       { ELK_VERTICAL_STRIDE_16, false },
718       { ELK_VERTICAL_STRIDE_32, false },
719       { ELK_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
720    };
721 
722    elk_set_default_access_mode(p, ELK_ALIGN_16);
723 
724    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
725       elk_ADD(p, g0, g0, g0);
726       elk_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
727 
728       EXPECT_EQ(vstride[i].expected_result, validate(p));
729 
730       clear_instructions(p);
731    }
732 
733    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
734       elk_ADD(p, g0, g0, g0);
735       elk_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
736 
737       EXPECT_EQ(vstride[i].expected_result, validate(p));
738 
739       clear_instructions(p);
740    }
741 }
742 
743 /* In Direct Addressing mode, a source cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE
744  * registers.
745  */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)746 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
747 {
748    elk_ADD(p, g0, g0, g0);
749    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
750    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
751    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
752    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
753    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
754    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
755    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
756 
757    EXPECT_FALSE(validate(p));
758 
759    clear_instructions(p);
760 
761    elk_ADD(p, g0, g0, g0);
762    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
763    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
764    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
765    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
766    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
767    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
768    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
769    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
770 
771    EXPECT_TRUE(validate(p));
772 
773    clear_instructions(p);
774 
775    elk_ADD(p, g0, g0, g0);
776    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
777 
778    EXPECT_TRUE(validate(p));
779 }
780 
781 /* A destination cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)782 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
783 {
784    elk_ADD(p, g0, g0, g0);
785    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
786    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
787    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
788    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
789    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
790 
791    EXPECT_FALSE(validate(p));
792 
793    clear_instructions(p);
794 
795    elk_ADD(p, g0, g0, g0);
796    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_8);
797    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
798    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
799    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
800    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
801    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
802    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
803    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
804    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
805    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
806    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
807    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
808 
809    EXPECT_TRUE(validate(p));
810 }
811 
TEST_P(validation_test,src_region_spans_two_regs_dst_region_spans_one)812 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
813 {
814    /* Writes to dest are to the lower OWord */
815    elk_ADD(p, g0, g0, g0);
816    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
817    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
818    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
819    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
820    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
821    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
822 
823    EXPECT_TRUE(validate(p));
824 
825    clear_instructions(p);
826 
827    /* Writes to dest are to the upper OWord */
828    elk_ADD(p, g0, g0, g0);
829    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
830    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
831    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
832    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
833    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
834    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
835    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
836 
837    EXPECT_TRUE(validate(p));
838 
839    clear_instructions(p);
840 
841    /* Writes to dest are evenly split between OWords */
842    elk_ADD(p, g0, g0, g0);
843    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
844    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
845    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
846    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
847    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
848    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
849    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
850 
851    EXPECT_TRUE(validate(p));
852 
853    clear_instructions(p);
854 
855    /* Writes to dest are uneven between OWords */
856    elk_ADD(p, g0, g0, g0);
857    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
858    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10);
859    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
860    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
861    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
862    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
863    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
864    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
865    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
866    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
867    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
868 
869    EXPECT_FALSE(validate(p));
870 }
871 
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD whose destination starts at sub-register 4: must be rejected. */
   {
      elk_ADD(p, g0, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, insn, 4);

      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* ADD widened to an execution size of 16 with the destination left at
    * its default offset: must be accepted.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_exec_size(&devinfo, insn, ELK_EXECUTE_16);

      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* The remaining cases emit the math instruction through elk_gfx6_math()
    * and are only run when devinfo.ver >= 6.
    */
   if (devinfo.ver < 6)
      return;

   /* Unmodified SIN: must be accepted. */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* SIN with the destination moved to sub-register 4: must be rejected. */
   {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
      auto *insn = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, insn, 4);

      EXPECT_FALSE(validate(p));
   }
}
901 
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* src0 starts at sub-register 16 with a <2;1,0> region while src1 uses
    * <4;4,1> from sub-register 0. The instruction is expected to be
    * rejected when devinfo.ver <= 7 and accepted otherwise.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_exec_size(&devinfo, insn, ELK_EXECUTE_4);
      elk_inst_set_dst_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_4);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, insn, 16);
      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_2);
      elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_1);
      elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_0);
      elk_inst_set_src1_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_width(&devinfo, insn, ELK_WIDTH_4);
      elk_inst_set_src1_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(devinfo.ver > 7, validate(p));
   }

   clear_instructions(p);

   /* Both sources start at sub-register 0 (src0 <4;1,0>, src1 <8;2,1>):
    * expected to be accepted on every platform.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_exec_size(&devinfo, insn, ELK_EXECUTE_4);
      elk_inst_set_dst_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_4);
      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_1);
      elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_0);
      elk_inst_set_src1_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_8);
      elk_inst_set_src1_width(&devinfo, insn, ELK_WIDTH_2);
      elk_inst_set_src1_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_1);

      EXPECT_TRUE(validate(p));
   }
}
935 
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* Both cases are expected to be rejected when devinfo.ver <= 7 and
    * accepted otherwise.
    */
   const bool expect_valid = devinfo.ver > 7;

   /* Word MOV, execution size 16, destination stride 2, source <4;4,1>
    * starting at sub-register 8.
    */
   {
      elk_MOV(p, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_exec_size(&devinfo, insn, ELK_EXECUTE_16);
      elk_inst_set_dst_file_type(&devinfo, insn, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_2);
      elk_inst_set_src0_file_type(&devinfo, insn, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, insn, 8);
      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_4);
      elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(expect_valid, validate(p));
   }

   clear_instructions(p);

   /* MOV with the destination at sub-register 16 and a <2;2,1> source
    * starting at sub-register 8; operand types are left as emitted.
    */
   {
      elk_MOV(p, g0, g0);
      auto *insn = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, insn, 16);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, insn, 8);
      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_2);
      elk_inst_set_src0_width(&devinfo, insn, ELK_WIDTH_2);
      elk_inst_set_src0_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(expect_valid, validate(p));
   }
}
969 
TEST_P(validation_test,one_src_two_dst)970 TEST_P(validation_test, one_src_two_dst)
971 {
972    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
973 
974    elk_ADD(p, g0, g0_0, g0_0);
975    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
976 
977    EXPECT_TRUE(validate(p));
978 
979    clear_instructions(p);
980 
981    elk_ADD(p, g0, g0, g0);
982    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
983    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
984    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
985    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
986 
987    EXPECT_TRUE(validate(p));
988 
989    clear_instructions(p);
990 
991    elk_ADD(p, g0, g0, g0);
992    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
993    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
994    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
995    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
996 
997    if (devinfo.ver >= 8) {
998       EXPECT_TRUE(validate(p));
999    } else {
1000       EXPECT_FALSE(validate(p));
1001    }
1002 
1003    clear_instructions(p);
1004 
1005    elk_ADD(p, g0, g0, g0);
1006    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1007    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
1008    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1009    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1010 
1011    if (devinfo.ver >= 8) {
1012       EXPECT_TRUE(validate(p));
1013    } else {
1014       EXPECT_FALSE(validate(p));
1015    }
1016 
1017    clear_instructions(p);
1018 
1019    elk_ADD(p, g0, g0, g0);
1020    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1021    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1022    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1023    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1024    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1025    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1026    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
1027    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1028 
1029    if (devinfo.ver >= 8) {
1030       EXPECT_TRUE(validate(p));
1031    } else {
1032       EXPECT_FALSE(validate(p));
1033    }
1034 
1035    clear_instructions(p);
1036 
1037    elk_ADD(p, g0, g0, g0);
1038    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1039    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1040    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1041    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1042    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1043    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
1044    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1045    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1046 
1047    if (devinfo.ver >= 8) {
1048       EXPECT_TRUE(validate(p));
1049    } else {
1050       EXPECT_FALSE(validate(p));
1051    }
1052 }
1053 
TEST_P(validation_test,packed_byte_destination)1054 TEST_P(validation_test, packed_byte_destination)
1055 {
1056    static const struct {
1057       enum elk_reg_type dst_type;
1058       enum elk_reg_type src_type;
1059       bool neg, abs, sat;
1060       bool expected_result;
1061    } move[] = {
1062       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1063       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1064       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1065       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1066 
1067       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1068       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1069       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1070       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1071 
1072       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1073       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1074       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1075       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1076 
1077       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1078       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1079       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1080       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1081 
1082       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UW, 0, 0, 0, false },
1083       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_W , 0, 0, 0, false },
1084       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UD, 0, 0, 0, false },
1085       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_D , 0, 0, 0, false },
1086    };
1087 
1088    for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1089       elk_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
1090       elk_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
1091       elk_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
1092       elk_inst_set_saturate(&devinfo, last_inst, move[i].sat);
1093 
1094       EXPECT_EQ(move[i].expected_result, validate(p));
1095 
1096       clear_instructions(p);
1097    }
1098 
1099    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_UB),
1100               retype(g0, ELK_REGISTER_TYPE_UB),
1101               retype(g0, ELK_REGISTER_TYPE_UB));
1102    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1103 
1104    EXPECT_FALSE(validate(p));
1105 
1106    clear_instructions(p);
1107 
1108    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
1109               retype(g0, ELK_REGISTER_TYPE_B),
1110               retype(g0, ELK_REGISTER_TYPE_B));
1111    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1112 
1113    EXPECT_FALSE(validate(p));
1114 }
1115 
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Predicated SEL from word sources into a byte destination with a
    * horizontal stride of 2, destination sub-register left as emitted:
    * expected to be accepted.
    */
   {
      elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
                 retype(g0, ELK_REGISTER_TYPE_W),
                 retype(g0, ELK_REGISTER_TYPE_W));
      auto *insn = last_inst;
      elk_inst_set_pred_control(&devinfo, insn, ELK_PREDICATE_NORMAL);
      elk_inst_set_dst_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* Same instruction with the destination moved to sub-register 1:
    * expected to be accepted only when devinfo.verx10 >= 45.
    */
   {
      elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
                 retype(g0, ELK_REGISTER_TYPE_W),
                 retype(g0, ELK_REGISTER_TYPE_W));
      auto *insn = last_inst;
      elk_inst_set_pred_control(&devinfo, insn, ELK_PREDICATE_NORMAL);
      elk_inst_set_dst_hstride(&devinfo, insn, ELK_HORIZONTAL_STRIDE_2);
      elk_inst_set_dst_da1_subreg_nr(&devinfo, insn, 1);

      EXPECT_EQ(devinfo.verx10 >= 45, validate(p));
   }
}
1141 
TEST_P(validation_test,byte_64bit_conversion)1142 TEST_P(validation_test, byte_64bit_conversion)
1143 {
1144    static const struct {
1145       enum elk_reg_type dst_type;
1146       enum elk_reg_type src_type;
1147       unsigned dst_stride;
1148       bool expected_result;
1149    } inst[] = {
1150 #define INST(dst_type, src_type, dst_stride, expected_result)             \
1151       {                                                                   \
1152          ELK_REGISTER_TYPE_##dst_type,                                    \
1153          ELK_REGISTER_TYPE_##src_type,                                    \
1154          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1155          expected_result,                                                 \
1156       }
1157 
1158       INST(B,   Q, 1, false),
1159       INST(B,  UQ, 1, false),
1160       INST(B,  DF, 1, false),
1161       INST(UB,  Q, 1, false),
1162       INST(UB, UQ, 1, false),
1163       INST(UB, DF, 1, false),
1164 
1165       INST(B,   Q, 2, false),
1166       INST(B,  UQ, 2, false),
1167       INST(B , DF, 2, false),
1168       INST(UB,  Q, 2, false),
1169       INST(UB, UQ, 2, false),
1170       INST(UB, DF, 2, false),
1171 
1172       INST(B,   Q, 4, false),
1173       INST(B,  UQ, 4, false),
1174       INST(B,  DF, 4, false),
1175       INST(UB,  Q, 4, false),
1176       INST(UB, UQ, 4, false),
1177       INST(UB, DF, 4, false),
1178 
1179 #undef INST
1180    };
1181 
1182    if (devinfo.ver < 8)
1183       return;
1184 
1185    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1186       if (!devinfo.has_64bit_float &&
1187           inst[i].src_type == ELK_REGISTER_TYPE_DF)
1188          continue;
1189 
1190       if (!devinfo.has_64bit_int &&
1191           (inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1192            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1193          continue;
1194 
1195       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1196       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1197       EXPECT_EQ(inst[i].expected_result, validate(p));
1198 
1199       clear_instructions(p);
1200    }
1201 }
1202 
TEST_P(validation_test,half_float_conversion)1203 TEST_P(validation_test, half_float_conversion)
1204 {
1205    static const struct {
1206       enum elk_reg_type dst_type;
1207       enum elk_reg_type src_type;
1208       unsigned dst_stride;
1209       unsigned dst_subnr;
1210       bool expected_result_bdw;
1211       bool expected_result_chv;
1212    } inst[] = {
1213 #define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
1214              expected_result_bdw, expected_result_chv)                      \
1215       {                                                                     \
1216          ELK_REGISTER_TYPE_##dst_type,                                      \
1217          ELK_REGISTER_TYPE_##src_type,                                      \
1218          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1219          dst_subnr,                                                         \
1220          expected_result_bdw,                                               \
1221          expected_result_chv,                                               \
1222       }
1223 
1224       /* MOV to half-float destination */
1225       INST(HF,  B, 1, 0, false, false), /* 0 */
1226       INST(HF,  W, 1, 0, false, false),
1227       INST(HF, HF, 1, 0, true,  true),
1228       INST(HF, HF, 1, 2, true,  true),
1229       INST(HF,  D, 1, 0, false, false),
1230       INST(HF,  F, 1, 0, false, true),
1231       INST(HF,  Q, 1, 0, false, false),
1232       INST(HF,  B, 2, 0, true,  true),
1233       INST(HF,  B, 2, 2, false, false),
1234       INST(HF,  W, 2, 0, true,  true),
1235       INST(HF,  W, 2, 2, false, false), /* 10 */
1236       INST(HF, HF, 2, 0, true,  true),
1237       INST(HF, HF, 2, 2, true,  true),
1238       INST(HF,  D, 2, 0, true,  true),
1239       INST(HF,  D, 2, 2, false, false),
1240       INST(HF,  F, 2, 0, true,  true),
1241       INST(HF,  F, 2, 2, false, true),
1242       INST(HF,  Q, 2, 0, false, false),
1243       INST(HF, DF, 2, 0, false, false),
1244       INST(HF,  B, 4, 0, false, false),
1245       INST(HF,  W, 4, 0, false, false), /* 20 */
1246       INST(HF, HF, 4, 0, true,  true),
1247       INST(HF, HF, 4, 2, true,  true),
1248       INST(HF,  D, 4, 0, false, false),
1249       INST(HF,  F, 4, 0, false, false),
1250       INST(HF,  Q, 4, 0, false, false),
1251       INST(HF, DF, 4, 0, false, false),
1252 
1253       /* MOV from half-float source */
1254       INST( B, HF, 1, 0, false, false),
1255       INST( W, HF, 1, 0, false, false),
1256       INST( D, HF, 1, 0, true,  true),
1257       INST( D, HF, 1, 4, true,  true),  /* 30 */
1258       INST( F, HF, 1, 0, true,  true),
1259       INST( F, HF, 1, 4, true,  true),
1260       INST( Q, HF, 1, 0, false, false),
1261       INST(DF, HF, 1, 0, false, false),
1262       INST( B, HF, 2, 0, false, false),
1263       INST( W, HF, 2, 0, true,  true),
1264       INST( W, HF, 2, 2, false, false),
1265       INST( D, HF, 2, 0, false, false),
1266       INST( F, HF, 2, 0, true,  true),
1267       INST( B, HF, 4, 0, true,  true),  /* 40 */
1268       INST( B, HF, 4, 1, false, false),
1269       INST( W, HF, 4, 0, false, false),
1270 
1271 #undef INST
1272    };
1273 
1274    if (devinfo.ver < 8)
1275       return;
1276 
1277    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1278       if (!devinfo.has_64bit_float &&
1279           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
1280            inst[i].src_type == ELK_REGISTER_TYPE_DF))
1281          continue;
1282 
1283       if (!devinfo.has_64bit_int &&
1284           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
1285            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
1286            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1287            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1288          continue;
1289 
1290       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1291 
1292       elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
1293 
1294       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1295       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1296 
1297       if (inst[i].src_type == ELK_REGISTER_TYPE_B) {
1298          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1299          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
1300          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1301       } else {
1302          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1303          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1304          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1305       }
1306 
1307       if (devinfo.platform == INTEL_PLATFORM_CHV) {
1308          EXPECT_EQ(inst[i].expected_result_chv, validate(p)) <<
1309             "Failing test is: " << i;
1310       } else {
1311          EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
1312             "Failing test is: " << i;
1313       }
1314 
1315       clear_instructions(p);
1316    }
1317 }
1318 
TEST_P(validation_test,mixed_float_source_indirect_addressing)1319 TEST_P(validation_test, mixed_float_source_indirect_addressing)
1320 {
1321    static const struct {
1322       enum elk_reg_type dst_type;
1323       enum elk_reg_type src0_type;
1324       enum elk_reg_type src1_type;
1325       unsigned dst_stride;
1326       bool dst_indirect;
1327       bool src0_indirect;
1328       bool expected_result;
1329       bool gfx125_expected_result;
1330    } inst[] = {
1331 #define INST(dst_type, src0_type, src1_type,                              \
1332              dst_stride, dst_indirect, src0_indirect, expected_result,    \
1333              gfx125_expected_result)                                      \
1334       {                                                                   \
1335          ELK_REGISTER_TYPE_##dst_type,                                    \
1336          ELK_REGISTER_TYPE_##src0_type,                                   \
1337          ELK_REGISTER_TYPE_##src1_type,                                   \
1338          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1339          dst_indirect,                                                    \
1340          src0_indirect,                                                   \
1341          expected_result,                                                 \
1342          gfx125_expected_result,                                          \
1343       }
1344 
1345       /* Source and dest are mixed float: indirect src addressing not allowed */
1346       INST(HF,  F,  F, 2, false, false, true,  true),
1347       INST(HF,  F,  F, 2, true,  false, true,  true),
1348       INST(HF,  F,  F, 2, false, true,  false, false),
1349       INST(HF,  F,  F, 2, true,  true,  false, false),
1350       INST( F, HF,  F, 1, false, false, true,  false),
1351       INST( F, HF,  F, 1, true,  false, true,  false),
1352       INST( F, HF,  F, 1, false, true,  false, false),
1353       INST( F, HF,  F, 1, true,  true,  false, false),
1354 
1355       INST(HF, HF,  F, 2, false, false, true,  false),
1356       INST(HF, HF,  F, 2, true,  false, true,  false),
1357       INST(HF, HF,  F, 2, false, true,  false, false),
1358       INST(HF, HF,  F, 2, true,  true,  false, false),
1359       INST( F,  F, HF, 1, false, false, true,  false),
1360       INST( F,  F, HF, 1, true,  false, true,  false),
1361       INST( F,  F, HF, 1, false, true,  false, false),
1362       INST( F,  F, HF, 1, true,  true,  false, false),
1363 
1364 #undef INST
1365    };
1366 
1367    if (devinfo.ver < 8)
1368       return;
1369 
1370    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1371       elk_ADD(p, retype(g0, inst[i].dst_type),
1372                  retype(g0, inst[i].src0_type),
1373                  retype(g0, inst[i].src1_type));
1374 
1375       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
1376       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1377       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
1378 
1379       EXPECT_EQ(inst[i].expected_result, validate(p));
1380 
1381       clear_instructions(p);
1382    }
1383 }
1384 
TEST_P(validation_test,mixed_float_align1_simd16)1385 TEST_P(validation_test, mixed_float_align1_simd16)
1386 {
1387    static const struct {
1388       unsigned exec_size;
1389       enum elk_reg_type dst_type;
1390       enum elk_reg_type src0_type;
1391       enum elk_reg_type src1_type;
1392       unsigned dst_stride;
1393       bool expected_result;
1394       bool gfx125_expected_result;
1395    } inst[] = {
1396 #define INST(exec_size, dst_type, src0_type, src1_type,                   \
1397              dst_stride, expected_result, gfx125_expected_result)         \
1398       {                                                                   \
1399          ELK_EXECUTE_##exec_size,                                         \
1400          ELK_REGISTER_TYPE_##dst_type,                                    \
1401          ELK_REGISTER_TYPE_##src0_type,                                   \
1402          ELK_REGISTER_TYPE_##src1_type,                                   \
1403          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1404          expected_result,                                                 \
1405          gfx125_expected_result,                                          \
1406       }
1407 
1408       /* No SIMD16 in mixed mode when destination is packed f16 */
1409       INST( 8, HF,  F, HF, 2, true,  false),
1410       INST(16, HF, HF,  F, 2, true,  false),
1411       INST(16, HF, HF,  F, 1, false, false),
1412       INST(16, HF,  F, HF, 1, false, false),
1413 
1414       /* No SIMD16 in mixed mode when destination is f32 */
1415       INST( 8,  F, HF,  F, 1, true,  false),
1416       INST( 8,  F,  F, HF, 1, true,  false),
1417       INST(16,  F, HF,  F, 1, false, false),
1418       INST(16,  F,  F, HF, 1, false, false),
1419 
1420 #undef INST
1421    };
1422 
1423    if (devinfo.ver < 8)
1424       return;
1425 
1426    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1427       elk_ADD(p, retype(g0, inst[i].dst_type),
1428                  retype(g0, inst[i].src0_type),
1429                  retype(g0, inst[i].src1_type));
1430 
1431       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1432 
1433       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1434 
1435       EXPECT_EQ(inst[i].expected_result, validate(p));
1436 
1437       clear_instructions(p);
1438    }
1439 }
1440 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst_acc_read_offset_0)1441 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
1442 {
1443    static const struct {
1444       enum elk_reg_type dst_type;
1445       enum elk_reg_type src0_type;
1446       enum elk_reg_type src1_type;
1447       unsigned dst_stride;
1448       bool read_acc;
1449       unsigned subnr;
1450       bool expected_result_bdw;
1451       bool expected_result_chv_skl;
1452       bool expected_result_gfx125;
1453    } inst[] = {
1454 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
1455              expected_result_bdw, expected_result_chv_skl,                  \
1456              expected_result_gfx125)                                        \
1457       {                                                                     \
1458          ELK_REGISTER_TYPE_##dst_type,                                      \
1459          ELK_REGISTER_TYPE_##src0_type,                                     \
1460          ELK_REGISTER_TYPE_##src1_type,                                     \
1461          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1462          read_acc,                                                          \
1463          subnr,                                                             \
1464          expected_result_bdw,                                               \
1465          expected_result_chv_skl,                                           \
1466          expected_result_gfx125,                                            \
1467       }
1468 
1469       /* Destination is not packed */
1470       INST(HF, HF,  F, 2, true,  0, true, true, false),
1471       INST(HF, HF,  F, 2, true,  2, true, true, false),
1472       INST(HF, HF,  F, 2, true,  4, true, true, false),
1473       INST(HF, HF,  F, 2, true,  8, true, true, false),
1474       INST(HF, HF,  F, 2, true, 16, true, true, false),
1475 
1476       /* Destination is packed, we don't read acc */
1477       INST(HF, HF,  F, 1, false,  0, false, true, false),
1478       INST(HF, HF,  F, 1, false,  2, false, true, false),
1479       INST(HF, HF,  F, 1, false,  4, false, true, false),
1480       INST(HF, HF,  F, 1, false,  8, false, true, false),
1481       INST(HF, HF,  F, 1, false, 16, false, true, false),
1482 
1483       /* Destination is packed, we read acc */
1484       INST(HF, HF,  F, 1, true,  0, false, false, false),
1485       INST(HF, HF,  F, 1, true,  2, false, false, false),
1486       INST(HF, HF,  F, 1, true,  4, false, false, false),
1487       INST(HF, HF,  F, 1, true,  8, false, false, false),
1488       INST(HF, HF,  F, 1, true, 16, false, false, false),
1489 
1490 #undef INST
1491    };
1492 
1493    if (devinfo.ver < 8)
1494       return;
1495 
1496    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1497       elk_ADD(p, retype(g0, inst[i].dst_type),
1498                  retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1499                  retype(g0, inst[i].src1_type));
1500 
1501       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1502 
1503       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
1504 
1505       if (devinfo.verx10 >= 125)
1506          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1507       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1508          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1509       else
1510          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1511 
1512       clear_instructions(p);
1513    }
1514 }
1515 
TEST_P(validation_test,mixed_float_fp16_dest_with_acc)1516 TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
1517 {
1518    static const struct {
1519       unsigned exec_size;
1520       unsigned opcode;
1521       enum elk_reg_type dst_type;
1522       enum elk_reg_type src0_type;
1523       enum elk_reg_type src1_type;
1524       unsigned dst_stride;
1525       bool read_acc;
1526       bool expected_result_bdw;
1527       bool expected_result_chv_skl;
1528       bool expected_result_gfx125;
1529    } inst[] = {
1530 #define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
1531              dst_stride, read_acc,expected_result_bdw,                    \
1532              expected_result_chv_skl, expected_result_gfx125)             \
1533       {                                                                   \
1534          ELK_EXECUTE_##exec_size,                                         \
1535          ELK_OPCODE_##opcode,                                             \
1536          ELK_REGISTER_TYPE_##dst_type,                                    \
1537          ELK_REGISTER_TYPE_##src0_type,                                   \
1538          ELK_REGISTER_TYPE_##src1_type,                                   \
1539          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1540          read_acc,                                                        \
1541          expected_result_bdw,                                             \
1542          expected_result_chv_skl,                                         \
1543          expected_result_gfx125,                                          \
1544       }
1545 
1546       /* Packed fp16 dest with implicit acc needs hstride=2 */
1547       INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
1548       INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
1549       INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
1550       INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),
1551 
1552       /* Packed fp16 dest with explicit acc needs hstride=2 */
1553       INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
1554       INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
1555       INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
1556       INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),
1557 
1558       /* If destination is not fp16, restriction doesn't apply */
1559       INST(8, MAC,  F, HF,  F, 1, false, true, true, false),
1560       INST(8, MAC,  F, HF,  F, 2, false, true, true, false),
1561 
1562       /* If there is no implicit/explicit acc, restriction doesn't apply */
1563       INST(8, ADD, HF, HF,  F, 1, false, false, true, false),
1564       INST(8, ADD, HF, HF,  F, 2, false, true,  true, false),
1565       INST(8, ADD, HF,  F, HF, 1, false, false, true, false),
1566       INST(8, ADD, HF,  F, HF, 2, false, true,  true, false),
1567       INST(8, ADD,  F, HF,  F, 1, false, true,  true, false),
1568       INST(8, ADD,  F, HF,  F, 2, false, true,  true, false),
1569 
1570 #undef INST
1571    };
1572 
1573    if (devinfo.ver < 8)
1574       return;
1575 
1576    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1577       if (inst[i].opcode == ELK_OPCODE_MAC) {
1578          elk_MAC(p, retype(g0, inst[i].dst_type),
1579                     retype(g0, inst[i].src0_type),
1580                     retype(g0, inst[i].src1_type));
1581       } else {
1582          assert(inst[i].opcode == ELK_OPCODE_ADD);
1583          elk_ADD(p, retype(g0, inst[i].dst_type),
1584                     retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
1585                     retype(g0, inst[i].src1_type));
1586       }
1587 
1588       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1589 
1590       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1591 
1592       if (devinfo.verx10 >= 125)
1593          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1594       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1595          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1596       else
1597          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1598 
1599       clear_instructions(p);
1600    }
1601 }
1602 
TEST_P(validation_test,mixed_float_align1_math_strided_fp16_inputs)1603 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
1604 {
1605    static const struct {
1606       enum elk_reg_type dst_type;
1607       enum elk_reg_type src0_type;
1608       enum elk_reg_type src1_type;
1609       unsigned dst_stride;
1610       unsigned src0_stride;
1611       unsigned src1_stride;
1612       bool expected_result;
1613       bool expected_result_gfx125;
1614    } inst[] = {
1615 #define INST(dst_type, src0_type, src1_type,                              \
1616              dst_stride, src0_stride, src1_stride, expected_result,       \
1617              expected_result_125)                                         \
1618       {                                                                   \
1619          ELK_REGISTER_TYPE_##dst_type,                                    \
1620          ELK_REGISTER_TYPE_##src0_type,                                   \
1621          ELK_REGISTER_TYPE_##src1_type,                                   \
1622          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1623          ELK_HORIZONTAL_STRIDE_##src0_stride,                             \
1624          ELK_HORIZONTAL_STRIDE_##src1_stride,                             \
1625          expected_result,                                                 \
1626          expected_result_125,                                             \
1627       }
1628 
1629       INST(HF, HF,  F, 2, 2, 1, true,  false),
1630       INST(HF,  F, HF, 2, 1, 2, true,  false),
1631       INST(HF,  F, HF, 1, 1, 2, true,  false),
1632       INST(HF,  F, HF, 2, 1, 1, false, false),
1633       INST(HF, HF,  F, 2, 1, 1, false, false),
1634       INST(HF, HF,  F, 1, 1, 1, false, false),
1635       INST(HF, HF,  F, 2, 1, 1, false, false),
1636       INST( F, HF,  F, 1, 1, 1, false, false),
1637       INST( F,  F, HF, 1, 1, 2, true,  false),
1638       INST( F, HF, HF, 1, 2, 1, false, false),
1639       INST( F, HF, HF, 1, 2, 2, true,  false),
1640 
1641 #undef INST
1642    };
1643 
1644    /* No half-float math in gfx8 */
1645    if (devinfo.ver < 9)
1646       return;
1647 
1648    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1649       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1650                    ELK_MATH_FUNCTION_POW,
1651                    retype(g0, inst[i].src0_type),
1652                    retype(g0, inst[i].src1_type));
1653 
1654       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1655 
1656       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1657       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1658       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
1659 
1660       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1661       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1662       elk_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
1663 
1664       if (devinfo.verx10 >= 125)
1665          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1666       else
1667          EXPECT_EQ(inst[i].expected_result, validate(p));
1668 
1669       clear_instructions(p);
1670    }
1671 }
1672 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst)1673 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
1674 {
1675    static const struct {
1676       unsigned exec_size;
1677       enum elk_reg_type dst_type;
1678       enum elk_reg_type src0_type;
1679       enum elk_reg_type src1_type;
1680       unsigned dst_stride;
1681       unsigned dst_subnr;
1682       bool expected_result_bdw;
1683       bool expected_result_chv_skl;
1684       bool expected_result_gfx125;
1685    } inst[] = {
1686 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
1687              expected_result_bdw, expected_result_chv_skl,                     \
1688              expected_result_gfx125)                                           \
1689       {                                                                        \
1690          ELK_EXECUTE_##exec_size,                                              \
1691          ELK_REGISTER_TYPE_##dst_type,                                         \
1692          ELK_REGISTER_TYPE_##src0_type,                                        \
1693          ELK_REGISTER_TYPE_##src1_type,                                        \
1694          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
1695          dst_subnr,                                                            \
1696          expected_result_bdw,                                                  \
1697          expected_result_chv_skl,                                              \
1698          expected_result_gfx125                                                \
1699       }
1700 
1701       /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
1702        * oword-aligned
1703        */
1704       INST( 8, HF, HF,  F, 1,  0, false, true,  false),
1705       INST( 8, HF, HF,  F, 1,  2, false, false, false),
1706       INST( 8, HF, HF,  F, 1,  4, false, false, false),
1707       INST( 8, HF, HF,  F, 1,  8, false, false, false),
1708       INST( 8, HF, HF,  F, 1, 16, false, true,  false),
1709 
1710       /* SIMD16 packed fp16 always crosses oword boundaries */
1711       INST(16, HF, HF,  F, 1,  0, false, false, false),
1712       INST(16, HF, HF,  F, 1,  2, false, false, false),
1713       INST(16, HF, HF,  F, 1,  4, false, false, false),
1714       INST(16, HF, HF,  F, 1,  8, false, false, false),
1715       INST(16, HF, HF,  F, 1, 16, false, false, false),
1716 
1717       /* If destination is not packed (or not fp16) we can cross oword
1718        * boundaries
1719        */
1720       INST( 8, HF, HF,  F, 2,  0, true, true, false),
1721       INST( 8,  F, HF,  F, 1,  0, true, true, false),
1722 
1723 #undef INST
1724    };
1725 
1726    if (devinfo.ver < 8)
1727       return;
1728 
1729    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1730       elk_ADD(p, retype(g0, inst[i].dst_type),
1731                  retype(g0, inst[i].src0_type),
1732                  retype(g0, inst[i].src1_type));
1733 
1734       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1735       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1736 
1737       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1738       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1739       elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1740 
1741       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1742       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1743       elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1744 
1745       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1746 
1747       if (devinfo.verx10 >= 125)
1748          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1749       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1750          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1751       else
1752          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1753 
1754       clear_instructions(p);
1755    }
1756 }
1757 
TEST_P(validation_test,mixed_float_align16_packed_data)1758 TEST_P(validation_test, mixed_float_align16_packed_data)
1759 {
1760    static const struct {
1761       enum elk_reg_type dst_type;
1762       enum elk_reg_type src0_type;
1763       enum elk_reg_type src1_type;
1764       unsigned src0_vstride;
1765       unsigned src1_vstride;
1766       bool expected_result;
1767    } inst[] = {
1768 #define INST(dst_type, src0_type, src1_type,                              \
1769              src0_vstride, src1_vstride, expected_result)                 \
1770       {                                                                   \
1771          ELK_REGISTER_TYPE_##dst_type,                                    \
1772          ELK_REGISTER_TYPE_##src0_type,                                   \
1773          ELK_REGISTER_TYPE_##src1_type,                                   \
1774          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1775          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1776          expected_result,                                                 \
1777       }
1778 
1779       /* We only test with F destination because there is a restriction
1780        * by which F->HF conversions need to be DWord aligned but Align16 also
1781        * requires that destination horizontal stride is 1.
1782        */
1783       INST(F,  F, HF, 4, 4, true),
1784       INST(F,  F, HF, 2, 4, false),
1785       INST(F,  F, HF, 4, 2, false),
1786       INST(F,  F, HF, 0, 4, false),
1787       INST(F,  F, HF, 4, 0, false),
1788       INST(F, HF,  F, 4, 4, true),
1789       INST(F, HF,  F, 4, 2, false),
1790       INST(F, HF,  F, 2, 4, false),
1791       INST(F, HF,  F, 0, 4, false),
1792       INST(F, HF,  F, 4, 0, false),
1793 
1794 #undef INST
1795    };
1796 
1797    if (devinfo.ver < 8 || devinfo.ver >= 11)
1798       return;
1799 
1800    elk_set_default_access_mode(p, ELK_ALIGN_16);
1801 
1802    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1803       elk_ADD(p, retype(g0, inst[i].dst_type),
1804                  retype(g0, inst[i].src0_type),
1805                  retype(g0, inst[i].src1_type));
1806 
1807       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1808       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1809 
1810       EXPECT_EQ(inst[i].expected_result, validate(p));
1811 
1812       clear_instructions(p);
1813    }
1814 }
1815 
/* Align16 mixed-float ADD at SIMD8 versus SIMD16.
 *
 * The table expects every SIMD8 row to validate and every SIMD16 row to be
 * rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool ok;
   } cases[] = {
#define INST(size, dst, src0, src1, ok)                                   \
      {                                                                   \
         ELK_EXECUTE_##size,                                              \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src0,                                        \
         ELK_REGISTER_TYPE_##src1,                                        \
         ok,                                                              \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       *
       * NOTE(review): the first and third row of each exec-size group are
       * identical -- possibly unintended.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST( 8,  F,  F, HF, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),
      INST(16,  F,  F, HF, false),

#undef INST
   };

   /* Only run for Gfx8 up to (but not including) Gfx11. */
   if (!(devinfo.ver >= 8 && devinfo.ver < 11))
      return;

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : cases) {
      elk_ADD(p, retype(g0, tc.dst_type),
                 retype(g0, tc.src0_type),
                 retype(g0, tc.src1_type));

      auto *insn = last_inst;

      elk_inst_set_exec_size(&devinfo, insn, tc.exec_size);

      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.ok, validate(p));

      clear_instructions(p);
   }
}
1868 
/* Align16 mixed-float ADD with src0 taken either from g0 or from acc0.
 *
 * The table expects the rows that read the accumulator to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool read_acc;
      bool ok;
   } cases[] = {
#define INST(dst, src0, src1, acc, ok)                                    \
      {                                                                   \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src0,                                        \
         ELK_REGISTER_TYPE_##src1,                                        \
         acc,                                                             \
         ok,                                                              \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),

#undef INST
   };

   /* Only run for Gfx8 up to (but not including) Gfx11. */
   if (!(devinfo.ver >= 8 && devinfo.ver < 11))
      return;

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : cases) {
      elk_ADD(p, retype(g0, tc.dst_type),
                 retype(tc.read_acc ? acc0 : g0, tc.src0_type),
                 retype(g0, tc.src1_type));

      auto *insn = last_inst;
      elk_inst_set_src0_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, insn, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.ok, validate(p));

      clear_instructions(p);
   }
}
1917 
TEST_P(validation_test,mixed_float_align16_math_packed_format)1918 TEST_P(validation_test, mixed_float_align16_math_packed_format)
1919 {
1920    static const struct {
1921       enum elk_reg_type dst_type;
1922       enum elk_reg_type src0_type;
1923       enum elk_reg_type src1_type;
1924       unsigned src0_vstride;
1925       unsigned src1_vstride;
1926       bool expected_result;
1927    } inst[] = {
1928 #define INST(dst_type, src0_type, src1_type,                              \
1929              src0_vstride, src1_vstride, expected_result)                 \
1930       {                                                                   \
1931          ELK_REGISTER_TYPE_##dst_type,                                    \
1932          ELK_REGISTER_TYPE_##src0_type,                                   \
1933          ELK_REGISTER_TYPE_##src1_type,                                   \
1934          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1935          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1936          expected_result,                                                 \
1937       }
1938 
1939       /* We only test with F destination because there is a restriction
1940        * by which F->HF conversions need to be DWord aligned but Align16 also
1941        * requires that destination horizontal stride is 1.
1942        */
1943       INST( F, HF,  F, 4, 0, false),
1944       INST( F, HF, HF, 4, 4, true),
1945       INST( F,  F, HF, 4, 0, false),
1946       INST( F,  F, HF, 2, 4, false),
1947       INST( F,  F, HF, 4, 2, false),
1948       INST( F, HF, HF, 0, 4, false),
1949 
1950 #undef INST
1951    };
1952 
1953    /* Align16 Math for mixed float mode is not supported in gfx8 */
1954    if (devinfo.ver < 9 || devinfo.ver >= 11)
1955       return;
1956 
1957    elk_set_default_access_mode(p, ELK_ALIGN_16);
1958 
1959    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1960       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1961                    ELK_MATH_FUNCTION_POW,
1962                    retype(g0, inst[i].src0_type),
1963                    retype(g0, inst[i].src1_type));
1964 
1965       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1966       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1967 
1968       EXPECT_EQ(inst[i].expected_result, validate(p));
1969 
1970       clear_instructions(p);
1971    }
1972 }
1973 
/* MOV from a vector immediate (VF, V, UV) into a destination at different
 * sub-register offsets.
 *
 * In the table, offsets 0 and 16 are expected to validate and offset 1 is
 * expected to be rejected.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool ok;
   } cases[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  0, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, 16, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  1, ELK_EXECUTE_4, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   1, ELK_EXECUTE_8, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, 16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  1, ELK_EXECUTE_8, false },
   };

   for (const auto &tc : cases) {
      /* UV type is Gfx6+ */
      const bool uv_source = tc.src_type == ELK_REGISTER_TYPE_UV;
      if (uv_source && devinfo.ver < 6)
         continue;

      elk_MOV(p, retype(g0, tc.dst_type), retype(zero, tc.src_type));

      auto *insn = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, insn, tc.subnr);
      elk_inst_set_exec_size(&devinfo, insn, tc.exec_size);

      EXPECT_EQ(tc.ok, validate(p));

      clear_instructions(p);
   }
}
2011 
/* MOV from a vector immediate (VF, V, UV) into destinations of different
 * types and horizontal strides, compared against the expected validator
 * result for each combination.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned stride;
      bool ok;
   } cases[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, true  },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_4, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, true  },
   };

   for (const auto &tc : cases) {
      /* UV type is Gfx6+ */
      const bool uv_source = tc.src_type == ELK_REGISTER_TYPE_UV;
      if (uv_source && devinfo.ver < 6)
         continue;

      elk_MOV(p, retype(g0, tc.dst_type), retype(zero, tc.src_type));
      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.stride);

      EXPECT_EQ(tc.ok, validate(p));

      clear_instructions(p);
   }
}
2052 
TEST_P(validation_test,qword_low_power_align1_regioning_restrictions)2053 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
2054 {
2055    static const struct {
2056       enum elk_opcode opcode;
2057       unsigned exec_size;
2058 
2059       enum elk_reg_type dst_type;
2060       unsigned dst_subreg;
2061       unsigned dst_stride;
2062 
2063       enum elk_reg_type src_type;
2064       unsigned src_subreg;
2065       unsigned src_vstride;
2066       unsigned src_width;
2067       unsigned src_hstride;
2068 
2069       bool expected_result;
2070    } inst[] = {
2071 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
2072              src_subreg, src_vstride, src_width, src_hstride, expected_result) \
2073       {                                                                        \
2074          ELK_OPCODE_##opcode,                                                  \
2075          ELK_EXECUTE_##exec_size,                                              \
2076          ELK_REGISTER_TYPE_##dst_type,                                         \
2077          dst_subreg,                                                           \
2078          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2079          ELK_REGISTER_TYPE_##src_type,                                         \
2080          src_subreg,                                                           \
2081          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2082          ELK_WIDTH_##src_width,                                                \
2083          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2084          expected_result,                                                      \
2085       }
2086 
2087       /* Some instruction that violate no restrictions, as a control */
2088       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2089       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2090       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2091 
2092       INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
2093       INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
2094       INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
2095 
2096       INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
2097       INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
2098       INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
2099 
2100       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2101       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2102 
2103       /* Something with subreg nrs */
2104       INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
2105       INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
2106       INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
2107 
2108       INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
2109       INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
2110 
2111       /* The PRMs say that for CHV, BXT:
2112        *
2113        *    When source or destination datatype is 64b or operation is integer
2114        *    DWord multiply, regioning in Align1 must follow these rules:
2115        *
2116        *    1. Source and Destination horizontal stride must be aligned to the
2117        *       same qword.
2118        */
2119       INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
2120       INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
2121       INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
2122 
2123       INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
2124       INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
2125       INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
2126 
2127       INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
2128       INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
2129       INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
2130 
2131       INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
2132       INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
2133 
2134       INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
2135       INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
2136 
2137       /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
2138       INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
2139       INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
2140       INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
2141 
2142       INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
2143       INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
2144       INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
2145 
2146       INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
2147       INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
2148       INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
2149 
2150       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2151       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2152 
2153       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2154       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2155 
2156       /*    3. Source and Destination offset must be the same, except the case
2157        *       of scalar source.
2158        */
2159       INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
2160       INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
2161       INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
2162 
2163       INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
2164       INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
2165       INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
2166 
2167       INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
2168       INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
2169 
2170       INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
2171       INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
2172 
2173       INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
2174       INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
2175       INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
2176 
2177       INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
2178       INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
2179       INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
2180 
2181       INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
2182       INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
2183 
2184       INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
2185       INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
2186 
2187 #undef INST
2188    };
2189 
2190    /* These restrictions only apply to Gfx8+ */
2191    if (devinfo.ver < 8)
2192       return;
2193 
2194    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2195    if (devinfo.ver >= 12)
2196       return;
2197 
2198    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2199       if (!devinfo.has_64bit_float &&
2200           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2201            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2202          continue;
2203 
2204       if (!devinfo.has_64bit_int &&
2205           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2206            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2207            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2208            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2209          continue;
2210 
2211       if (inst[i].opcode == ELK_OPCODE_MOV) {
2212          elk_MOV(p, retype(g0, inst[i].dst_type),
2213                     retype(g0, inst[i].src_type));
2214       } else {
2215          assert(inst[i].opcode == ELK_OPCODE_MUL);
2216          elk_MUL(p, retype(g0, inst[i].dst_type),
2217                     retype(g0, inst[i].src_type),
2218                     retype(zero, inst[i].src_type));
2219       }
2220       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2221 
2222       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
2223       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
2224 
2225       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2226 
2227       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2228       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2229       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2230 
2231       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2232           intel_device_info_is_9lp(&devinfo)) {
2233          EXPECT_EQ(inst[i].expected_result, validate(p));
2234       } else {
2235          EXPECT_TRUE(validate(p));
2236       }
2237 
2238       clear_instructions(p);
2239    }
2240 }
2241 
TEST_P(validation_test,qword_low_power_no_indirect_addressing)2242 TEST_P(validation_test, qword_low_power_no_indirect_addressing)
2243 {
2244    static const struct {
2245       enum elk_opcode opcode;
2246       unsigned exec_size;
2247 
2248       enum elk_reg_type dst_type;
2249       bool dst_is_indirect;
2250       unsigned dst_stride;
2251 
2252       enum elk_reg_type src_type;
2253       bool src_is_indirect;
2254       unsigned src_vstride;
2255       unsigned src_width;
2256       unsigned src_hstride;
2257 
2258       bool expected_result;
2259    } inst[] = {
2260 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,         \
2261              src_type, src_is_indirect, src_vstride, src_width, src_hstride,   \
2262              expected_result)                                                  \
2263       {                                                                        \
2264          ELK_OPCODE_##opcode,                                                  \
2265          ELK_EXECUTE_##exec_size,                                              \
2266          ELK_REGISTER_TYPE_##dst_type,                                         \
2267          dst_is_indirect,                                                      \
2268          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2269          ELK_REGISTER_TYPE_##src_type,                                         \
2270          src_is_indirect,                                                      \
2271          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2272          ELK_WIDTH_##src_width,                                                \
2273          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2274          expected_result,                                                      \
2275       }
2276 
2277       /* Some instruction that violate no restrictions, as a control */
2278       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2279       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2280       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2281 
2282       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2283       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2284 
2285       INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
2286       INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
2287       INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),
2288 
2289       /* The PRMs say that for CHV, BXT:
2290        *
2291        *    When source or destination datatype is 64b or operation is integer
2292        *    DWord multiply, indirect addressing must not be used.
2293        */
2294       INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
2295       INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
2296       INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
2297 
2298       INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
2299       INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
2300       INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
2301 
2302       INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
2303       INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
2304       INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
2305 
2306       INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
2307       INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
2308       INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
2309 
2310       INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
2311       INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
2312       INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
2313 
2314       INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
2315       INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
2316       INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
2317 
2318       INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
2319       INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
2320 
2321       INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
2322       INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
2323 
2324 #undef INST
2325    };
2326 
2327    /* These restrictions only apply to Gfx8+ */
2328    if (devinfo.ver < 8)
2329       return;
2330 
2331    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2332       if (!devinfo.has_64bit_float &&
2333           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2334            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2335          continue;
2336 
2337       if (!devinfo.has_64bit_int &&
2338           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2339            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2340            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2341            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2342          continue;
2343 
2344       if (inst[i].opcode == ELK_OPCODE_MOV) {
2345          elk_MOV(p, retype(g0, inst[i].dst_type),
2346                     retype(g0, inst[i].src_type));
2347       } else {
2348          assert(inst[i].opcode == ELK_OPCODE_MUL);
2349          elk_MUL(p, retype(g0, inst[i].dst_type),
2350                     retype(g0, inst[i].src_type),
2351                     retype(zero, inst[i].src_type));
2352       }
2353       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2354 
2355       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
2356       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
2357 
2358       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2359 
2360       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2361       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2362       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2363 
2364       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2365           intel_device_info_is_9lp(&devinfo)) {
2366          EXPECT_EQ(inst[i].expected_result, validate(p));
2367       } else {
2368          EXPECT_TRUE(validate(p));
2369       }
2370 
2371       clear_instructions(p);
2372    }
2373 }
2374 
TEST_P(validation_test,qword_low_power_no_64bit_arf)2375 TEST_P(validation_test, qword_low_power_no_64bit_arf)
2376 {
2377    static const struct {
2378       enum elk_opcode opcode;
2379       unsigned exec_size;
2380 
2381       struct elk_reg dst;
2382       enum elk_reg_type dst_type;
2383       unsigned dst_stride;
2384 
2385       struct elk_reg src;
2386       enum elk_reg_type src_type;
2387       unsigned src_vstride;
2388       unsigned src_width;
2389       unsigned src_hstride;
2390 
2391       bool acc_wr;
2392       bool expected_result;
2393    } inst[] = {
2394 #define INST(opcode, exec_size, dst, dst_type, dst_stride,                     \
2395              src, src_type, src_vstride, src_width, src_hstride,               \
2396              acc_wr, expected_result)                                          \
2397       {                                                                        \
2398          ELK_OPCODE_##opcode,                                                  \
2399          ELK_EXECUTE_##exec_size,                                              \
2400          dst,                                                                  \
2401          ELK_REGISTER_TYPE_##dst_type,                                         \
2402          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2403          src,                                                                  \
2404          ELK_REGISTER_TYPE_##src_type,                                         \
2405          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2406          ELK_WIDTH_##src_width,                                                \
2407          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2408          acc_wr,                                                               \
2409          expected_result,                                                      \
2410       }
2411 
2412       /* Some instruction that violate no restrictions, as a control */
2413       INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
2414       INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),
2415 
2416       INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
2417       INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),
2418 
2419       INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2420       INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),
2421 
2422       INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
2423       INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
2424       INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),
2425 
2426       INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
2427       INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
2428       INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),
2429 
2430       INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2431       INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2432       INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),
2433 
2434       INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
2435       INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),
2436 
2437       /* The PRMs say that for CHV, BXT:
2438        *
2439        *    ARF registers must never be used with 64b datatype or when
2440        *    operation is integer DWord multiply.
2441        */
2442       INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
2443       INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),
2444 
2445       INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
2446       INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),
2447 
2448       INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
2449       INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),
2450 
2451       INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
2452       INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),
2453 
2454       INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
2455       INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),
2456 
2457       INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
2458       INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),
2459 
2460       INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
2461       INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
2462       /* MUL cannot have integer accumulator sources, so don't test that */
2463 
2464       /* We assume that the restriction does not apply to the null register */
2465       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
2466       INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
2467       INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2468 
2469       /* Check implicit accumulator write control */
2470       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2471       INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2472 
2473 #undef INST
2474    };
2475 
2476    /* These restrictions only apply to Gfx8+ */
2477    if (devinfo.ver < 8)
2478       return;
2479 
2480    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2481       if (!devinfo.has_64bit_float &&
2482           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2483            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2484          continue;
2485 
2486       if (!devinfo.has_64bit_int &&
2487           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2488            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2489            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2490            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2491          continue;
2492 
2493       if (inst[i].opcode == ELK_OPCODE_MOV) {
2494          elk_MOV(p, retype(inst[i].dst, inst[i].dst_type),
2495                     retype(inst[i].src, inst[i].src_type));
2496       } else {
2497          assert(inst[i].opcode == ELK_OPCODE_MUL);
2498          elk_MUL(p, retype(inst[i].dst, inst[i].dst_type),
2499                     retype(inst[i].src, inst[i].src_type),
2500                     retype(zero, inst[i].src_type));
2501          elk_inst_set_opcode(&isa, last_inst, inst[i].opcode);
2502       }
2503       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2504       elk_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
2505 
2506       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2507 
2508       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2509       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2510       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2511 
2512       /* Note: The Broadwell PRM also lists the restriction that destination
2513        * of DWord multiplication cannot be the accumulator.
2514        */
2515       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2516           intel_device_info_is_9lp(&devinfo) ||
2517           (devinfo.ver == 8 &&
2518            inst[i].opcode == ELK_OPCODE_MUL &&
2519            elk_inst_dst_reg_file(&devinfo, last_inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
2520            elk_inst_dst_da_reg_nr(&devinfo, last_inst) != ELK_ARF_NULL)) {
2521          EXPECT_EQ(inst[i].expected_result, validate(p));
2522       } else {
2523          EXPECT_TRUE(validate(p));
2524       }
2525 
2526       clear_instructions(p);
2527    }
2528 
2529    if (!devinfo.has_64bit_float)
2530       return;
2531 
2532    /* MAC implicitly reads the accumulator */
2533    elk_MAC(p, retype(g0, ELK_REGISTER_TYPE_DF),
2534               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF),
2535               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF));
2536    if (devinfo.platform == INTEL_PLATFORM_CHV ||
2537        intel_device_info_is_9lp(&devinfo)) {
2538       EXPECT_FALSE(validate(p));
2539    } else {
2540       EXPECT_TRUE(validate(p));
2541    }
2542 }
2543 
TEST_P(validation_test,align16_64_bit_integer)2544 TEST_P(validation_test, align16_64_bit_integer)
2545 {
2546    static const struct {
2547       enum elk_opcode opcode;
2548       unsigned exec_size;
2549 
2550       enum elk_reg_type dst_type;
2551       enum elk_reg_type src_type;
2552 
2553       bool expected_result;
2554    } inst[] = {
2555 #define INST(opcode, exec_size, dst_type, src_type, expected_result)           \
2556       {                                                                        \
2557          ELK_OPCODE_##opcode,                                                  \
2558          ELK_EXECUTE_##exec_size,                                              \
2559          ELK_REGISTER_TYPE_##dst_type,                                         \
2560          ELK_REGISTER_TYPE_##src_type,                                         \
2561          expected_result,                                                      \
2562       }
2563 
2564       /* Some instruction that violate no restrictions, as a control */
2565       INST(MOV, 2, Q,  D,  true ),
2566       INST(MOV, 2, UQ, UD, true ),
2567       INST(MOV, 2, DF, F,  true ),
2568 
2569       INST(ADD, 2, Q,  D,  true ),
2570       INST(ADD, 2, UQ, UD, true ),
2571       INST(ADD, 2, DF, F,  true ),
2572 
2573       /* The PRMs say that for BDW, SKL:
2574        *
2575        *    If Align16 is required for an operation with QW destination and non-QW
2576        *    source datatypes, the execution size cannot exceed 2.
2577        */
2578 
2579       INST(MOV, 4, Q,  D,  false),
2580       INST(MOV, 4, UQ, UD, false),
2581       INST(MOV, 4, DF, F,  false),
2582 
2583       INST(ADD, 4, Q,  D,  false),
2584       INST(ADD, 4, UQ, UD, false),
2585       INST(ADD, 4, DF, F,  false),
2586 
2587 #undef INST
2588    };
2589 
2590    /* 64-bit integer types exist on Gfx8+ */
2591    if (devinfo.ver < 8)
2592       return;
2593 
2594    /* Align16 does not exist on Gfx11+ */
2595    if (devinfo.ver >= 11)
2596       return;
2597 
2598    elk_set_default_access_mode(p, ELK_ALIGN_16);
2599 
2600    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2601       if (inst[i].opcode == ELK_OPCODE_MOV) {
2602          elk_MOV(p, retype(g0, inst[i].dst_type),
2603                     retype(g0, inst[i].src_type));
2604       } else {
2605          assert(inst[i].opcode == ELK_OPCODE_ADD);
2606          elk_ADD(p, retype(g0, inst[i].dst_type),
2607                     retype(g0, inst[i].src_type),
2608                     retype(g0, inst[i].src_type));
2609       }
2610       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2611 
2612       EXPECT_EQ(inst[i].expected_result, validate(p));
2613 
2614       clear_instructions(p);
2615    }
2616 }
2617 
TEST_P(validation_test,qword_low_power_no_depctrl)2618 TEST_P(validation_test, qword_low_power_no_depctrl)
2619 {
2620    static const struct {
2621       enum elk_opcode opcode;
2622       unsigned exec_size;
2623 
2624       enum elk_reg_type dst_type;
2625       unsigned dst_stride;
2626 
2627       enum elk_reg_type src_type;
2628       unsigned src_vstride;
2629       unsigned src_width;
2630       unsigned src_hstride;
2631 
2632       bool no_dd_check;
2633       bool no_dd_clear;
2634 
2635       bool expected_result;
2636    } inst[] = {
2637 #define INST(opcode, exec_size, dst_type, dst_stride,                          \
2638              src_type, src_vstride, src_width, src_hstride,                    \
2639              no_dd_check, no_dd_clear, expected_result)                        \
2640       {                                                                        \
2641          ELK_OPCODE_##opcode,                                                  \
2642          ELK_EXECUTE_##exec_size,                                              \
2643          ELK_REGISTER_TYPE_##dst_type,                                         \
2644          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2645          ELK_REGISTER_TYPE_##src_type,                                         \
2646          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2647          ELK_WIDTH_##src_width,                                                \
2648          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2649          no_dd_check,                                                          \
2650          no_dd_clear,                                                          \
2651          expected_result,                                                      \
2652       }
2653 
2654       /* Some instruction that violate no restrictions, as a control */
2655       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
2656       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
2657       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
2658 
2659       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
2660       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
2661       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
2662 
2663       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
2664       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
2665 
2666       INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),
2667 
2668       /* The PRMs say that for CHV, BXT:
2669        *
2670        *    When source or destination datatype is 64b or operation is integer
2671        *    DWord multiply, DepCtrl must not be used.
2672        */
2673       INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
2674       INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
2675       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
2676 
2677       INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
2678       INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
2679       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
2680 
2681       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
2682       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
2683       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
2684 
2685       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
2686       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
2687       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
2688 
2689       INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
2690       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
2691 
2692       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
2693       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
2694 
2695 #undef INST
2696    };
2697 
2698    /* These restrictions only apply to Gfx8+ */
2699    if (devinfo.ver < 8)
2700       return;
2701 
2702    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2703    if (devinfo.ver >= 12)
2704       return;
2705 
2706    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2707       if (!devinfo.has_64bit_float &&
2708           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2709            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2710          continue;
2711 
2712       if (!devinfo.has_64bit_int &&
2713           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2714            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2715            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2716            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2717          continue;
2718 
2719       if (inst[i].opcode == ELK_OPCODE_MOV) {
2720          elk_MOV(p, retype(g0, inst[i].dst_type),
2721                     retype(g0, inst[i].src_type));
2722       } else {
2723          assert(inst[i].opcode == ELK_OPCODE_MUL);
2724          elk_MUL(p, retype(g0, inst[i].dst_type),
2725                     retype(g0, inst[i].src_type),
2726                     retype(zero, inst[i].src_type));
2727       }
2728       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2729 
2730       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2731 
2732       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2733       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2734       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2735 
2736       elk_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
2737       elk_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
2738 
2739       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2740           intel_device_info_is_9lp(&devinfo)) {
2741          EXPECT_EQ(inst[i].expected_result, validate(p));
2742       } else {
2743          EXPECT_TRUE(validate(p));
2744       }
2745 
2746       clear_instructions(p);
2747    }
2748 }
2749