1 /*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41 #include <stdlib.h>
42 #include "brw_eu.h"
43
44 /* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;   /* heap-allocated, NUL-terminated buffer (NULL when empty) */
   size_t len;  /* length in bytes, excluding the NUL terminator */
};

/* Append the bytes of \p src to \p dest, growing dest's buffer and keeping
 * it NUL-terminated.
 *
 * If reallocation fails, \p dest is left unchanged and the appended text is
 * silently dropped: the validator degrades to a shorter error message
 * instead of leaking the old buffer or dereferencing NULL (the original
 * code overwrote dest->str with realloc's result directly, which leaks the
 * existing buffer and then writes through a NULL pointer on OOM).
 */
static void
cat(struct string *dest, const struct string src)
{
   if (src.len == 0)
      return;

   /* Use a temporary so the original buffer survives a failed realloc. */
   char *enlarged = realloc(dest->str, dest->len + src.len + 1);
   if (enlarged == NULL)
      return;

   memcpy(enlarged + dest->len, src.str, src.len);
   enlarged[dest->len + src.len] = '\0';

   dest->str = enlarged;
   dest->len += src.len;
}
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
59
60 static bool
contains(const struct string haystack,const struct string needle)61 contains(const struct string haystack, const struct string needle)
62 {
63 return haystack.str && memmem(haystack.str, haystack.len,
64 needle.str, needle.len) != NULL;
65 }
66 #define CONTAINS(haystack, needle) \
67 contains(haystack, (struct string){needle, strlen(needle)})
68
/* Wrap an error message with the standard "\tERROR: " prefix and newline. */
#define error(str) "\tERROR: " str "\n"
/* Continuation indent for multi-line messages, matching the prefix above. */
#define ERROR_INDENT "\t "

/* Unconditionally record \p msg (deduplicated; see ERROR_IF). */
#define ERROR(msg) ERROR_IF(true, msg)
/* Append \p msg to the local `error_msg` when \p cond holds. The CONTAINS
 * guard suppresses duplicates so each distinct error is reported at most
 * once per instruction. Requires a `struct string error_msg` in scope.
 */
#define ERROR_IF(cond, msg) \
   do { \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg)); \
      } \
   } while(0)

/* Invoke the check function \p func on (devinfo, inst, args...), fold any
 * error text it returns into the local `error_msg`, and free the temporary.
 */
#define CHECK(func, args...) \
   do { \
      struct string __msg = func(devinfo, inst, ##args); \
      if (__msg.str) { \
         cat(&error_msg, __msg); \
         free(__msg.str); \
      } \
   } while (0)
/* Decode the hardware's region-stride encoding (0 -> stride 0, otherwise
 * 1 << (encoding - 1)) and width encoding (1 << encoding) into element
 * counts. Arguments are fully parenthesized so that compound expressions
 * expand correctly (the original left `stride` bare in the `!= 0` test).
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
#define WIDTH(width) (1 << (width))
91
92 static bool
inst_is_send(const struct gen_device_info * devinfo,const brw_inst * inst)93 inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
94 {
95 switch (brw_inst_opcode(devinfo, inst)) {
96 case BRW_OPCODE_SEND:
97 case BRW_OPCODE_SENDC:
98 case BRW_OPCODE_SENDS:
99 case BRW_OPCODE_SENDSC:
100 return true;
101 default:
102 return false;
103 }
104 }
105
106 static bool
inst_is_split_send(const struct gen_device_info * devinfo,const brw_inst * inst)107 inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst)
108 {
109 if (devinfo->gen >= 12) {
110 return inst_is_send(devinfo, inst);
111 } else {
112 switch (brw_inst_opcode(devinfo, inst)) {
113 case BRW_OPCODE_SENDS:
114 case BRW_OPCODE_SENDSC:
115 return true;
116 default:
117 return false;
118 }
119 }
120 }
121
122 static unsigned
signed_type(unsigned type)123 signed_type(unsigned type)
124 {
125 switch (type) {
126 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
127 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
128 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
129 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
130 default: return type;
131 }
132 }
133
134 static enum brw_reg_type
inst_dst_type(const struct gen_device_info * devinfo,const brw_inst * inst)135 inst_dst_type(const struct gen_device_info *devinfo, const brw_inst *inst)
136 {
137 return (devinfo->gen < 12 || !inst_is_send(devinfo, inst)) ?
138 brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;
139 }
140
141 static bool
inst_is_raw_move(const struct gen_device_info * devinfo,const brw_inst * inst)142 inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst)
143 {
144 unsigned dst_type = signed_type(inst_dst_type(devinfo, inst));
145 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
146
147 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
148 /* FIXME: not strictly true */
149 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
150 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
151 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
152 return false;
153 }
154 } else if (brw_inst_src0_negate(devinfo, inst) ||
155 brw_inst_src0_abs(devinfo, inst)) {
156 return false;
157 }
158
159 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
160 brw_inst_saturate(devinfo, inst) == 0 &&
161 dst_type == src_type;
162 }
163
164 static bool
dst_is_null(const struct gen_device_info * devinfo,const brw_inst * inst)165 dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
166 {
167 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
168 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
169 }
170
171 static bool
src0_is_null(const struct gen_device_info * devinfo,const brw_inst * inst)172 src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
173 {
174 return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
175 brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
176 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
177 }
178
179 static bool
src1_is_null(const struct gen_device_info * devinfo,const brw_inst * inst)180 src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
181 {
182 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
183 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
184 }
185
186 static bool
src0_is_acc(const struct gen_device_info * devinfo,const brw_inst * inst)187 src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
188 {
189 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
190 (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
191 }
192
193 static bool
src1_is_acc(const struct gen_device_info * devinfo,const brw_inst * inst)194 src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
195 {
196 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
197 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
198 }
199
200 static bool
src0_has_scalar_region(const struct gen_device_info * devinfo,const brw_inst * inst)201 src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
202 {
203 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
204 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
205 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
206 }
207
208 static bool
src1_has_scalar_region(const struct gen_device_info * devinfo,const brw_inst * inst)209 src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
210 {
211 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
212 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
213 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
214 }
215
216 static unsigned
num_sources_from_inst(const struct gen_device_info * devinfo,const brw_inst * inst)217 num_sources_from_inst(const struct gen_device_info *devinfo,
218 const brw_inst *inst)
219 {
220 const struct opcode_desc *desc =
221 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
222 unsigned math_function;
223
224 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
225 math_function = brw_inst_math_function(devinfo, inst);
226 } else if (devinfo->gen < 6 &&
227 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
228 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
229 /* src1 must be a descriptor (including the information to determine
230 * that the SEND is doing an extended math operation), but src0 can
231 * actually be null since it serves as the source of the implicit GRF
232 * to MRF move.
233 *
234 * If we stop using that functionality, we'll have to revisit this.
235 */
236 return 2;
237 } else {
238 /* Send instructions are allowed to have null sources since they use
239 * the base_mrf field to specify which message register source.
240 */
241 return 0;
242 }
243 } else {
244 assert(desc->nsrc < 4);
245 return desc->nsrc;
246 }
247
248 switch (math_function) {
249 case BRW_MATH_FUNCTION_INV:
250 case BRW_MATH_FUNCTION_LOG:
251 case BRW_MATH_FUNCTION_EXP:
252 case BRW_MATH_FUNCTION_SQRT:
253 case BRW_MATH_FUNCTION_RSQ:
254 case BRW_MATH_FUNCTION_SIN:
255 case BRW_MATH_FUNCTION_COS:
256 case BRW_MATH_FUNCTION_SINCOS:
257 case GEN8_MATH_FUNCTION_INVM:
258 case GEN8_MATH_FUNCTION_RSQRTM:
259 return 1;
260 case BRW_MATH_FUNCTION_FDIV:
261 case BRW_MATH_FUNCTION_POW:
262 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
263 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
264 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
265 return 2;
266 default:
267 unreachable("not reached");
268 }
269 }
270
/* Validate fields with a fixed set of legal encodings: execution size,
 * register file encodings, and register type encodings. Returns accumulated
 * error text (empty string when the instruction is clean).
 */
static struct string
invalid_values(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Execution size must be one of the six power-of-two encodings. */
   switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
   case BRW_EXECUTE_1:
   case BRW_EXECUTE_2:
   case BRW_EXECUTE_4:
   case BRW_EXECUTE_8:
   case BRW_EXECUTE_16:
   case BRW_EXECUTE_32:
      break;
   default:
      ERROR("invalid execution size");
      break;
   }

   /* Send instructions encode operands differently; the file/type checks
    * below do not apply to them.
    */
   if (inst_is_send(devinfo, inst))
      return error_msg;

   if (num_sources == 3) {
      /* Nothing to test:
       *    No 3-src instructions on Gen4-5
       *    No reg file bits on Gen6-10 (align16)
       *    No invalid encodings on Gen10-12 (align1)
       */
   } else {
      /* MRF went away on Gen7+, but the encoding space it used remains. */
      if (devinfo->gen > 6) {
         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
                  (num_sources > 0 &&
                   brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
                  (num_sources > 1 &&
                   brw_inst_src1_reg_file(devinfo, inst) == MRF),
                  "invalid register file encoding");
      }
   }

   /* Don't attempt to decode types if the file encodings were already bad. */
   if (error_msg.str)
      return error_msg;

   if (num_sources == 3) {
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         /* Align1 3-src only exists on Gen10+. */
         if (devinfo->gen >= 10) {
            ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,
                     "invalid register type encoding");
         } else {
            ERROR("Align1 mode not allowed on Gen < 10");
         }
      } else {
         /* Align16 3-src shares a single type field across all sources. */
         ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
                  brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
                  "invalid register type encoding");
      }
   } else {
      ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
               (num_sources > 0 &&
                brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
               (num_sources > 1 &&
                brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
               "invalid register type encoding");
   }

   return error_msg;
}
340
341 static struct string
sources_not_null(const struct gen_device_info * devinfo,const brw_inst * inst)342 sources_not_null(const struct gen_device_info *devinfo,
343 const brw_inst *inst)
344 {
345 unsigned num_sources = num_sources_from_inst(devinfo, inst);
346 struct string error_msg = { .str = NULL, .len = 0 };
347
348 /* Nothing to test. 3-src instructions can only have GRF sources, and
349 * there's no bit to control the file.
350 */
351 if (num_sources == 3)
352 return (struct string){};
353
354 /* Nothing to test. Split sends can only encode a file in sources that are
355 * allowed to be NULL.
356 */
357 if (inst_is_split_send(devinfo, inst))
358 return (struct string){};
359
360 if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC)
361 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
362
363 if (num_sources == 2)
364 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
365
366 return error_msg;
367 }
368
369 static struct string
alignment_supported(const struct gen_device_info * devinfo,const brw_inst * inst)370 alignment_supported(const struct gen_device_info *devinfo,
371 const brw_inst *inst)
372 {
373 struct string error_msg = { .str = NULL, .len = 0 };
374
375 ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
376 "Align16 not supported");
377
378 return error_msg;
379 }
380
381 static bool
inst_uses_src_acc(const struct gen_device_info * devinfo,const brw_inst * inst)382 inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
383 {
384 /* Check instructions that use implicit accumulator sources */
385 switch (brw_inst_opcode(devinfo, inst)) {
386 case BRW_OPCODE_MAC:
387 case BRW_OPCODE_MACH:
388 case BRW_OPCODE_SADA2:
389 return true;
390 default:
391 break;
392 }
393
394 /* FIXME: support 3-src instructions */
395 unsigned num_sources = num_sources_from_inst(devinfo, inst);
396 assert(num_sources < 3);
397
398 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
399 }
400
/* Validate restrictions specific to SEND-family instructions: operand
 * register files, EOT register-range requirements, and payload overlap for
 * split sends. Returns accumulated error text (empty string when clean).
 */
static struct string
send_restrictions(const struct gen_device_info *devinfo,
                  const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(devinfo, inst)) {
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      /* EOT messages must source their payload from the top 16 GRFs. */
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         /* (The lengths live in the descriptors; when a descriptor comes
          * from a register we cannot read it statically, so assume one
          * register per payload.)
          */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
         }
         /* The two payload ranges [src0, src0+mlen) and [src1, src1+ex_mlen)
          * must be disjoint.
          */
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                  "split send payloads must not overlap");
      }
   } else if (inst_is_send(devinfo, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->gen >= 7) {
         ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
                  "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->gen >= 8) {
         /* The return payload may not extend past g127 while overlapping
          * the source payload.
          */
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}
467
468 static bool
is_unsupported_inst(const struct gen_device_info * devinfo,const brw_inst * inst)469 is_unsupported_inst(const struct gen_device_info *devinfo,
470 const brw_inst *inst)
471 {
472 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL;
473 }
474
475 /**
476 * Returns whether a combination of two types would qualify as mixed float
477 * operation mode
478 */
479 static inline bool
types_are_mixed_float(enum brw_reg_type t0,enum brw_reg_type t1)480 types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
481 {
482 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
483 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
484 }
485
/* Map a register type to the type used for execution-channel arithmetic.
 *
 * Floating-point types execute as themselves (VF immediates execute as F);
 * integer types execute at their width with signed semantics (Q/UQ as Q,
 * D/UD as D, all byte/word/packed-word types as W). The switch is
 * deliberately exhaustive with no default so the compiler flags any newly
 * added enum brw_reg_type value.
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   unreachable("not reached");
}
517
/**
 * Returns the execution type of an instruction \p inst
 *
 * The checks below form an ordered precedence chain: mixed F/HF promotes to
 * F, then matching types win, then NF > (F on Gen<6) > Q > D > W > DF.
 */
static enum brw_reg_type
execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* Single-source HF follows the destination type (mixed-float rule). */
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   if (src0_exec_type == BRW_REGISTER_TYPE_NF ||
       src1_exec_type == BRW_REGISTER_TYPE_NF)
      return BRW_REGISTER_TYPE_NF;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->gen < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}
579
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values: vstride equal to
 * width, and hstride advancing by one element within each row (or zero for
 * the degenerate single-element row, <1;1,0>).
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   return vstride == 1 ? hstride == 0 : hstride == 1;
}
599
600 /**
601 * Returns whether an instruction is an explicit or implicit conversion
602 * to/from half-float.
603 */
604 static bool
is_half_float_conversion(const struct gen_device_info * devinfo,const brw_inst * inst)605 is_half_float_conversion(const struct gen_device_info *devinfo,
606 const brw_inst *inst)
607 {
608 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
609
610 unsigned num_sources = num_sources_from_inst(devinfo, inst);
611 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
612
613 if (dst_type != src0_type &&
614 (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
615 return true;
616 } else if (num_sources > 1) {
617 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
618 return dst_type != src1_type &&
619 (dst_type == BRW_REGISTER_TYPE_HF ||
620 src1_type == BRW_REGISTER_TYPE_HF);
621 }
622
623 return false;
624 }
625
626 /*
627 * Returns whether an instruction is using mixed float operation mode
628 */
629 static bool
is_mixed_float(const struct gen_device_info * devinfo,const brw_inst * inst)630 is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
631 {
632 if (devinfo->gen < 8)
633 return false;
634
635 if (inst_is_send(devinfo, inst))
636 return false;
637
638 unsigned opcode = brw_inst_opcode(devinfo, inst);
639 const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
640 if (desc->ndst == 0)
641 return false;
642
643 /* FIXME: support 3-src instructions */
644 unsigned num_sources = num_sources_from_inst(devinfo, inst);
645 assert(num_sources < 3);
646
647 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
648 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
649
650 if (num_sources == 1)
651 return types_are_mixed_float(src0_type, dst_type);
652
653 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
654
655 return types_are_mixed_float(src0_type, src1_type) ||
656 types_are_mixed_float(src0_type, dst_type) ||
657 types_are_mixed_float(src1_type, dst_type);
658 }
659
660 /**
661 * Returns whether an instruction is an explicit or implicit conversion
662 * to/from byte.
663 */
664 static bool
is_byte_conversion(const struct gen_device_info * devinfo,const brw_inst * inst)665 is_byte_conversion(const struct gen_device_info *devinfo,
666 const brw_inst *inst)
667 {
668 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
669
670 unsigned num_sources = num_sources_from_inst(devinfo, inst);
671 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
672
673 if (dst_type != src0_type &&
674 (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
675 return true;
676 } else if (num_sources > 1) {
677 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
678 return dst_type != src1_type &&
679 (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
680 }
681
682 return false;
683 }
684
/**
 * Checks restrictions listed in "General Restrictions Based on Operand Types"
 * in the "Register Region Restrictions" section.
 *
 * Covers: Gen11+ byte-regioning limits on src1/src2, the packed-byte
 * destination rule, byte<->64-bit and HF<->64-bit conversion bans, integer/HF
 * destination alignment, and the destination-stride-to-execution-type ratio.
 * Returns accumulated error text (empty string when clean).
 */
static struct string
general_restrictions_based_on_operand_types(const struct gen_device_info *devinfo,
                                            const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Sends encode operands differently; none of these rules apply. */
   if (inst_is_send(devinfo, inst))
      return error_msg;

   if (devinfo->gen >= 11) {
      /* Gen11 removed byte regioning support on the second and third
       * source operands.
       */
      if (num_sources == 3) {
         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
                  "Byte data type is not supported for src1/2 register regioning. This includes "
                  "byte broadcast as well.");
      }
      if (num_sources == 2) {
         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
                  "Byte data type is not supported for src1 register regioning. This includes "
                  "byte broadcast as well.");
      }
   }

   /* The remaining rules apply only to 1/2-src instructions with a
    * destination and a non-scalar execution size.
    */
   if (num_sources == 3)
      return error_msg;

   if (exec_size == 1)
      return error_msg;

   if (desc->ndst == 0)
      return error_msg;

   /* The PRMs say:
    *
    *    Where n is the largest element size in bytes for any source or
    *    destination operand type, ExecSize * n must be <= 64.
    *
    * But we do not attempt to enforce it, because it is implied by other
    * rules:
    *
    *    - that the destination stride must match the execution data type
    *    - sources may not span more than two adjacent GRF registers
    *    - destination may not span more than two adjacent GRF registers
    *
    * In fact, checking it would weaken testing of the other rules.
    */

   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   bool dst_type_is_byte =
      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||
      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;

   if (dst_type_is_byte) {
      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
         /* Packed-byte destinations are legal only for raw MOVs; the other
          * rules below do not apply, so return early either way.
          */
         if (!inst_is_raw_move(devinfo, inst))
            ERROR("Only raw MOV supports a packed-byte destination");
         return error_msg;
      }
   }

   unsigned exec_type = execution_type(devinfo, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       exec_type_size == 8 && dst_type_size == 4)
      dst_type_size = 8;

   if (is_byte_conversion(devinfo, inst)) {
      /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
       *
       *    "There is no direct conversion from B/UB to DF or DF to B/UB.
       *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
       *
       * Even if these restrictions are listed for the MOV instruction, we
       * validate this more generally, since there is the possibility
       * of implicit conversions from other instructions.
       */
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      enum brw_reg_type src1_type = num_sources > 1 ?
                                    brw_inst_src1_type(devinfo, inst) : 0;

      ERROR_IF(type_sz(dst_type) == 1 &&
               (type_sz(src0_type) == 8 ||
                (num_sources > 1 && type_sz(src1_type) == 8)),
               "There are no direct conversions between 64-bit types and B/UB");

      ERROR_IF(type_sz(dst_type) == 8 &&
               (type_sz(src0_type) == 1 ||
                (num_sources > 1 && type_sz(src1_type) == 1)),
               "There are no direct conversions between 64-bit types and B/UB");
   }

   if (is_half_float_conversion(devinfo, inst)) {
      /**
       * A helper to validate used in the validation of the following restriction
       * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
       *
       *    "There is no direct conversion from HF to DF or DF to HF.
       *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
       *
       * Even if these restrictions are listed for the MOV instruction, we
       * validate this more generally, since there is the possibility
       * of implicit conversions from other instructions, such us implicit
       * conversion from integer to HF with the ADD instruction in SKL+.
       */
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      enum brw_reg_type src1_type = num_sources > 1 ?
                                    brw_inst_src1_type(devinfo, inst) : 0;
      ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
               (type_sz(src0_type) == 8 ||
                (num_sources > 1 && type_sz(src1_type) == 8)),
               "There are no direct conversions between 64-bit types and HF");

      ERROR_IF(type_sz(dst_type) == 8 &&
               (src0_type == BRW_REGISTER_TYPE_HF ||
                (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
               "There are no direct conversions between 64-bit types and HF");

      /* From the BDW+ PRM:
       *
       *   "Conversion between Integer and HF (Half Float) must be
       *    DWord-aligned and strided by a DWord on the destination."
       *
       * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
       *
       *   "There is a relaxed alignment rule for word destinations. When
       *    the destination type is word (UW, W, HF), destination data types
       *    can be aligned to either the lowest word or the second lowest
       *    word of the execution channel. This means the destination data
       *    words can be either all in the even word locations or all in the
       *    odd word locations."
       *
       * We do not implement the second rule as is though, since empirical
       * testing shows inconsistencies:
       *   - It suggests that packed 16-bit is not allowed, which is not true.
       *   - It suggests that conversions from Q/DF to W (which need to be
       *     64-bit aligned on the destination) are not possible, which is
       *     not true.
       *
       * So from this rule we only validate the implication that conversions
       * from F to HF need to be DWord strided (except in Align1 mixed
       * float mode where packed fp16 destination is allowed so long as the
       * destination is oword-aligned).
       *
       * Finally, we only validate this for Align1 because Align16 always
       * requires packed destinations, so these restrictions can't possibly
       * apply to Align16 mode.
       */
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         if ((dst_type == BRW_REGISTER_TYPE_HF &&
              (brw_reg_type_is_integer(src0_type) ||
               (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
             (brw_reg_type_is_integer(dst_type) &&
              (src0_type == BRW_REGISTER_TYPE_HF ||
               (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
            ERROR_IF(dst_stride * dst_type_size != 4,
                     "Conversions between integer and half-float must be "
                     "strided by a DWord on the destination");

            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
            ERROR_IF(subreg % 4 != 0,
                     "Conversions between integer and half-float must be "
                     "aligned to a DWord on the destination");
         } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
                    dst_type == BRW_REGISTER_TYPE_HF) {
            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
            ERROR_IF(dst_stride != 2 &&
                     !(is_mixed_float(devinfo, inst) &&
                       dst_stride == 1 && subreg % 16 == 0),
                     "Conversions to HF must have either all words in even "
                     "word locations or all words in odd word locations or "
                     "be mixed-float with Oword-aligned packed destination");
         }
      }
   }

   /* There are special regioning rules for mixed-float mode in CHV and SKL that
    * override the general rule for the ratio of sizes of the destination type
    * and the execution type. We will add validation for those in a later patch.
    */
   bool validate_dst_size_and_exec_size_ratio =
      !is_mixed_float(devinfo, inst) ||
      !(devinfo->is_cherryview || devinfo->gen >= 9);

   if (validate_dst_size_and_exec_size_ratio &&
       exec_type_size > dst_type_size) {
      if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
         ERROR_IF(dst_stride * dst_type_size != exec_type_size,
                  "Destination stride must be equal to the ratio of the sizes "
                  "of the execution data type to the destination type");
      }

      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);

      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
         /* The i965 PRM says:
          *
          *    Implementation Restriction: The relaxed alignment rule for byte
          *    destination (#10.5) is not supported.
          */
         if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) {
            ERROR_IF(subreg % exec_type_size != 0 &&
                     subreg % exec_type_size != 1,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type (or to the next lowest byte for byte "
                     "destinations)");
         } else {
            ERROR_IF(subreg % exec_type_size != 0,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type");
         }
      }
   }

   return error_msg;
}
916
917 /**
918 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
919 * in the "Register Region Restrictions" section.
920 */
static struct string
general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
                                          const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions use a separate, always-packed regioning encoding, so
    * none of the general region parameter rules below apply to them.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      /* Align16 has no per-operand width/hstride controls; only the vertical
       * stride is validated here, and the allowed set grew on HSW+.
       */
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      return error_msg;
   }

   /* Align1: validate the <vstride; width, hstride> region of each non-
    * immediate source against the general regioning rules.
    */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* Loads the region parameters for src<n>, or skips the iteration when
       * the source is an immediate (immediates have no region).
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
               "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       *
       * Walk each row of the region; if a single row touches bytes in both
       * the low and high halves of the 64-byte (2-GRF) window, the row
       * crossed a GRF boundary without using VertStride.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << (offset % 64);
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
1079
1080 static struct string
special_restrictions_for_mixed_float_mode(const struct gen_device_info * devinfo,const brw_inst * inst)1081 special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
1082 const brw_inst *inst)
1083 {
1084 struct string error_msg = { .str = NULL, .len = 0 };
1085
1086 const unsigned opcode = brw_inst_opcode(devinfo, inst);
1087 const unsigned num_sources = num_sources_from_inst(devinfo, inst);
1088 if (num_sources >= 3)
1089 return error_msg;
1090
1091 if (!is_mixed_float(devinfo, inst))
1092 return error_msg;
1093
1094 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1095 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1096
1097 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1098 enum brw_reg_type src1_type = num_sources > 1 ?
1099 brw_inst_src1_type(devinfo, inst) : 0;
1100 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1101
1102 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1103 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1104
1105 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1106 * Float Operations:
1107 *
1108 * "Indirect addressing on source is not supported when source and
1109 * destination data types are mixed float."
1110 */
1111 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1112 (num_sources > 1 &&
1113 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1114 "Indirect addressing on source is not supported when source and "
1115 "destination data types are mixed float");
1116
1117 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1118 * Float Operations:
1119 *
1120 * "No SIMD16 in mixed mode when destination is f32. Instruction
1121 * execution size must be no more than 8."
1122 */
1123 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
1124 "Mixed float mode with 32-bit float destination is limited "
1125 "to SIMD8");
1126
1127 if (is_align16) {
1128 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1129 * Float Operations:
1130 *
1131 * "In Align16 mode, when half float and float data types are mixed
1132 * between source operands OR between source and destination operands,
1133 * the register content are assumed to be packed."
1134 *
1135 * Since Align16 doesn't have a concept of horizontal stride (or width),
1136 * it means that vertical stride must always be 4, since 0 and 2 would
1137 * lead to replicated data, and any other value is disallowed in Align16.
1138 */
1139 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1140 "Align16 mixed float mode assumes packed data (vstride must be 4");
1141
1142 ERROR_IF(num_sources >= 2 &&
1143 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1144 "Align16 mixed float mode assumes packed data (vstride must be 4");
1145
1146 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1147 * Float Operations:
1148 *
1149 * "For Align16 mixed mode, both input and output packed f16 data
1150 * must be oword aligned, no oword crossing in packed f16."
1151 *
1152 * The previous rule requires that Align16 operands are always packed,
1153 * and since there is only one bit for Align16 subnr, which represents
1154 * offsets 0B and 16B, this rule is always enforced and we don't need to
1155 * validate it.
1156 */
1157
1158 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1159 * Float Operations:
1160 *
1161 * "No SIMD16 in mixed mode when destination is packed f16 for both
1162 * Align1 and Align16."
1163 *
1164 * And:
1165 *
1166 * "In Align16 mode, when half float and float data types are mixed
1167 * between source operands OR between source and destination operands,
1168 * the register content are assumed to be packed."
1169 *
1170 * Which implies that SIMD16 is not available in Align16. This is further
1171 * confirmed by:
1172 *
1173 * "For Align16 mixed mode, both input and output packed f16 data
1174 * must be oword aligned, no oword crossing in packed f16"
1175 *
1176 * Since oword-aligned packed f16 data would cross oword boundaries when
1177 * the execution size is larger than 8.
1178 */
1179 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1180
1181 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1182 * Float Operations:
1183 *
1184 * "No accumulator read access for Align16 mixed float."
1185 */
1186 ERROR_IF(inst_uses_src_acc(devinfo, inst),
1187 "No accumulator read access for Align16 mixed float");
1188 } else {
1189 assert(!is_align16);
1190
1191 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1192 * Float Operations:
1193 *
1194 * "No SIMD16 in mixed mode when destination is packed f16 for both
1195 * Align1 and Align16."
1196 */
1197 ERROR_IF(exec_size > 8 && dst_is_packed &&
1198 dst_type == BRW_REGISTER_TYPE_HF,
1199 "Align1 mixed float mode is limited to SIMD8 when destination "
1200 "is packed half-float");
1201
1202 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1203 * Float Operations:
1204 *
1205 * "Math operations for mixed mode:
1206 * - In Align1, f16 inputs need to be strided"
1207 */
1208 if (opcode == BRW_OPCODE_MATH) {
1209 if (src0_type == BRW_REGISTER_TYPE_HF) {
1210 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1211 "Align1 mixed mode math needs strided half-float inputs");
1212 }
1213
1214 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
1215 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1216 "Align1 mixed mode math needs strided half-float inputs");
1217 }
1218 }
1219
1220 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
1221 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1222 * Float Operations:
1223 *
1224 * "In Align1, destination stride can be smaller than execution
1225 * type. When destination is stride of 1, 16 bit packed data is
1226 * updated on the destination. However, output packed f16 data
1227 * must be oword aligned, no oword crossing in packed f16."
1228 *
1229 * The requirement of not crossing oword boundaries for 16-bit oword
1230 * aligned data means that execution size is limited to 8.
1231 */
1232 unsigned subreg;
1233 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1234 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1235 else
1236 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1237 ERROR_IF(subreg % 16 != 0,
1238 "Align1 mixed mode packed half-float output must be "
1239 "oword aligned");
1240 ERROR_IF(exec_size > 8,
1241 "Align1 mixed mode packed half-float output must not "
1242 "cross oword boundaries (max exec size is 8)");
1243
1244 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1245 * Float Operations:
1246 *
1247 * "When source is float or half float from accumulator register and
1248 * destination is half float with a stride of 1, the source must
1249 * register aligned. i.e., source must have offset zero."
1250 *
1251 * Align16 mixed float mode doesn't allow accumulator access on sources,
1252 * so we only need to check this for Align1.
1253 */
1254 if (src0_is_acc(devinfo, inst) &&
1255 (src0_type == BRW_REGISTER_TYPE_F ||
1256 src0_type == BRW_REGISTER_TYPE_HF)) {
1257 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1258 "Mixed float mode requires register-aligned accumulator "
1259 "source reads when destination is packed half-float");
1260
1261 }
1262
1263 if (num_sources > 1 &&
1264 src1_is_acc(devinfo, inst) &&
1265 (src1_type == BRW_REGISTER_TYPE_F ||
1266 src1_type == BRW_REGISTER_TYPE_HF)) {
1267 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1268 "Mixed float mode requires register-aligned accumulator "
1269 "source reads when destination is packed half-float");
1270 }
1271 }
1272
1273 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1274 * Float Operations:
1275 *
1276 * "No swizzle is allowed when an accumulator is used as an implicit
1277 * source or an explicit source in an instruction. i.e. when
1278 * destination is half float with an implicit accumulator source,
1279 * destination stride needs to be 2."
1280 *
1281 * FIXME: it is not quite clear what the first sentence actually means
1282 * or its link to the implication described after it, so we only
1283 * validate the explicit implication, which is clearly described.
1284 */
1285 if (dst_type == BRW_REGISTER_TYPE_HF &&
1286 inst_uses_src_acc(devinfo, inst)) {
1287 ERROR_IF(dst_stride != 2,
1288 "Mixed float mode with implicit/explicit accumulator "
1289 "source and half-float destination requires a stride "
1290 "of 2 on the destination");
1291 }
1292 }
1293
1294 return error_msg;
1295 }
1296
1297 /**
1298 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
1299 *
1300 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
1301 * a bitmask of bytes accessed by the region.
1302 *
1303 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
1304 * instruction would be
1305 *
1306 * access_mask[0] = 0x00000000000000F0
1307 * access_mask[1] = 0x000000000000F000
1308 * access_mask[2] = 0x0000000000F00000
1309 * access_mask[3] = 0x00000000F0000000
1310 * access_mask[4-31] = 0
1311 *
1312 * because the first execution channel accesses bytes 7-4 and the second
1313 * execution channel accesses bytes 15-12, etc.
1314 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* Bitmask covering the bytes of one region element. */
   const uint64_t element_mask = (1ULL << element_size) - 1;
   const unsigned rows = exec_size / width;
   unsigned element = 0;

   /* Walk the region row by row; each channel's mask is the element mask
    * shifted to the channel's byte offset within the 64-byte (2-GRF) window.
    */
   for (unsigned row = 0; row < rows; row++) {
      const unsigned row_start = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++) {
         const unsigned byte_offset = row_start + col * hstride * element_size;
         access_mask[element++] = element_mask << (byte_offset % 64);
      }
   }

   /* Either the region described no channels at all, or exactly one mask
    * was produced per execution channel.
    */
   assert(element == 0 || element == exec_size);
}
1337
1338 /**
1339 * Returns the number of registers accessed according to the \p access_mask
1340 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int regs_read = 0;

   for (unsigned i = 0; i < 32; i++) {
      const uint64_t channel_mask = access_mask[i];

      /* Any byte above bit 31 lies in the second GRF: two registers read. */
      if (channel_mask >> 32)
         return 2;

      if (channel_mask != 0)
         regs_read = 1;
   }

   return regs_read;
}
1356
1357 /**
1358 * Checks restrictions listed in "Region Alignment Rules" in the "Register
1359 * Region Restrictions" section.
1360 */
static struct string
region_alignment_rules(const struct gen_device_info *devinfo,
                       const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These alignment rules are stated for Align1 one/two-source
    * instructions; 3-src, Align16, and send instructions are out of scope.
    */
   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* Loads the region parameters for src<n> and fills in its per-channel
       * access mask; skips the iteration for indirect or immediate sources.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=         \
          BRW_ADDRESS_DIRECT)                                          \
         continue;                                                     \
                                                                       \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);     \
      align1_access_mask(src ## n ## _access_mask,                     \
                         exec_size, element_size, subreg,              \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* The byte offset of the last element accessed by the region; if it
       * reaches byte 64 the region spans a third GRF, which is illegal.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       element_size == 8)
      element_size = 4;

   /* The destination region is a single row of exec_size elements with the
    * destination horizontal stride; express it in align1_access_mask terms.
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->gen <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bytes above bit 15 are in the upper OWord of the register. */
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->gen <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bytes above bit 31 are in the second GRF. */
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Validates rules (2) and (3) above for src<n>; a no-op when the
          * source spans only one register.
          */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "    \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented but the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gen6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
         /* Flags src<n> when it spans one register while the destination
          * spans two, unless one of the two documented exceptions applies.
          */
#define DO_SRC(n)                                                              \
         unsigned vstride, width, hstride;                                     \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));       \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));            \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));       \
         bool src ## n ## _is_packed_word =                                    \
            is_packed(vstride, width, hstride) &&                              \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
                                                                               \
         ERROR_IF(src ## n ## _regs == 1 &&                                    \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&            \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),       \
                  "When the destination spans two registers, the source must " \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar " \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}
1674
1675 static struct string
vector_immediate_restrictions(const struct gen_device_info * devinfo,const brw_inst * inst)1676 vector_immediate_restrictions(const struct gen_device_info *devinfo,
1677 const brw_inst *inst)
1678 {
1679 unsigned num_sources = num_sources_from_inst(devinfo, inst);
1680 struct string error_msg = { .str = NULL, .len = 0 };
1681
1682 if (num_sources == 3 || num_sources == 0)
1683 return (struct string){};
1684
1685 unsigned file = num_sources == 1 ?
1686 brw_inst_src0_reg_file(devinfo, inst) :
1687 brw_inst_src1_reg_file(devinfo, inst);
1688 if (file != BRW_IMMEDIATE_VALUE)
1689 return (struct string){};
1690
1691 enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
1692 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1693 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1694 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1695 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1696 enum brw_reg_type type = num_sources == 1 ?
1697 brw_inst_src0_type(devinfo, inst) :
1698 brw_inst_src1_type(devinfo, inst);
1699
1700 /* The PRMs say:
1701 *
1702 * When an immediate vector is used in an instruction, the destination
1703 * must be 128-bit aligned with destination horizontal stride equivalent
1704 * to a word for an immediate integer vector (v) and equivalent to a
1705 * DWord for an immediate float vector (vf).
1706 *
1707 * The text has not been updated for the addition of the immediate unsigned
1708 * integer vector type (uv) on SNB, but presumably the same restriction
1709 * applies.
1710 */
1711 switch (type) {
1712 case BRW_REGISTER_TYPE_V:
1713 case BRW_REGISTER_TYPE_UV:
1714 case BRW_REGISTER_TYPE_VF:
1715 ERROR_IF(dst_subreg % (128 / 8) != 0,
1716 "Destination must be 128-bit aligned in order to use immediate "
1717 "vector types");
1718
1719 if (type == BRW_REGISTER_TYPE_VF) {
1720 ERROR_IF(dst_type_size * dst_stride != 4,
1721 "Destination must have stride equivalent to dword in order "
1722 "to use the VF type");
1723 } else {
1724 ERROR_IF(dst_type_size * dst_stride != 2,
1725 "Destination must have stride equivalent to word in order "
1726 "to use the V or UV type");
1727 }
1728 break;
1729 default:
1730 break;
1731 }
1732
1733 return error_msg;
1734 }
1735
/**
 * Validates the extra restrictions that apply when an instruction has a
 * 64-bit destination or execution type, or is an integer DWord multiply
 * (which the checks below treat the same way).  Most of these are
 * CHV/BXT-specific PRM rules; the Align16 exec-size rule applies Gen8+.
 */
static struct string
special_requirements_for_handling_double_precision_data_types(
   const struct gen_device_info *devinfo,
   const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-source and 0-source instructions are not covered by these checks. */
   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Split sends don't have types so there's no doubles there. */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   enum brw_reg_type exec_type = execution_type(devinfo, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);

   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);

   /* MUL with both sources D/UD produces a 64-bit intermediate result, so
    * the PRM lumps it together with explicit 64-bit datatypes.
    */
   bool is_integer_dword_multiply =
      devinfo->gen >= 8 &&
      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);

   /* Nothing below applies unless some 64-bit datatype (or the DWord
    * multiply) is involved.
    */
   if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply)
      return (struct string){};

   /* Check each source operand in turn; DO_SRC fills the locals below from
    * the src0 or src1 field accessors.
    */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
      bool is_scalar_region;
      enum brw_reg_file file;
      enum brw_reg_type type;

      /* Immediate sources have no region, so they are skipped entirely. */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      type_size = brw_reg_type_to_size(type);                                  \
      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
       *    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
         /* Strides in bytes, so the qword (8-byte) alignment can be checked
          * directly with a modulus.
          */
         unsigned src_stride = hstride * type_size;
         unsigned dst_stride = dst_hstride * dst_type_size;

         ERROR_IF(!is_scalar_region &&
                  (src_stride % 8 != 0 ||
                   dst_stride % 8 != 0 ||
                   src_stride != dst_stride),
                  "Source and destination horizontal stride must equal and a "
                  "multiple of a qword when the execution type is 64-bit");

         ERROR_IF(vstride != width * hstride,
                  "Vstride must be Width * Hstride when the execution type is "
                  "64-bit");

         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
                  "Source and destination offset must be the same when the "
                  "execution type is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
                  "Indirect addressing is not allowed when the execution type "
                  "is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       *
       * We assume that the restriction applies to GLK as well.
       *
       * We assume that the restriction does not apply to the null register.
       */
      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
         /* MAC and AccWrEn implicitly use the accumulator, which is an ARF,
          * so they are rejected along with explicit non-null ARF operands.
          */
         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
                  brw_inst_acc_wr_control(devinfo, inst) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
                   reg != BRW_ARF_NULL) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
                   dst_reg != BRW_ARF_NULL),
                  "Architecture registers cannot be used when the execution "
                  "type is 64-bit");
      }
   }

   /* The PRMs say that for BDW, SKL:
    *
    *    If Align16 is required for an operation with QW destination and non-QW
    *    source datatypes, the execution size cannot exceed 2.
    *
    * We assume that the restriction applies to all Gen8+ parts.
    */
   if (devinfo->gen >= 8) {
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      /* For 1-source instructions, treat src1 as having src0's type so the
       * size comparison below degenerates to checking src0 alone.
       */
      enum brw_reg_type src1_type =
         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
      unsigned src1_type_size = brw_reg_type_to_size(src1_type);

      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
               dst_type_size == 8 &&
               (src0_type_size != 8 || src1_type_size != 8) &&
               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
               "In Align16 exec size cannot exceed 2 with a QWord destination "
               "and a non-QWord source");
   }

   /* The PRMs say that for CHV, BXT:
    *
    *    When source or destination datatype is 64b or operation is integer
    *    DWord multiply, DepCtrl must not be used.
    *
    * We assume that the restriction applies to GLK as well.
    */
   if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
               brw_inst_no_dd_clear(devinfo, inst),
               "DepCtrl is not allowed when the execution type is 64-bit");
   }

   return error_msg;
}
1908
1909 static struct string
instruction_restrictions(const struct gen_device_info * devinfo,const brw_inst * inst)1910 instruction_restrictions(const struct gen_device_info *devinfo,
1911 const brw_inst *inst)
1912 {
1913 struct string error_msg = { .str = NULL, .len = 0 };
1914
1915 /* From GEN:BUG:1604601757:
1916 *
1917 * "When multiplying a DW and any lower precision integer, source modifier
1918 * is not supported."
1919 */
1920 if (devinfo->gen >= 12 &&
1921 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) {
1922 enum brw_reg_type exec_type = execution_type(devinfo, inst);
1923 const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
1924 brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1925 !(brw_inst_src0_negate(devinfo, inst) ||
1926 brw_inst_src0_abs(devinfo, inst));
1927 const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
1928 brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1929 !(brw_inst_src1_negate(devinfo, inst) ||
1930 brw_inst_src1_abs(devinfo, inst));
1931
1932 ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&
1933 type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
1934 "When multiplying a DW and any lower precision integer, source "
1935 "modifier is not supported.");
1936 }
1937
1938 return error_msg;
1939 }
1940
/**
 * Runs every validation pass on a single (uncompacted) instruction.
 *
 * On failure the accumulated error text is attached to \p disasm (if
 * non-NULL) at byte offset \p offset.  Returns true iff no pass reported
 * an error.
 */
bool
brw_validate_instruction(const struct gen_device_info *devinfo,
                         const brw_inst *inst, int offset,
                         struct disasm_info *disasm)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (is_unsupported_inst(devinfo, inst)) {
      /* The opcode doesn't exist on this Gen; none of the field-level checks
       * below would be meaningful.
       */
      ERROR("Instruction not supported on this Gen");
   } else {
      CHECK(invalid_values);

      /* Only run the remaining passes when the basic field values decoded
       * cleanly; otherwise they would operate on garbage.
       */
      if (error_msg.str == NULL) {
         CHECK(sources_not_null);
         CHECK(send_restrictions);
         CHECK(alignment_supported);
         CHECK(general_restrictions_based_on_operand_types);
         CHECK(general_restrictions_on_region_parameters);
         CHECK(special_restrictions_for_mixed_float_mode);
         CHECK(region_alignment_rules);
         CHECK(vector_immediate_restrictions);
         CHECK(special_requirements_for_handling_double_precision_data_types);
         CHECK(instruction_restrictions);
      }
   }

   if (error_msg.str && disasm) {
      disasm_insert_error(disasm, offset, error_msg.str);
   }
   /* error_msg.len stays valid after the free, so it can still be used for
    * the return value below.
    */
   free(error_msg.str);

   return error_msg.len == 0;
}
1974
1975 bool
brw_validate_instructions(const struct gen_device_info * devinfo,const void * assembly,int start_offset,int end_offset,struct disasm_info * disasm)1976 brw_validate_instructions(const struct gen_device_info *devinfo,
1977 const void *assembly, int start_offset, int end_offset,
1978 struct disasm_info *disasm)
1979 {
1980 bool valid = true;
1981
1982 for (int src_offset = start_offset; src_offset < end_offset;) {
1983 const brw_inst *inst = assembly + src_offset;
1984 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
1985 unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
1986 : sizeof(brw_inst);
1987 brw_inst uncompacted;
1988
1989 if (is_compact) {
1990 brw_compact_inst *compacted = (void *)inst;
1991 brw_uncompact_instruction(devinfo, &uncompacted, compacted);
1992 inst = &uncompacted;
1993 }
1994
1995 bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm);
1996 valid = valid && v;
1997
1998 src_offset += inst_size;
1999 }
2000
2001 return valid;
2002 }
2003