1 /*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41 #include <stdlib.h>
42 #include "brw_eu.h"
43 #include "brw_disasm_info.h"
44
/* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;  /* heap-allocated, NUL-terminated buffer; NULL when empty */
   size_t len; /* length in bytes, excluding the terminating NUL */
};
50
51 static void
cat(struct string * dest,const struct string src)52 cat(struct string *dest, const struct string src)
53 {
54 dest->str = realloc(dest->str, dest->len + src.len + 1);
55 memcpy(dest->str + dest->len, src.str, src.len);
56 dest->str[dest->len + src.len] = '\0';
57 dest->len = dest->len + src.len;
58 }
59 #define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
60
61 static bool
contains(const struct string haystack,const struct string needle)62 contains(const struct string haystack, const struct string needle)
63 {
64 return haystack.str && memmem(haystack.str, haystack.len,
65 needle.str, needle.len) != NULL;
66 }
67 #define CONTAINS(haystack, needle) \
68 contains(haystack, (struct string){needle, strlen(needle)})
69
70 #define error(str) "\tERROR: " str "\n"
71 #define ERROR_INDENT "\t "
72
73 #define ERROR(msg) ERROR_IF(true, msg)
74 #define ERROR_IF(cond, msg) \
75 do { \
76 if ((cond) && !CONTAINS(error_msg, error(msg))) { \
77 CAT(error_msg, error(msg)); \
78 } \
79 } while(0)
80
81 #define CHECK(func, args...) \
82 do { \
83 struct string __msg = func(isa, inst, ##args); \
84 if (__msg.str) { \
85 cat(&error_msg, __msg); \
86 free(__msg.str); \
87 } \
88 } while (0)
89
90 #define STRIDE(stride) (stride != 0 ? 1 << ((stride) - 1) : 0)
91 #define WIDTH(width) (1 << (width))
92
93 static bool
inst_is_send(const struct brw_isa_info * isa,const brw_inst * inst)94 inst_is_send(const struct brw_isa_info *isa, const brw_inst *inst)
95 {
96 switch (brw_inst_opcode(isa, inst)) {
97 case BRW_OPCODE_SEND:
98 case BRW_OPCODE_SENDC:
99 case BRW_OPCODE_SENDS:
100 case BRW_OPCODE_SENDSC:
101 return true;
102 default:
103 return false;
104 }
105 }
106
107 static bool
inst_is_split_send(const struct brw_isa_info * isa,const brw_inst * inst)108 inst_is_split_send(const struct brw_isa_info *isa, const brw_inst *inst)
109 {
110 const struct intel_device_info *devinfo = isa->devinfo;
111
112 if (devinfo->ver >= 12) {
113 return inst_is_send(isa, inst);
114 } else {
115 switch (brw_inst_opcode(isa, inst)) {
116 case BRW_OPCODE_SENDS:
117 case BRW_OPCODE_SENDSC:
118 return true;
119 default:
120 return false;
121 }
122 }
123 }
124
125 static unsigned
signed_type(unsigned type)126 signed_type(unsigned type)
127 {
128 switch (type) {
129 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
130 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
131 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
132 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
133 default: return type;
134 }
135 }
136
137 static enum brw_reg_type
inst_dst_type(const struct brw_isa_info * isa,const brw_inst * inst)138 inst_dst_type(const struct brw_isa_info *isa, const brw_inst *inst)
139 {
140 const struct intel_device_info *devinfo = isa->devinfo;
141
142 return (devinfo->ver < 12 || !inst_is_send(isa, inst)) ?
143 brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;
144 }
145
146 static bool
inst_is_raw_move(const struct brw_isa_info * isa,const brw_inst * inst)147 inst_is_raw_move(const struct brw_isa_info *isa, const brw_inst *inst)
148 {
149 const struct intel_device_info *devinfo = isa->devinfo;
150
151 unsigned dst_type = signed_type(inst_dst_type(isa, inst));
152 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
153
154 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
155 /* FIXME: not strictly true */
156 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
157 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
158 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
159 return false;
160 }
161 } else if (brw_inst_src0_negate(devinfo, inst) ||
162 brw_inst_src0_abs(devinfo, inst)) {
163 return false;
164 }
165
166 return brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV &&
167 brw_inst_saturate(devinfo, inst) == 0 &&
168 dst_type == src_type;
169 }
170
171 static bool
dst_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)172 dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
173 {
174 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
175 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
176 }
177
178 static bool
src0_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)179 src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
180 {
181 return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
182 brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
183 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
184 }
185
186 static bool
src1_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)187 src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
188 {
189 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
190 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
191 }
192
193 static bool
src0_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)194 src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
195 {
196 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
197 (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
198 }
199
200 static bool
src1_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)201 src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
202 {
203 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
204 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
205 }
206
207 static bool
src0_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)208 src0_has_scalar_region(const struct intel_device_info *devinfo,
209 const brw_inst *inst)
210 {
211 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
212 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
213 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
214 }
215
216 static bool
src1_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)217 src1_has_scalar_region(const struct intel_device_info *devinfo,
218 const brw_inst *inst)
219 {
220 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
221 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
222 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
223 }
224
/**
 * Validates basic field encodings of \p inst: execution size, Gfx12+
 * channel offset, and (for non-send instructions) register type encodings.
 *
 * Returns an error string (str == NULL on success), like the other
 * validation passes invoked through CHECK().
 */
static struct string
invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Only power-of-two execution sizes from 1 through 32 are encodable. */
   switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
   case BRW_EXECUTE_1:
   case BRW_EXECUTE_2:
   case BRW_EXECUTE_4:
   case BRW_EXECUTE_8:
   case BRW_EXECUTE_16:
   case BRW_EXECUTE_32:
      break;
   default:
      ERROR("invalid execution size");
      break;
   }

   /* The checks below rely on a sane execution size; stop here if not. */
   if (error_msg.str)
      return error_msg;

   if (devinfo->ver >= 12) {
      unsigned group_size = 1 << brw_inst_exec_size(devinfo, inst);
      unsigned qtr_ctrl = brw_inst_qtr_control(devinfo, inst);
      unsigned nib_ctrl = brw_inst_nib_control(devinfo, inst);

      /* Channel offset in channels: qtr_ctrl steps by 8, nib_ctrl by 4. */
      unsigned chan_off = (qtr_ctrl * 2 + nib_ctrl) << 2;
      ERROR_IF(chan_off % group_size != 0,
               "The execution size must be a factor of the chosen offset");
   }

   /* Sends do not use the regular type encodings checked below. */
   if (inst_is_send(isa, inst))
      return error_msg;

   if (error_msg.str)
      return error_msg;

   if (num_sources == 3) {
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         if (devinfo->ver >= 10) {
            ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,
                     "invalid register type encoding");
         } else {
            ERROR("Align1 mode not allowed on Gen < 10");
         }
      } else {
         /* Align16 3-src uses one shared source-type field. */
         ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
                  brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
                  "invalid register type encoding");
      }
   } else {
      ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
               (num_sources > 0 &&
                brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
               (num_sources > 1 &&
                brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
               "invalid register type encoding");
   }

   return error_msg;
}
292
/**
 * Checks that sources which must not be the null register are not:
 * src0 of one-or-more-source instructions (except SYNC, which the code
 * exempts) and src1 of two-source instructions.
 */
static struct string
sources_not_null(const struct brw_isa_info *isa,
                 const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Nothing to test. 3-src instructions can only have GRF sources, and
    * there's no bit to control the file.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Nothing to test. Split sends can only encode a file in sources that are
    * allowed to be NULL.
    */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   /* SYNC's src0 is exempt from the null check. */
   if (num_sources >= 1 && brw_inst_opcode(isa, inst) != BRW_OPCODE_SYNC)
      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");

   if (num_sources == 2)
      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");

   return error_msg;
}
321
/**
 * Flags use of Align16 access mode on Gfx11+, where it is not supported.
 */
static struct string
alignment_supported(const struct brw_isa_info *isa,
                    const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   struct string error_msg = { .str = NULL, .len = 0 };

   ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
            "Align16 not supported");

   return error_msg;
}
334
335 static bool
inst_uses_src_acc(const struct brw_isa_info * isa,const brw_inst * inst)336 inst_uses_src_acc(const struct brw_isa_info *isa,
337 const brw_inst *inst)
338 {
339 const struct intel_device_info *devinfo = isa->devinfo;
340
341 /* Check instructions that use implicit accumulator sources */
342 switch (brw_inst_opcode(isa, inst)) {
343 case BRW_OPCODE_MAC:
344 case BRW_OPCODE_MACH:
345 case BRW_OPCODE_SADA2:
346 return true;
347 default:
348 break;
349 }
350
351 /* FIXME: support 3-src instructions */
352 unsigned num_sources = brw_num_sources_from_inst(isa, inst);
353 assert(num_sources < 3);
354
355 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
356 }
357
/**
 * Checks SEND-family restrictions: src1 register file of split sends,
 * EOT payload placement in g112-g127, disjointness of split-send payloads,
 * and direct addressing / GRF-source / return-register rules for ordinary
 * sends.
 */
static struct string
send_restrictions(const struct brw_isa_info *isa,
                  const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(isa, inst)) {
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
          brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc) / reg_unit(devinfo);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc) /
                      reg_unit(devinfo);
         }
         /* Payload ranges [src0, src0+mlen) and [src1, src1+ex_mlen) must
          * be disjoint.
          */
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                  "split send payloads must not overlap");
      }
   } else if (inst_is_send(isa, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
               "send from non-GRF");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      /* The response may not extend past r127 when it overlaps the source
       * payload.
       */
      ERROR_IF(!dst_is_null(devinfo, inst) &&
               (brw_inst_dst_da_reg_nr(devinfo, inst) +
                brw_inst_rlen(devinfo, inst) > 127) &&
               (brw_inst_src0_da_reg_nr(devinfo, inst) +
                brw_inst_mlen(devinfo, inst) >
                brw_inst_dst_da_reg_nr(devinfo, inst)),
               "r127 must not be used for return address when there is "
               "a src and dest overlap");
   }

   return error_msg;
}
424
425 static bool
is_unsupported_inst(const struct brw_isa_info * isa,const brw_inst * inst)426 is_unsupported_inst(const struct brw_isa_info *isa,
427 const brw_inst *inst)
428 {
429 return brw_inst_opcode(isa, inst) == BRW_OPCODE_ILLEGAL;
430 }
431
432 /**
433 * Returns whether a combination of two types would qualify as mixed float
434 * operation mode
435 */
436 static inline bool
types_are_mixed_float(enum brw_reg_type t0,enum brw_reg_type t1)437 types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
438 {
439 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
440 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
441 }
442
/**
 * Maps a register data type to the execution data type it implies:
 * float types map to themselves (VF widens to F), 64-bit integers to Q,
 * 32-bit integers to D, and all sub-dword integer types (including the
 * packed vector types V/UV) to W.
 *
 * The switch intentionally has no default so the compiler can warn if a
 * new brw_reg_type is left unhandled.
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   unreachable("not reached");
}
474
/**
 * Returns the execution type of an instruction \p inst
 */
static enum brw_reg_type
execution_type(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = inst_dst_type(isa, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* A lone HF source takes its execution type from the destination. */
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   /* Any F/HF mix among sources and destination executes as F. */
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   /* Differing source types: resolve in the precedence order below. */
   if (src0_exec_type == BRW_REGISTER_TYPE_NF ||
       src1_exec_type == BRW_REGISTER_TYPE_NF)
      return BRW_REGISTER_TYPE_NF;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}
530
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   /* Rows must follow each other immediately: vstride == width.  A
    * single-element row (<1;1,0>) needs hstride 0; wider rows need
    * contiguous elements, i.e. hstride 1.
    */
   if (vstride != width)
      return false;

   return vstride == 1 ? hstride == 0 : hstride == 1;
}
550
/**
 * Returns whether a region is linear
 *
 * A region is linear if its elements do not overlap and are not replicated.
 * Unlike a packed region, intervening space (i.e. strided values) is allowed.
 */
static bool
is_linear(unsigned vstride, unsigned width, unsigned hstride)
{
   /* A single element with no horizontal step is trivially linear. */
   if (hstride == 0 && width == 1)
      return true;

   /* Otherwise each row must start exactly where the previous one ends. */
   return vstride == width * hstride;
}
563
/**
 * Returns whether an instruction is an explicit or implicit conversion
 * to/from half-float.
 */
static bool
is_half_float_conversion(const struct brw_isa_info *isa,
                         const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   /* A conversion happens when dst and a source differ in type and at
    * least one of the pair is HF.  src1 is only consulted when src0 did
    * not already establish a conversion.
    */
   if (dst_type != src0_type &&
       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
      return true;
   } else if (num_sources > 1) {
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (dst_type == BRW_REGISTER_TYPE_HF ||
             src1_type == BRW_REGISTER_TYPE_HF);
   }

   return false;
}
591
/*
 * Returns whether an instruction is using mixed float operation mode
 */
static bool
is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Sends are never treated as mixed float. */
   if (inst_is_send(isa, inst))
      return false;

   /* Instructions without a destination cannot mix on the dst type. */
   unsigned opcode = brw_inst_opcode(isa, inst);
   const struct opcode_desc *desc = brw_opcode_desc(isa, opcode);
   if (desc->ndst == 0)
      return false;

   /* FIXME: support 3-src instructions */
   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   assert(num_sources < 3);

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   if (num_sources == 1)
      return types_are_mixed_float(src0_type, dst_type);

   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);

   /* Mixed mode if any pair among {src0, src1, dst} combines F and HF. */
   return types_are_mixed_float(src0_type, src1_type) ||
          types_are_mixed_float(src0_type, dst_type) ||
          types_are_mixed_float(src1_type, dst_type);
}
624
/**
 * Returns whether an instruction is an explicit or implicit conversion
 * to/from byte.
 */
static bool
is_byte_conversion(const struct brw_isa_info *isa,
                   const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   /* A conversion happens when dst and a source differ in type and at
    * least one of the pair is a 1-byte type.  src1 is only consulted when
    * src0 did not already establish a conversion.
    */
   if (dst_type != src0_type &&
       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
      return true;
   } else if (num_sources > 1) {
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
   }

   return false;
}
651
652 /**
653 * Checks restrictions listed in "General Restrictions Based on Operand Types"
654 * in the "Register Region Restrictions" section.
655 */
656 static struct string
general_restrictions_based_on_operand_types(const struct brw_isa_info * isa,const brw_inst * inst)657 general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
658 const brw_inst *inst)
659 {
660 const struct intel_device_info *devinfo = isa->devinfo;
661
662 const struct opcode_desc *desc =
663 brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
664 unsigned num_sources = brw_num_sources_from_inst(isa, inst);
665 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
666 struct string error_msg = { .str = NULL, .len = 0 };
667
668 if (inst_is_send(isa, inst))
669 return error_msg;
670
671 if (devinfo->ver >= 11) {
672 /* A register type of B or UB for DPAS actually means 4 bytes packed into
673 * a D or UD, so it is allowed.
674 */
675 if (num_sources == 3 && brw_inst_opcode(isa, inst) != BRW_OPCODE_DPAS) {
676 ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
677 brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
678 "Byte data type is not supported for src1/2 register regioning. This includes "
679 "byte broadcast as well.");
680 }
681 if (num_sources == 2) {
682 ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
683 "Byte data type is not supported for src1 register regioning. This includes "
684 "byte broadcast as well.");
685 }
686 }
687
688 enum brw_reg_type dst_type;
689
690 if (num_sources == 3) {
691 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
692 dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
693 else
694 dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
695 } else {
696 dst_type = inst_dst_type(isa, inst);
697 }
698
699 ERROR_IF(dst_type == BRW_REGISTER_TYPE_DF &&
700 !devinfo->has_64bit_float,
701 "64-bit float destination, but platform does not support it");
702
703 ERROR_IF((dst_type == BRW_REGISTER_TYPE_Q ||
704 dst_type == BRW_REGISTER_TYPE_UQ) &&
705 !devinfo->has_64bit_int,
706 "64-bit int destination, but platform does not support it");
707
708 for (unsigned s = 0; s < num_sources; s++) {
709 enum brw_reg_type src_type;
710 if (num_sources == 3) {
711 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
712 switch (s) {
713 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
714 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
715 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
716 default: unreachable("invalid src");
717 }
718 } else {
719 src_type = brw_inst_3src_a16_src_type(devinfo, inst);
720 }
721 } else {
722 switch (s) {
723 case 0: src_type = brw_inst_src0_type(devinfo, inst); break;
724 case 1: src_type = brw_inst_src1_type(devinfo, inst); break;
725 default: unreachable("invalid src");
726 }
727 }
728
729 ERROR_IF(src_type == BRW_REGISTER_TYPE_DF &&
730 !devinfo->has_64bit_float,
731 "64-bit float source, but platform does not support it");
732
733 ERROR_IF((src_type == BRW_REGISTER_TYPE_Q ||
734 src_type == BRW_REGISTER_TYPE_UQ) &&
735 !devinfo->has_64bit_int,
736 "64-bit int source, but platform does not support it");
737 }
738
739 if (num_sources == 3)
740 return error_msg;
741
742 if (exec_size == 1)
743 return error_msg;
744
745 if (desc->ndst == 0)
746 return error_msg;
747
748 /* The PRMs say:
749 *
750 * Where n is the largest element size in bytes for any source or
751 * destination operand type, ExecSize * n must be <= 64.
752 *
753 * But we do not attempt to enforce it, because it is implied by other
754 * rules:
755 *
756 * - that the destination stride must match the execution data type
757 * - sources may not span more than two adjacent GRF registers
758 * - destination may not span more than two adjacent GRF registers
759 *
760 * In fact, checking it would weaken testing of the other rules.
761 */
762
763 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
764 bool dst_type_is_byte =
765 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_B ||
766 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_UB;
767
768 if (dst_type_is_byte) {
769 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
770 if (!inst_is_raw_move(isa, inst))
771 ERROR("Only raw MOV supports a packed-byte destination");
772 return error_msg;
773 }
774 }
775
776 unsigned exec_type = execution_type(isa, inst);
777 unsigned exec_type_size = brw_reg_type_to_size(exec_type);
778 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
779
780 if (is_byte_conversion(isa, inst)) {
781 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
782 *
783 * "There is no direct conversion from B/UB to DF or DF to B/UB.
784 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
785 *
786 * Even if these restrictions are listed for the MOV instruction, we
787 * validate this more generally, since there is the possibility
788 * of implicit conversions from other instructions.
789 */
790 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
791 enum brw_reg_type src1_type = num_sources > 1 ?
792 brw_inst_src1_type(devinfo, inst) : 0;
793
794 ERROR_IF(type_sz(dst_type) == 1 &&
795 (type_sz(src0_type) == 8 ||
796 (num_sources > 1 && type_sz(src1_type) == 8)),
797 "There are no direct conversions between 64-bit types and B/UB");
798
799 ERROR_IF(type_sz(dst_type) == 8 &&
800 (type_sz(src0_type) == 1 ||
801 (num_sources > 1 && type_sz(src1_type) == 1)),
802 "There are no direct conversions between 64-bit types and B/UB");
803 }
804
805 if (is_half_float_conversion(isa, inst)) {
806 /**
807 * A helper to validate used in the validation of the following restriction
808 * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
809 *
810 * "There is no direct conversion from HF to DF or DF to HF.
811 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
812 *
813 * Even if these restrictions are listed for the MOV instruction, we
814 * validate this more generally, since there is the possibility
815 * of implicit conversions from other instructions, such us implicit
816 * conversion from integer to HF with the ADD instruction in SKL+.
817 */
818 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
819 enum brw_reg_type src1_type = num_sources > 1 ?
820 brw_inst_src1_type(devinfo, inst) : 0;
821 ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
822 (type_sz(src0_type) == 8 ||
823 (num_sources > 1 && type_sz(src1_type) == 8)),
824 "There are no direct conversions between 64-bit types and HF");
825
826 ERROR_IF(type_sz(dst_type) == 8 &&
827 (src0_type == BRW_REGISTER_TYPE_HF ||
828 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
829 "There are no direct conversions between 64-bit types and HF");
830
831 /* From the BDW+ PRM:
832 *
833 * "Conversion between Integer and HF (Half Float) must be
834 * DWord-aligned and strided by a DWord on the destination."
835 *
836 * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
837 *
838 * "There is a relaxed alignment rule for word destinations. When
839 * the destination type is word (UW, W, HF), destination data types
840 * can be aligned to either the lowest word or the second lowest
841 * word of the execution channel. This means the destination data
842 * words can be either all in the even word locations or all in the
843 * odd word locations."
844 *
845 * We do not implement the second rule as is though, since empirical
846 * testing shows inconsistencies:
847 * - It suggests that packed 16-bit is not allowed, which is not true.
848 * - It suggests that conversions from Q/DF to W (which need to be
849 * 64-bit aligned on the destination) are not possible, which is
850 * not true.
851 *
852 * So from this rule we only validate the implication that conversions
853 * from F to HF need to be DWord strided (except in Align1 mixed
854 * float mode where packed fp16 destination is allowed so long as the
855 * destination is oword-aligned).
856 *
857 * Finally, we only validate this for Align1 because Align16 always
858 * requires packed destinations, so these restrictions can't possibly
859 * apply to Align16 mode.
860 */
861 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
862 if ((dst_type == BRW_REGISTER_TYPE_HF &&
863 (brw_reg_type_is_integer(src0_type) ||
864 (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
865 (brw_reg_type_is_integer(dst_type) &&
866 (src0_type == BRW_REGISTER_TYPE_HF ||
867 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
868 ERROR_IF(dst_stride * dst_type_size != 4,
869 "Conversions between integer and half-float must be "
870 "strided by a DWord on the destination");
871
872 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
873 ERROR_IF(subreg % 4 != 0,
874 "Conversions between integer and half-float must be "
875 "aligned to a DWord on the destination");
876 } else if (dst_type == BRW_REGISTER_TYPE_HF) {
877 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
878 ERROR_IF(dst_stride != 2 &&
879 !(is_mixed_float(isa, inst) &&
880 dst_stride == 1 && subreg % 16 == 0),
881 "Conversions to HF must have either all words in even "
882 "word locations or all words in odd word locations or "
883 "be mixed-float with Oword-aligned packed destination");
884 }
885 }
886 }
887
888 /* There are special regioning rules for mixed-float mode in CHV and SKL that
889 * override the general rule for the ratio of sizes of the destination type
890 * and the execution type. We will add validation for those in a later patch.
891 */
892 bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(isa, inst);
893
894 if (validate_dst_size_and_exec_size_ratio &&
895 exec_type_size > dst_type_size) {
896 if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) {
897 ERROR_IF(dst_stride * dst_type_size != exec_type_size,
898 "Destination stride must be equal to the ratio of the sizes "
899 "of the execution data type to the destination type");
900 }
901
902 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
903
904 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
905 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
906 /* The i965 PRM says:
907 *
908 * Implementation Restriction: The relaxed alignment rule for byte
909 * destination (#10.5) is not supported.
910 */
911 if (dst_type_is_byte) {
912 ERROR_IF(subreg % exec_type_size != 0 &&
913 subreg % exec_type_size != 1,
914 "Destination subreg must be aligned to the size of the "
915 "execution data type (or to the next lowest byte for byte "
916 "destinations)");
917 } else {
918 ERROR_IF(subreg % exec_type_size != 0,
919 "Destination subreg must be aligned to the size of the "
920 "execution data type");
921 }
922 }
923 }
924
925 return error_msg;
926 }
927
928 /**
929 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
930 * in the "Register Region Restrictions" section.
931 */
static struct string
general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
                                          const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions encode regions differently; they are not validated
    * here.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         /* Immediates have no region, so only register sources are checked. */
         ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                  brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                  brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                  brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                  "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
      }

      if (num_sources == 2) {
         ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                  brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                  brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                  brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                  "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
      }

      /* The remaining rules below are Align1-only. */
      return error_msg;
   }

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* DO_SRC(n) loads the region parameters of src<n> into the locals
       * above.  Immediate sources have no region, so the macro skips them
       * via the `continue` it expands to.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
               "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       *
       * Build a byte-access bitmask for each row of the region: `mask` has
       * one bit set per byte of a single element, and it is shifted to the
       * element's byte offset within a 64-byte (two-GRF) window.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << (offset % 64);
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         /* A single row touching bytes both below and above the 32-byte
          * mark means the row itself straddles a GRF boundary, which only
          * VertStride (i.e. a new row) is allowed to do.
          */
         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
1070
1071 static struct string
special_restrictions_for_mixed_float_mode(const struct brw_isa_info * isa,const brw_inst * inst)1072 special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
1073 const brw_inst *inst)
1074 {
1075 const struct intel_device_info *devinfo = isa->devinfo;
1076
1077 struct string error_msg = { .str = NULL, .len = 0 };
1078
1079 const unsigned opcode = brw_inst_opcode(isa, inst);
1080 const unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1081 if (num_sources >= 3)
1082 return error_msg;
1083
1084 if (!is_mixed_float(isa, inst))
1085 return error_msg;
1086
1087 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1088 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1089
1090 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1091 enum brw_reg_type src1_type = num_sources > 1 ?
1092 brw_inst_src1_type(devinfo, inst) : 0;
1093 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1094
1095 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1096 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1097
1098 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1099 * Float Operations:
1100 *
1101 * "Indirect addressing on source is not supported when source and
1102 * destination data types are mixed float."
1103 */
1104 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1105 (num_sources > 1 &&
1106 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1107 "Indirect addressing on source is not supported when source and "
1108 "destination data types are mixed float");
1109
1110 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1111 * Float Operations:
1112 *
1113 * "No SIMD16 in mixed mode when destination is f32. Instruction
1114 * execution size must be no more than 8."
1115 */
1116 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
1117 "Mixed float mode with 32-bit float destination is limited "
1118 "to SIMD8");
1119
1120 if (is_align16) {
1121 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1122 * Float Operations:
1123 *
1124 * "In Align16 mode, when half float and float data types are mixed
1125 * between source operands OR between source and destination operands,
1126 * the register content are assumed to be packed."
1127 *
1128 * Since Align16 doesn't have a concept of horizontal stride (or width),
1129 * it means that vertical stride must always be 4, since 0 and 2 would
1130 * lead to replicated data, and any other value is disallowed in Align16.
1131 */
1132 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1133 "Align16 mixed float mode assumes packed data (vstride must be 4");
1134
1135 ERROR_IF(num_sources >= 2 &&
1136 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1137 "Align16 mixed float mode assumes packed data (vstride must be 4");
1138
1139 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1140 * Float Operations:
1141 *
1142 * "For Align16 mixed mode, both input and output packed f16 data
1143 * must be oword aligned, no oword crossing in packed f16."
1144 *
1145 * The previous rule requires that Align16 operands are always packed,
1146 * and since there is only one bit for Align16 subnr, which represents
1147 * offsets 0B and 16B, this rule is always enforced and we don't need to
1148 * validate it.
1149 */
1150
1151 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1152 * Float Operations:
1153 *
1154 * "No SIMD16 in mixed mode when destination is packed f16 for both
1155 * Align1 and Align16."
1156 *
1157 * And:
1158 *
1159 * "In Align16 mode, when half float and float data types are mixed
1160 * between source operands OR between source and destination operands,
1161 * the register content are assumed to be packed."
1162 *
1163 * Which implies that SIMD16 is not available in Align16. This is further
1164 * confirmed by:
1165 *
1166 * "For Align16 mixed mode, both input and output packed f16 data
1167 * must be oword aligned, no oword crossing in packed f16"
1168 *
1169 * Since oword-aligned packed f16 data would cross oword boundaries when
1170 * the execution size is larger than 8.
1171 */
1172 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1173
1174 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1175 * Float Operations:
1176 *
1177 * "No accumulator read access for Align16 mixed float."
1178 */
1179 ERROR_IF(inst_uses_src_acc(isa, inst),
1180 "No accumulator read access for Align16 mixed float");
1181 } else {
1182 assert(!is_align16);
1183
1184 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1185 * Float Operations:
1186 *
1187 * "No SIMD16 in mixed mode when destination is packed f16 for both
1188 * Align1 and Align16."
1189 */
1190 ERROR_IF(exec_size > 8 && dst_is_packed &&
1191 dst_type == BRW_REGISTER_TYPE_HF,
1192 "Align1 mixed float mode is limited to SIMD8 when destination "
1193 "is packed half-float");
1194
1195 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1196 * Float Operations:
1197 *
1198 * "Math operations for mixed mode:
1199 * - In Align1, f16 inputs need to be strided"
1200 */
1201 if (opcode == BRW_OPCODE_MATH) {
1202 if (src0_type == BRW_REGISTER_TYPE_HF) {
1203 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1204 "Align1 mixed mode math needs strided half-float inputs");
1205 }
1206
1207 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
1208 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1209 "Align1 mixed mode math needs strided half-float inputs");
1210 }
1211 }
1212
1213 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
1214 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1215 * Float Operations:
1216 *
1217 * "In Align1, destination stride can be smaller than execution
1218 * type. When destination is stride of 1, 16 bit packed data is
1219 * updated on the destination. However, output packed f16 data
1220 * must be oword aligned, no oword crossing in packed f16."
1221 *
1222 * The requirement of not crossing oword boundaries for 16-bit oword
1223 * aligned data means that execution size is limited to 8.
1224 */
1225 unsigned subreg;
1226 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1227 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1228 else
1229 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1230 ERROR_IF(subreg % 16 != 0,
1231 "Align1 mixed mode packed half-float output must be "
1232 "oword aligned");
1233 ERROR_IF(exec_size > 8,
1234 "Align1 mixed mode packed half-float output must not "
1235 "cross oword boundaries (max exec size is 8)");
1236
1237 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1238 * Float Operations:
1239 *
1240 * "When source is float or half float from accumulator register and
1241 * destination is half float with a stride of 1, the source must
1242 * register aligned. i.e., source must have offset zero."
1243 *
1244 * Align16 mixed float mode doesn't allow accumulator access on sources,
1245 * so we only need to check this for Align1.
1246 */
1247 if (src0_is_acc(devinfo, inst) &&
1248 (src0_type == BRW_REGISTER_TYPE_F ||
1249 src0_type == BRW_REGISTER_TYPE_HF)) {
1250 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1251 "Mixed float mode requires register-aligned accumulator "
1252 "source reads when destination is packed half-float");
1253
1254 }
1255
1256 if (num_sources > 1 &&
1257 src1_is_acc(devinfo, inst) &&
1258 (src1_type == BRW_REGISTER_TYPE_F ||
1259 src1_type == BRW_REGISTER_TYPE_HF)) {
1260 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1261 "Mixed float mode requires register-aligned accumulator "
1262 "source reads when destination is packed half-float");
1263 }
1264 }
1265
1266 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1267 * Float Operations:
1268 *
1269 * "No swizzle is allowed when an accumulator is used as an implicit
1270 * source or an explicit source in an instruction. i.e. when
1271 * destination is half float with an implicit accumulator source,
1272 * destination stride needs to be 2."
1273 *
1274 * FIXME: it is not quite clear what the first sentence actually means
1275 * or its link to the implication described after it, so we only
1276 * validate the explicit implication, which is clearly described.
1277 */
1278 if (dst_type == BRW_REGISTER_TYPE_HF &&
1279 inst_uses_src_acc(isa, inst)) {
1280 ERROR_IF(dst_stride != 2,
1281 "Mixed float mode with implicit/explicit accumulator "
1282 "source and half-float destination requires a stride "
1283 "of 2 on the destination");
1284 }
1285 }
1286
1287 return error_msg;
1288 }
1289
1290 /**
1291 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
1292 *
1293 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
1294 * a bitmask of bytes accessed by the region.
1295 *
1296 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
1297 * instruction would be
1298 *
1299 * access_mask[0] = 0x00000000000000F0
1300 * access_mask[1] = 0x000000000000F000
1301 * access_mask[2] = 0x0000000000F00000
1302 * access_mask[3] = 0x00000000F0000000
1303 * access_mask[4-31] = 0
1304 *
1305 * because the first execution channel accesses bytes 7-4 and the second
1306 * execution channel accesses bytes 15-12, etc.
1307 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One bit per byte of a single element, before shifting into place. */
   const uint64_t elem_bits = (1ULL << element_size) - 1;
   const unsigned num_rows = exec_size / width;
   unsigned chan = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      /* Each row starts vstride elements past the previous row's base. */
      unsigned byte = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++) {
         /* Record the bytes this channel touches within the 64-byte
          * (two-GRF) window.
          */
         access_mask[chan++] = elem_bits << (byte % 64);
         byte += hstride * element_size;
      }
   }

   assert(chan == 0 || chan == exec_size);
}
1330
1331 /**
1332 * Returns the number of registers accessed according to the \p access_mask
1333 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   bool lower_touched = false;

   for (unsigned chan = 0; chan < 32; chan++) {
      const uint64_t bits = access_mask[chan];

      /* Any byte accessed beyond the first 32 bytes means the region spills
       * into the second GRF, so two registers are read regardless of the
       * remaining channels.
       */
      if (bits >> 32)
         return 2;

      if (bits)
         lower_touched = true;
   }

   return lower_touched ? 1 : 0;
}
1349
1350 /**
1351 * Checks restrictions listed in "Region Alignment Rules" in the "Register
1352 * Region Restrictions" section.
1353 */
static struct string
region_alignment_rules(const struct brw_isa_info *isa,
                       const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions and Align16 mode are not covered by these Align1
    * direct-addressing rules, and sends do not encode normal regions.
    */
   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(isa, inst))
      return (struct string){};

   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* DO_SRC(n) loads src<n>'s region parameters and builds its per-channel
       * byte-access mask.  Indirect and immediate sources carry no (checkable)
       * region, so the macro skips them via `continue`.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=         \
          BRW_ADDRESS_DIRECT)                                          \
         continue;                                                     \
                                                                       \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);     \
      align1_access_mask(src ## n ## _access_mask,                     \
                         exec_size, element_size, subreg,              \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Byte offset (from the start of the first GRF) of the start of the
       * furthest element in the region: the last row's last element.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
         subreg;
      /* Two adjacent GRFs are 64 bytes per reg_unit. */
      ERROR_IF(offset >= 64 * reg_unit(devinfo),
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining checks are destination rules; skip instructions with no
    * (or a null) destination.
    */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(isa, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   /* Byte offset of the start of the last element written. */
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64 * reg_unit(devinfo),
            "A destination cannot span more than 2 adjacent GRF registers");

   if (error_msg.str)
      return error_msg;

   /* Model the destination as a single-row region; exec_size == 1 is
    * expressed as the scalar region <0,1,0>.
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);

   /* The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL other Gens after
    * SKL.
    */
   if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         /* Count how many channels land in each of the two GRFs: bits above
          * 32 bytes mean the upper register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   return error_msg;
}
1473
1474 static struct string
vector_immediate_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)1475 vector_immediate_restrictions(const struct brw_isa_info *isa,
1476 const brw_inst *inst)
1477 {
1478 const struct intel_device_info *devinfo = isa->devinfo;
1479
1480 unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1481 struct string error_msg = { .str = NULL, .len = 0 };
1482
1483 if (num_sources == 3 || num_sources == 0 ||
1484 (devinfo->ver >= 12 && inst_is_send(isa, inst)))
1485 return (struct string){};
1486
1487 unsigned file = num_sources == 1 ?
1488 brw_inst_src0_reg_file(devinfo, inst) :
1489 brw_inst_src1_reg_file(devinfo, inst);
1490 if (file != BRW_IMMEDIATE_VALUE)
1491 return (struct string){};
1492
1493 enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1494 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1495 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1496 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1497 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1498 enum brw_reg_type type = num_sources == 1 ?
1499 brw_inst_src0_type(devinfo, inst) :
1500 brw_inst_src1_type(devinfo, inst);
1501
1502 /* The PRMs say:
1503 *
1504 * When an immediate vector is used in an instruction, the destination
1505 * must be 128-bit aligned with destination horizontal stride equivalent
1506 * to a word for an immediate integer vector (v) and equivalent to a
1507 * DWord for an immediate float vector (vf).
1508 *
1509 * The text has not been updated for the addition of the immediate unsigned
1510 * integer vector type (uv) on SNB, but presumably the same restriction
1511 * applies.
1512 */
1513 switch (type) {
1514 case BRW_REGISTER_TYPE_V:
1515 case BRW_REGISTER_TYPE_UV:
1516 case BRW_REGISTER_TYPE_VF:
1517 ERROR_IF(dst_subreg % (128 / 8) != 0,
1518 "Destination must be 128-bit aligned in order to use immediate "
1519 "vector types");
1520
1521 if (type == BRW_REGISTER_TYPE_VF) {
1522 ERROR_IF(dst_type_size * dst_stride != 4,
1523 "Destination must have stride equivalent to dword in order "
1524 "to use the VF type");
1525 } else {
1526 ERROR_IF(dst_type_size * dst_stride != 2,
1527 "Destination must have stride equivalent to word in order "
1528 "to use the V or UV type");
1529 }
1530 break;
1531 default:
1532 break;
1533 }
1534
1535 return error_msg;
1536 }
1537
/**
 * Checks restrictions listed in "Special Requirements for Handling Double
 * Precision Data Types" in the "Register Region Restrictions" section.
 */
static struct string
special_requirements_for_handling_double_precision_data_types(
                                           const struct brw_isa_info *isa,
                                           const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = brw_num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Split sends don't have types so there's no doubles there. */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   enum brw_reg_type exec_type = execution_type(isa, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);

   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
   enum brw_reg_type dst_type = inst_dst_type(isa, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);

   /* MUL with two DWord integer sources produces a 64-bit result, so the
    * PRMs treat it like a 64-bit operation.
    */
   bool is_integer_dword_multiply =
      brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL &&
      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);

   /* "64b datatype or operation is integer DWord multiply", as worded in the
    * PRM quotes below.
    */
   const bool is_double_precision =
      dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
      bool is_scalar_region;
      enum brw_reg_file file;
      enum brw_reg_type type;

      /* DO_SRC(n) loads src<n>'s region, file, type, and addressing
       * parameters; immediate sources are skipped via `continue`.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);\
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      file = brw_inst_src ## n ## _reg_file(devinfo, inst);            \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      type_size = brw_reg_type_to_size(type);                          \
      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);            \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);     \
      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Effective strides in bytes; a zero hstride falls back to vstride so
       * scalar/row regions still get a meaningful byte stride.
       */
      const unsigned src_stride = (hstride ? hstride : vstride) * type_size;
      const unsigned dst_stride = dst_hstride * dst_type_size;

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
       *    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          intel_device_info_is_9lp(devinfo)) {
         ERROR_IF(!is_scalar_region &&
                  (src_stride % 8 != 0 ||
                   dst_stride % 8 != 0 ||
                   src_stride != dst_stride),
                  "Source and destination horizontal stride must equal and a "
                  "multiple of a qword when the execution type is 64-bit");

         ERROR_IF(vstride != width * hstride,
                  "Vstride must be Width * Hstride when the execution type is "
                  "64-bit");

         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
                  "Source and destination offset must be the same when the "
                  "execution type is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          intel_device_info_is_9lp(devinfo)) {
         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
                  "Indirect addressing is not allowed when the execution type "
                  "is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       *
       * We assume that the restriction applies to GLK as well.
       *
       * We assume that the restriction does not apply to the null register.
       */
      if (is_double_precision &&
          intel_device_info_is_9lp(devinfo)) {
         ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC ||
                  brw_inst_acc_wr_control(devinfo, inst) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
                   reg != BRW_ARF_NULL) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
                   dst_reg != BRW_ARF_NULL),
                  "Architecture registers cannot be used when the execution "
                  "type is 64-bit");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * There are two rules:
       *
       * "In case of all floating point data types used in destination:" and
       *
       * "In case where source or destination datatype is 64b or operation is
       *  integer DWord multiply:"
       *
       * both of which list the same restrictions:
       *
       *  "1. Register Regioning patterns where register data bit location
       *      of the LSB of the channels are changed between source and
       *      destination are not supported on Src0 and Src1 except for
       *      broadcast of a scalar.
       *
       *   2. Explicit ARF registers except null and accumulator must not be
       *      used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(dst_type) ||
           is_double_precision)) {
         ERROR_IF(!is_scalar_region &&
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER != address_mode &&
                  (!is_linear(vstride, width, hstride) ||
                   src_stride != dst_stride ||
                   subreg != dst_subreg),
                  "Register Regioning patterns where register data bit "
                  "location of the LSB of the channels are changed between "
                  "source and destination are not supported except for "
                  "broadcast of a scalar.");

         ERROR_IF((address_mode == BRW_ADDRESS_DIRECT && file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||
                  (dst_file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR),
                  "Explicit ARF registers except null and accumulator must not "
                  "be used.");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
       *  Quad-Word data must not be used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) {
         ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
                  vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
                  "Vx1 and VxH indirect addressing for Float, Half-Float, "
                  "Double-Float and Quad-Word data must not be used");
      }
   }

   /* The PRMs say that for BDW, SKL:
    *
    *    If Align16 is required for an operation with QW destination and non-QW
    *    source datatypes, the execution size cannot exceed 2.
    *
    * We assume that the restriction applies to all Gfx8+ parts.
    */
   if (is_double_precision) {
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      enum brw_reg_type src1_type =
         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
      unsigned src1_type_size = brw_reg_type_to_size(src1_type);

      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
               dst_type_size == 8 &&
               (src0_type_size != 8 || src1_type_size != 8) &&
               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
               "In Align16 exec size cannot exceed 2 with a QWord destination "
               "and a non-QWord source");
   }

   /* The PRMs say that for CHV, BXT:
    *
    *    When source or destination datatype is 64b or operation is integer
    *    DWord multiply, DepCtrl must not be used.
    *
    * We assume that the restriction applies to GLK as well.
    */
   if (is_double_precision &&
       intel_device_info_is_9lp(devinfo)) {
      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
               brw_inst_no_dd_clear(devinfo, inst),
               "DepCtrl is not allowed when the execution type is 64-bit");
   }

   return error_msg;
}
1768
1769 static struct string
instruction_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)1770 instruction_restrictions(const struct brw_isa_info *isa,
1771 const brw_inst *inst)
1772 {
1773 const struct intel_device_info *devinfo = isa->devinfo;
1774 struct string error_msg = { .str = NULL, .len = 0 };
1775
1776 /* From Wa_1604601757:
1777 *
1778 * "When multiplying a DW and any lower precision integer, source modifier
1779 * is not supported."
1780 */
1781 if (devinfo->ver >= 12 &&
1782 brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
1783 enum brw_reg_type exec_type = execution_type(isa, inst);
1784 const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
1785 brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1786 !(brw_inst_src0_negate(devinfo, inst) ||
1787 brw_inst_src0_abs(devinfo, inst));
1788 const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
1789 brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1790 !(brw_inst_src1_negate(devinfo, inst) ||
1791 brw_inst_src1_abs(devinfo, inst));
1792
1793 ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&
1794 type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
1795 "When multiplying a DW and any lower precision integer, source "
1796 "modifier is not supported.");
1797 }
1798
1799 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP ||
1800 brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) {
1801 ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
1802 "CMP (or CMPN) must have a condition.");
1803 }
1804
1805 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_SEL) {
1806 ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
1807 (brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
1808 "SEL must either be predicated or have a condition modifiers");
1809 }
1810
1811 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
1812 const enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1813 const enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
1814 const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1815
1816 /* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
1817 *
1818 * When multiplying a DW and any lower precision integer, the DW
1819 * operand must on src0.
1820 *
1821 * Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
1822 * text.
1823 */
1824 ERROR_IF(brw_reg_type_is_integer(src1_type) &&
1825 type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
1826 "When multiplying a DW and any lower precision integer, the "
1827 "DW operand must be src0.");
1828
1829 /* Page 971 (page 987 of the PDF), section "Accumulator
1830 * Restrictions," of the Broadwell PRM volume 7 says:
1831 *
1832 * Integer source operands cannot be accumulators.
1833 *
1834 * The Skylake and Ice Lake PRMs contain the same text.
1835 */
1836 ERROR_IF((src0_is_acc(devinfo, inst) &&
1837 brw_reg_type_is_integer(src0_type)) ||
1838 (src1_is_acc(devinfo, inst) &&
1839 brw_reg_type_is_integer(src1_type)),
1840 "Integer source operands cannot be accumulators.");
1841
1842 /* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
1843 *
1844 * When multiplying integer data types, if one of the sources is a
1845 * DW, the resulting full precision data is stored in the
1846 * accumulator. However, if the destination data type is either W or
1847 * DW, the low bits of the result are written to the destination
1848 * register and the remaining high bits are discarded. This results
1849 * in undefined Overflow and Sign flags. Therefore, conditional
1850 * modifiers and saturation (.sat) cannot be used in this case.
1851 *
1852 * Similar text appears in every version of the PRM.
1853 *
1854 * The wording of the last sentence is not very clear. It could either
1855 * be interpreted as "conditional modifiers combined with saturation
1856 * cannot be used" or "neither conditional modifiers nor saturation can
1857 * be used." I have interpreted it as the latter primarily because that
1858 * is the more restrictive interpretation.
1859 */
1860 ERROR_IF((src0_type == BRW_REGISTER_TYPE_UD ||
1861 src0_type == BRW_REGISTER_TYPE_D ||
1862 src1_type == BRW_REGISTER_TYPE_UD ||
1863 src1_type == BRW_REGISTER_TYPE_D) &&
1864 (dst_type == BRW_REGISTER_TYPE_UD ||
1865 dst_type == BRW_REGISTER_TYPE_D ||
1866 dst_type == BRW_REGISTER_TYPE_UW ||
1867 dst_type == BRW_REGISTER_TYPE_W) &&
1868 (brw_inst_saturate(devinfo, inst) != 0 ||
1869 brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE),
1870 "Neither Saturate nor conditional modifier allowed with DW "
1871 "integer multiply.");
1872 }
1873
1874 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
1875 unsigned math_function = brw_inst_math_function(devinfo, inst);
1876 switch (math_function) {
1877 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
1878 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
1879 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: {
1880 /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
1881 * INT DIV function does not support source modifiers.
1882 * Bspec 6647 extends it back to Ivy Bridge.
1883 */
1884 bool src0_valid = !brw_inst_src0_negate(devinfo, inst) &&
1885 !brw_inst_src0_abs(devinfo, inst);
1886 bool src1_valid = !brw_inst_src1_negate(devinfo, inst) &&
1887 !brw_inst_src1_abs(devinfo, inst);
1888 ERROR_IF(!src0_valid || !src1_valid,
1889 "INT DIV function does not support source modifiers.");
1890 break;
1891 }
1892 default:
1893 break;
1894 }
1895 }
1896
1897 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DP4A) {
1898 /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
1899 *
1900 * Only one of src0 or src1 operand may be an the (sic) accumulator
1901 * register (acc#).
1902 */
1903 ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
1904 "Only one of src0 or src1 operand may be an accumulator "
1905 "register (acc#).");
1906
1907 }
1908
1909 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_ADD3) {
1910 const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1911
1912 ERROR_IF(dst_type != BRW_REGISTER_TYPE_D &&
1913 dst_type != BRW_REGISTER_TYPE_UD &&
1914 dst_type != BRW_REGISTER_TYPE_W &&
1915 dst_type != BRW_REGISTER_TYPE_UW,
1916 "Destination must be integer D, UD, W, or UW type.");
1917
1918 for (unsigned i = 0; i < 3; i++) {
1919 enum brw_reg_type src_type;
1920
1921 switch (i) {
1922 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
1923 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
1924 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
1925 default: unreachable("invalid src");
1926 }
1927
1928 ERROR_IF(src_type != BRW_REGISTER_TYPE_D &&
1929 src_type != BRW_REGISTER_TYPE_UD &&
1930 src_type != BRW_REGISTER_TYPE_W &&
1931 src_type != BRW_REGISTER_TYPE_UW,
1932 "Source must be integer D, UD, W, or UW type.");
1933
1934 if (i == 0) {
1935 if (brw_inst_3src_a1_src0_is_imm(devinfo, inst)) {
1936 ERROR_IF(src_type != BRW_REGISTER_TYPE_W &&
1937 src_type != BRW_REGISTER_TYPE_UW,
1938 "Immediate source must be integer W or UW type.");
1939 }
1940 } else if (i == 2) {
1941 if (brw_inst_3src_a1_src2_is_imm(devinfo, inst)) {
1942 ERROR_IF(src_type != BRW_REGISTER_TYPE_W &&
1943 src_type != BRW_REGISTER_TYPE_UW,
1944 "Immediate source must be integer W or UW type.");
1945 }
1946 }
1947 }
1948 }
1949
1950 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_OR ||
1951 brw_inst_opcode(isa, inst) == BRW_OPCODE_AND ||
1952 brw_inst_opcode(isa, inst) == BRW_OPCODE_XOR ||
1953 brw_inst_opcode(isa, inst) == BRW_OPCODE_NOT) {
1954 /* While the behavior of the negate source modifier is defined as
1955 * logical not, the behavior of abs source modifier is not
1956 * defined. Disallow it to be safe.
1957 */
1958 ERROR_IF(brw_inst_src0_abs(devinfo, inst),
1959 "Behavior of abs source modifier in logic ops is undefined.");
1960 ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
1961 brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
1962 brw_inst_src1_abs(devinfo, inst),
1963 "Behavior of abs source modifier in logic ops is undefined.");
1964
1965 /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
1966 *
1967 * Source modifier is not allowed if source is an accumulator.
1968 *
1969 * The same text also appears for OR, NOT, and XOR instructions.
1970 */
1971 ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
1972 brw_inst_src0_negate(devinfo, inst)) &&
1973 src0_is_acc(devinfo, inst),
1974 "Source modifier is not allowed if source is an accumulator.");
1975 ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
1976 (brw_inst_src1_abs(devinfo, inst) ||
1977 brw_inst_src1_negate(devinfo, inst)) &&
1978 src1_is_acc(devinfo, inst),
1979 "Source modifier is not allowed if source is an accumulator.");
1980
1981 /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
1982 *
1983 * This operation does not produce sign or overflow conditions. Only
1984 * the .e/.z or .ne/.nz conditional modifiers should be used.
1985 *
1986 * The same text also appears for OR, NOT, and XOR instructions.
1987 *
1988 * Per the comment around nir_op_imod in brw_fs_nir.cpp, we have
1989 * determined this to not be true. The only conditions that seem
1990 * absolutely sketchy are O, R, and U. Some OpenGL shaders from Doom
1991 * 2016 have been observed to generate and.g and operate correctly.
1992 */
1993 const enum brw_conditional_mod cmod =
1994 brw_inst_cond_modifier(devinfo, inst);
1995 ERROR_IF(cmod == BRW_CONDITIONAL_O ||
1996 cmod == BRW_CONDITIONAL_R ||
1997 cmod == BRW_CONDITIONAL_U,
1998 "O, R, and U conditional modifiers should not be used.");
1999 }
2000
2001 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_BFI2) {
2002 ERROR_IF(brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE,
2003 "BFI2 cannot have conditional modifier");
2004
2005 ERROR_IF(brw_inst_saturate(devinfo, inst),
2006 "BFI2 cannot have saturate modifier");
2007
2008 enum brw_reg_type dst_type;
2009
2010 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
2011 dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
2012 else
2013 dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
2014
2015 ERROR_IF(dst_type != BRW_REGISTER_TYPE_D &&
2016 dst_type != BRW_REGISTER_TYPE_UD,
2017 "BFI2 destination type must be D or UD");
2018
2019 for (unsigned s = 0; s < 3; s++) {
2020 enum brw_reg_type src_type;
2021
2022 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
2023 switch (s) {
2024 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
2025 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
2026 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
2027 default: unreachable("invalid src");
2028 }
2029 } else {
2030 src_type = brw_inst_3src_a16_src_type(devinfo, inst);
2031 }
2032
2033 ERROR_IF(src_type != dst_type,
2034 "BFI2 source type must match destination type");
2035 }
2036 }
2037
2038 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CSEL) {
2039 ERROR_IF(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE,
2040 "CSEL cannot be predicated");
2041
2042 /* CSEL is CMP and SEL fused into one. The condition modifier, which
2043 * does not actually modify the flags, controls the built-in comparison.
2044 */
2045 ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
2046 "CSEL must have a condition.");
2047
2048 enum brw_reg_type dst_type;
2049
2050 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
2051 dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
2052 else
2053 dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
2054
2055 if (devinfo->ver == 9) {
2056 ERROR_IF(dst_type != BRW_REGISTER_TYPE_F,
2057 "CSEL destination type must be F");
2058 } else {
2059 ERROR_IF(dst_type != BRW_REGISTER_TYPE_F &&
2060 dst_type != BRW_REGISTER_TYPE_HF &&
2061 dst_type != BRW_REGISTER_TYPE_D &&
2062 dst_type != BRW_REGISTER_TYPE_W,
2063 "CSEL destination type must be F, HF, D, or W");
2064 }
2065
2066 for (unsigned s = 0; s < 3; s++) {
2067 enum brw_reg_type src_type;
2068
2069 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
2070 switch (s) {
2071 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
2072 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
2073 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
2074 default: unreachable("invalid src");
2075 }
2076 } else {
2077 src_type = brw_inst_3src_a16_src_type(devinfo, inst);
2078 }
2079
2080 ERROR_IF(src_type != dst_type,
2081 "CSEL source type must match destination type");
2082 }
2083 }
2084
2085 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DPAS) {
2086 ERROR_IF(brw_inst_dpas_3src_sdepth(devinfo, inst) != BRW_SYSTOLIC_DEPTH_8,
2087 "Systolic depth must be 8.");
2088
2089 const unsigned sdepth = 8;
2090
2091 const enum brw_reg_type dst_type =
2092 brw_inst_dpas_3src_dst_type(devinfo, inst);
2093 const enum brw_reg_type src0_type =
2094 brw_inst_dpas_3src_src0_type(devinfo, inst);
2095 const enum brw_reg_type src1_type =
2096 brw_inst_dpas_3src_src1_type(devinfo, inst);
2097 const enum brw_reg_type src2_type =
2098 brw_inst_dpas_3src_src2_type(devinfo, inst);
2099
2100 const enum gfx12_sub_byte_precision src1_sub_byte =
2101 brw_inst_dpas_3src_src1_subbyte(devinfo, inst);
2102
2103 if (src1_type != BRW_REGISTER_TYPE_B && src1_type != BRW_REGISTER_TYPE_UB) {
2104 ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE,
2105 "Sub-byte precision must be None for source type larger than Byte.");
2106 } else {
2107 ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE &&
2108 src1_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT &&
2109 src1_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT,
2110 "Invalid sub-byte precision.");
2111 }
2112
2113 const enum gfx12_sub_byte_precision src2_sub_byte =
2114 brw_inst_dpas_3src_src2_subbyte(devinfo, inst);
2115
2116 if (src2_type != BRW_REGISTER_TYPE_B && src2_type != BRW_REGISTER_TYPE_UB) {
2117 ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE,
2118 "Sub-byte precision must be None.");
2119 } else {
2120 ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE &&
2121 src2_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT &&
2122 src2_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT,
2123 "Invalid sub-byte precision.");
2124 }
2125
2126 const unsigned src1_bits_per_element =
2127 (8 * brw_reg_type_to_size(src1_type)) >>
2128 brw_inst_dpas_3src_src1_subbyte(devinfo, inst);
2129
2130 const unsigned src2_bits_per_element =
2131 (8 * brw_reg_type_to_size(src2_type)) >>
2132 brw_inst_dpas_3src_src2_subbyte(devinfo, inst);
2133
2134 /* The MAX2(1, ...) is just to prevent possible division by 0 later. */
2135 const unsigned ops_per_chan =
2136 MAX2(1, 32 / MAX2(src1_bits_per_element, src2_bits_per_element));
2137
2138 ERROR_IF(brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_8,
2139 "DPAS execution size must be 8.");
2140
2141 const unsigned exec_size = 8;
2142
2143 const unsigned dst_subnr = brw_inst_dpas_3src_dst_subreg_nr(devinfo, inst);
2144 const unsigned src0_subnr = brw_inst_dpas_3src_src0_subreg_nr(devinfo, inst);
2145 const unsigned src1_subnr = brw_inst_dpas_3src_src1_subreg_nr(devinfo, inst);
2146 const unsigned src2_subnr = brw_inst_dpas_3src_src2_subreg_nr(devinfo, inst);
2147
2148 /* Until HF is supported as dst type, this is effectively subnr == 0. */
2149 ERROR_IF(dst_subnr % exec_size != 0,
2150 "Destination subregister offset must be a multiple of ExecSize.");
2151
2152 /* Until HF is supported as src0 type, this is effectively subnr == 0. */
2153 ERROR_IF(src0_subnr % exec_size != 0,
2154 "Src0 subregister offset must be a multiple of ExecSize.");
2155
2156 ERROR_IF(src1_subnr != 0,
2157 "Src1 subregister offsets must be 0.");
2158
2159 /* In nearly all cases, this effectively requires that src2.subnr be
2160 * 0. It is only when src1 is 8 bits and src2 is 2 or 4 bits that the
2161 * ops_per_chan value can allow non-zero src2.subnr.
2162 */
2163 ERROR_IF(src2_subnr % (sdepth * ops_per_chan) != 0,
2164 "Src2 subregister offset must be a multiple of SystolicDepth "
2165 "times OPS_PER_CHAN.");
2166
2167 ERROR_IF(dst_subnr * type_sz(dst_type) >= REG_SIZE,
2168 "Destination subregister specifies next register.");
2169
2170 ERROR_IF(src0_subnr * type_sz(src0_type) >= REG_SIZE,
2171 "Src0 subregister specifies next register.");
2172
2173 ERROR_IF((src1_subnr * type_sz(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE,
2174 "Src1 subregister specifies next register.");
2175
2176 ERROR_IF((src2_subnr * type_sz(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE,
2177 "Src2 subregister specifies next register.");
2178
2179 if (brw_inst_3src_atomic_control(devinfo, inst)) {
2180 /* FINISHME: When we start emitting DPAS with Atomic set, figure out
2181 * a way to validate it. Also add a test in test_eu_validate.cpp.
2182 */
2183 ERROR_IF(true,
2184 "When instruction option Atomic is used it must be follwed by a "
2185 "DPAS instruction.");
2186 }
2187
2188 if (brw_inst_dpas_3src_exec_type(devinfo, inst) ==
2189 BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT) {
2190 ERROR_IF(dst_type != BRW_REGISTER_TYPE_F,
2191 "DPAS destination type must be F.");
2192 ERROR_IF(src0_type != BRW_REGISTER_TYPE_F,
2193 "DPAS src0 type must be F.");
2194 ERROR_IF(src1_type != BRW_REGISTER_TYPE_HF,
2195 "DPAS src1 type must be HF.");
2196 ERROR_IF(src2_type != BRW_REGISTER_TYPE_HF,
2197 "DPAS src2 type must be HF.");
2198 } else {
2199 ERROR_IF(dst_type != BRW_REGISTER_TYPE_D &&
2200 dst_type != BRW_REGISTER_TYPE_UD,
2201 "DPAS destination type must be D or UD.");
2202 ERROR_IF(src0_type != BRW_REGISTER_TYPE_D &&
2203 src0_type != BRW_REGISTER_TYPE_UD,
2204 "DPAS src0 type must be D or UD.");
2205 ERROR_IF(src1_type != BRW_REGISTER_TYPE_B &&
2206 src1_type != BRW_REGISTER_TYPE_UB,
2207 "DPAS src1 base type must be B or UB.");
2208 ERROR_IF(src2_type != BRW_REGISTER_TYPE_B &&
2209 src2_type != BRW_REGISTER_TYPE_UB,
2210 "DPAS src2 base type must be B or UB.");
2211
2212 if (brw_reg_type_is_unsigned_integer(dst_type)) {
2213 ERROR_IF(!brw_reg_type_is_unsigned_integer(src0_type) ||
2214 !brw_reg_type_is_unsigned_integer(src1_type) ||
2215 !brw_reg_type_is_unsigned_integer(src2_type),
2216 "If any source datatype is signed, destination datatype "
2217 "must be signed.");
2218 }
2219 }
2220
2221 /* FINISHME: Additional restrictions mentioned in the Bspec that are not
2222 * yet enforced here:
2223 *
2224 * - General Accumulator registers access is not supported. This is
2225 * currently enforced in brw_dpas_three_src (brw_eu_emit.c).
2226 *
2227 * - Given any combination of datatypes in the sources of a DPAS
2228 * instructions, the boundaries of a register should not be crossed.
2229 */
2230 }
2231
2232 return error_msg;
2233 }
2234
2235 static struct string
send_descriptor_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)2236 send_descriptor_restrictions(const struct brw_isa_info *isa,
2237 const brw_inst *inst)
2238 {
2239 const struct intel_device_info *devinfo = isa->devinfo;
2240 struct string error_msg = { .str = NULL, .len = 0 };
2241
2242 if (inst_is_split_send(isa, inst)) {
2243 /* We can only validate immediate descriptors */
2244 if (brw_inst_send_sel_reg32_desc(devinfo, inst))
2245 return error_msg;
2246 } else if (inst_is_send(isa, inst)) {
2247 /* We can only validate immediate descriptors */
2248 if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE)
2249 return error_msg;
2250 } else {
2251 return error_msg;
2252 }
2253
2254 const uint32_t desc = brw_inst_send_desc(devinfo, inst);
2255
2256 switch (brw_inst_sfid(devinfo, inst)) {
2257 case BRW_SFID_URB:
2258 if (devinfo->ver < 20)
2259 break;
2260 FALLTHROUGH;
2261 case GFX12_SFID_TGM:
2262 case GFX12_SFID_SLM:
2263 case GFX12_SFID_UGM:
2264 ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");
2265
2266 ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
2267 lsc_msg_desc_transpose(devinfo, desc) &&
2268 brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
2269 "Transposed vectors are restricted to Exec_Mask = 1.");
2270 break;
2271
2272 default:
2273 break;
2274 }
2275
2276 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
2277 ERROR_IF(!brw_inst_header_present(devinfo, inst),
2278 "Header must be present for all URB messages.");
2279
2280 switch (brw_inst_urb_opcode(devinfo, inst)) {
2281 case BRW_URB_OPCODE_READ_HWORD:
2282 case BRW_URB_OPCODE_READ_OWORD:
2283 case BRW_URB_OPCODE_WRITE_HWORD:
2284 case BRW_URB_OPCODE_WRITE_OWORD:
2285 case GFX7_URB_OPCODE_ATOMIC_INC:
2286 case GFX7_URB_OPCODE_ATOMIC_MOV:
2287 case GFX8_URB_OPCODE_ATOMIC_ADD:
2288 case GFX8_URB_OPCODE_SIMD8_WRITE:
2289 break;
2290
2291 case GFX8_URB_OPCODE_SIMD8_READ:
2292 ERROR_IF(brw_inst_rlen(devinfo, inst) == 0,
2293 "URB SIMD8 read message must read some data.");
2294 break;
2295
2296 case GFX125_URB_OPCODE_FENCE:
2297 ERROR_IF(devinfo->verx10 < 125,
2298 "URB fence message only valid on gfx >= 12.5");
2299 break;
2300
2301 default:
2302 ERROR_IF(true, "Invalid URB message");
2303 break;
2304 }
2305 }
2306
2307 return error_msg;
2308 }
2309
/**
 * Validate a single (uncompacted) instruction.
 *
 * Runs every restriction-check pass in sequence, accumulating their error
 * text.  If errors were found and \p disasm is non-NULL, the message is
 * attached to the disassembly at \p offset / \p inst_size.
 *
 * Returns true when the instruction passed all checks.
 */
bool
brw_validate_instruction(const struct brw_isa_info *isa,
                         const brw_inst *inst, int offset,
                         unsigned inst_size,
                         struct disasm_info *disasm)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (is_unsupported_inst(isa, inst)) {
      ERROR("Instruction not supported on this Gen");
   } else {
      CHECK(invalid_values);

      /* The remaining checks only run if the basic field values were valid
       * (error_msg is still empty after invalid_values).
       */
      if (error_msg.str == NULL) {
         CHECK(sources_not_null);
         CHECK(send_restrictions);
         CHECK(alignment_supported);
         CHECK(general_restrictions_based_on_operand_types);
         CHECK(general_restrictions_on_region_parameters);
         CHECK(special_restrictions_for_mixed_float_mode);
         CHECK(region_alignment_rules);
         CHECK(vector_immediate_restrictions);
         CHECK(special_requirements_for_handling_double_precision_data_types);
         CHECK(instruction_restrictions);
         CHECK(send_descriptor_restrictions);
      }
   }

   if (error_msg.str && disasm) {
      disasm_insert_error(disasm, offset, inst_size, error_msg.str);
   }
   /* Only the string buffer is heap-allocated; .len remains readable after
    * the free and is zero iff no errors were recorded.
    */
   free(error_msg.str);

   return error_msg.len == 0;
}
2345
2346 bool
brw_validate_instructions(const struct brw_isa_info * isa,const void * assembly,int start_offset,int end_offset,struct disasm_info * disasm)2347 brw_validate_instructions(const struct brw_isa_info *isa,
2348 const void *assembly, int start_offset, int end_offset,
2349 struct disasm_info *disasm)
2350 {
2351 const struct intel_device_info *devinfo = isa->devinfo;
2352 bool valid = true;
2353
2354 for (int src_offset = start_offset; src_offset < end_offset;) {
2355 const brw_inst *inst = assembly + src_offset;
2356 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
2357 unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
2358 : sizeof(brw_inst);
2359 brw_inst uncompacted;
2360
2361 if (is_compact) {
2362 brw_compact_inst *compacted = (void *)inst;
2363 brw_uncompact_instruction(isa, &uncompacted, compacted);
2364 inst = &uncompacted;
2365 }
2366
2367 bool v = brw_validate_instruction(isa, inst, src_offset,
2368 inst_size, disasm);
2369 valid = valid && v;
2370
2371 src_offset += inst_size;
2372 }
2373
2374 return valid;
2375 }
2376