1 /*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41 #include <stdlib.h>
42 #include "brw_eu.h"
43
/* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;  /* NUL-terminated buffer, heap-allocated (NULL when empty) */
   size_t len; /* length in bytes, not counting the terminator */
};

/**
 * Appends \p src to \p dest, growing dest's buffer as needed.
 *
 * Uses a temporary for realloc() so that an allocation failure neither
 * leaks nor clobbers the existing string; on OOM the destination is left
 * unchanged (the original code dereferenced the NULL result).
 */
static void
cat(struct string *dest, const struct string src)
{
   char *tmp = realloc(dest->str, dest->len + src.len + 1);
   if (tmp == NULL)
      return;

   dest->str = tmp;
   memcpy(dest->str + dest->len, src.str, src.len);
   dest->str[dest->len + src.len] = '\0';
   dest->len = dest->len + src.len;
}
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
59
60 static bool
contains(const struct string haystack,const struct string needle)61 contains(const struct string haystack, const struct string needle)
62 {
63 return haystack.str && memmem(haystack.str, haystack.len,
64 needle.str, needle.len) != NULL;
65 }
66 #define CONTAINS(haystack, needle) \
67 contains(haystack, (struct string){needle, strlen(needle)})
68
/* Formats a single validator error line; this exact string is also what
 * ERROR_IF searches for to suppress duplicate messages.
 */
#define error(str) "\tERROR: " str "\n"
#define ERROR_INDENT "\t "

/* Unconditionally record an error message (still deduplicated). */
#define ERROR(msg) ERROR_IF(true, msg)
/* Append \p msg to the local `error_msg` when \p cond holds, unless the
 * identical message has already been recorded for this instruction.
 */
#define ERROR_IF(cond, msg) \
   do { \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg)); \
      } \
   } while(0)

/* Run a validation pass func(isa, inst, args...) and fold any error string
 * it returns into the local `error_msg`, freeing the temporary buffer.
 */
#define CHECK(func, args...) \
   do { \
      struct string __msg = func(isa, inst, ##args); \
      if (__msg.str) { \
         cat(&error_msg, __msg); \
         free(__msg.str); \
      } \
   } while (0)
88
/* Decode a regioning stride field: 0 encodes stride 0, n encodes 2^(n-1).
 * Note: every use of the argument is parenthesized (the first use was not).
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
/* Decode a width field: n encodes a width of 2^n channels. */
#define WIDTH(width) (1 << (width))
91
92 static bool
inst_is_send(const struct brw_isa_info * isa,const brw_inst * inst)93 inst_is_send(const struct brw_isa_info *isa, const brw_inst *inst)
94 {
95 switch (brw_inst_opcode(isa, inst)) {
96 case BRW_OPCODE_SEND:
97 case BRW_OPCODE_SENDC:
98 case BRW_OPCODE_SENDS:
99 case BRW_OPCODE_SENDSC:
100 return true;
101 default:
102 return false;
103 }
104 }
105
106 static bool
inst_is_split_send(const struct brw_isa_info * isa,const brw_inst * inst)107 inst_is_split_send(const struct brw_isa_info *isa, const brw_inst *inst)
108 {
109 const struct intel_device_info *devinfo = isa->devinfo;
110
111 if (devinfo->ver >= 12) {
112 return inst_is_send(isa, inst);
113 } else {
114 switch (brw_inst_opcode(isa, inst)) {
115 case BRW_OPCODE_SENDS:
116 case BRW_OPCODE_SENDSC:
117 return true;
118 default:
119 return false;
120 }
121 }
122 }
123
124 static unsigned
signed_type(unsigned type)125 signed_type(unsigned type)
126 {
127 switch (type) {
128 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
129 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
130 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
131 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
132 default: return type;
133 }
134 }
135
136 static enum brw_reg_type
inst_dst_type(const struct brw_isa_info * isa,const brw_inst * inst)137 inst_dst_type(const struct brw_isa_info *isa, const brw_inst *inst)
138 {
139 const struct intel_device_info *devinfo = isa->devinfo;
140
141 return (devinfo->ver < 12 || !inst_is_send(isa, inst)) ?
142 brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;
143 }
144
145 static bool
inst_is_raw_move(const struct brw_isa_info * isa,const brw_inst * inst)146 inst_is_raw_move(const struct brw_isa_info *isa, const brw_inst *inst)
147 {
148 const struct intel_device_info *devinfo = isa->devinfo;
149
150 unsigned dst_type = signed_type(inst_dst_type(isa, inst));
151 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
152
153 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
154 /* FIXME: not strictly true */
155 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
156 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
157 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
158 return false;
159 }
160 } else if (brw_inst_src0_negate(devinfo, inst) ||
161 brw_inst_src0_abs(devinfo, inst)) {
162 return false;
163 }
164
165 return brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV &&
166 brw_inst_saturate(devinfo, inst) == 0 &&
167 dst_type == src_type;
168 }
169
170 static bool
dst_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)171 dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
172 {
173 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
174 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
175 }
176
177 static bool
src0_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)178 src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
179 {
180 return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
181 brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
182 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
183 }
184
185 static bool
src1_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)186 src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
187 {
188 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
189 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
190 }
191
192 static bool
src0_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)193 src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
194 {
195 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
196 (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
197 }
198
199 static bool
src1_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)200 src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
201 {
202 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
203 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
204 }
205
206 static bool
src0_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)207 src0_has_scalar_region(const struct intel_device_info *devinfo,
208 const brw_inst *inst)
209 {
210 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
211 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
212 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
213 }
214
215 static bool
src1_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)216 src1_has_scalar_region(const struct intel_device_info *devinfo,
217 const brw_inst *inst)
218 {
219 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
220 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
221 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
222 }
223
/**
 * Returns the number of source operands of \p inst.
 *
 * Most opcodes take the count from the opcode descriptor table, but MATH
 * depends on which math function is encoded, and pre-Gfx6 SEND is special
 * because extended math was performed through the message unit.
 */
static unsigned
num_sources_from_inst(const struct brw_isa_info *isa,
                      const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned math_function;

   if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
      math_function = brw_inst_math_function(devinfo, inst);
   } else if (devinfo->ver < 6 &&
              brw_inst_opcode(isa, inst) == BRW_OPCODE_SEND) {
      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* Only MATH reaches this point: unary vs. binary is per-function. */
   switch (math_function) {
   case BRW_MATH_FUNCTION_INV:
   case BRW_MATH_FUNCTION_LOG:
   case BRW_MATH_FUNCTION_EXP:
   case BRW_MATH_FUNCTION_SQRT:
   case BRW_MATH_FUNCTION_RSQ:
   case BRW_MATH_FUNCTION_SIN:
   case BRW_MATH_FUNCTION_COS:
   case BRW_MATH_FUNCTION_SINCOS:
   case GFX8_MATH_FUNCTION_INVM:
   case GFX8_MATH_FUNCTION_RSQRTM:
      return 1;
   case BRW_MATH_FUNCTION_FDIV:
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}
279
/**
 * Checks that the instruction encodes legal execution-size, register-file,
 * and register-type values.
 *
 * Returns an error string (empty/NULL on success); the caller owns and
 * frees the returned buffer.
 */
static struct string
invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Only power-of-two execution sizes from 1 to 32 are encodable. */
   switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
   case BRW_EXECUTE_1:
   case BRW_EXECUTE_2:
   case BRW_EXECUTE_4:
   case BRW_EXECUTE_8:
   case BRW_EXECUTE_16:
   case BRW_EXECUTE_32:
      break;
   default:
      ERROR("invalid execution size");
      break;
   }

   /* Sends encode message descriptors instead of normal operand fields,
    * so the register file/type checks below don't apply.
    */
   if (inst_is_send(isa, inst))
      return error_msg;

   if (num_sources == 3) {
      /* Nothing to test:
       *    No 3-src instructions on Gfx4-5
       *    No reg file bits on Gfx6-10 (align16)
       *    No invalid encodings on Gfx10-12 (align1)
       */
   } else {
      if (devinfo->ver > 6) {
         /* MRF registers no longer exist on Gfx7+, so the encoding that
          * used to select them is invalid.
          */
         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
                  (num_sources > 0 &&
                   brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
                  (num_sources > 1 &&
                   brw_inst_src1_reg_file(devinfo, inst) == MRF),
                  "invalid register file encoding");
      }
   }

   /* Don't attempt to decode types when the file encodings were bad. */
   if (error_msg.str)
      return error_msg;

   if (num_sources == 3) {
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         if (devinfo->ver >= 10) {
            ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,
                     "invalid register type encoding");
         } else {
            ERROR("Align1 mode not allowed on Gen < 10");
         }
      } else {
         /* Align16 3-src shares one type field across all sources. */
         ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
                  brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
                  "invalid register type encoding");
      }
   } else {
      ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
               (num_sources > 0 &&
                brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
               (num_sources > 1 &&
                brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
               "invalid register type encoding");
   }

   return error_msg;
}
351
/**
 * Checks that sources which may not legally be the null register are not
 * null.  Returns an error string owned by the caller (NULL when clean).
 */
static struct string
sources_not_null(const struct brw_isa_info *isa,
                 const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   unsigned num_sources = num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Nothing to test. 3-src instructions can only have GRF sources, and
    * there's no bit to control the file.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Nothing to test. Split sends can only encode a file in sources that are
    * allowed to be NULL.
    */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   /* SYNC is exempt: it legitimately takes a null src0. */
   if (num_sources >= 1 && brw_inst_opcode(isa, inst) != BRW_OPCODE_SYNC)
      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");

   if (num_sources == 2)
      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");

   return error_msg;
}
380
381 static struct string
alignment_supported(const struct brw_isa_info * isa,const brw_inst * inst)382 alignment_supported(const struct brw_isa_info *isa,
383 const brw_inst *inst)
384 {
385 const struct intel_device_info *devinfo = isa->devinfo;
386 struct string error_msg = { .str = NULL, .len = 0 };
387
388 ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
389 "Align16 not supported");
390
391 return error_msg;
392 }
393
394 static bool
inst_uses_src_acc(const struct brw_isa_info * isa,const brw_inst * inst)395 inst_uses_src_acc(const struct brw_isa_info *isa,
396 const brw_inst *inst)
397 {
398 const struct intel_device_info *devinfo = isa->devinfo;
399
400 /* Check instructions that use implicit accumulator sources */
401 switch (brw_inst_opcode(isa, inst)) {
402 case BRW_OPCODE_MAC:
403 case BRW_OPCODE_MACH:
404 case BRW_OPCODE_SADA2:
405 return true;
406 default:
407 break;
408 }
409
410 /* FIXME: support 3-src instructions */
411 unsigned num_sources = num_sources_from_inst(isa, inst);
412 assert(num_sources < 3);
413
414 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
415 }
416
/**
 * Checks restrictions specific to send instructions: split-send operand
 * files and payload overlap, EOT register-range rules, and addressing-mode
 * constraints.  Returns an error string owned by the caller.
 */
static struct string
send_restrictions(const struct brw_isa_info *isa,
                  const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(isa, inst)) {
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      /* End-of-thread messages must source their payload from the last
       * sixteen GRFs (g112-g127); this applies to both payload halves.
       */
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
          brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            /* Descriptor is an immediate, so the message length is known. */
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
         }
         /* The two payload register ranges must be disjoint. */
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                  "split send payloads must not overlap");
      }
   } else if (inst_is_send(isa, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->ver >= 7) {
         ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
                  "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->ver >= 8) {
         /* The return payload may not run past r127 when the source and
          * destination register ranges overlap.
          */
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}
486
487 static bool
is_unsupported_inst(const struct brw_isa_info * isa,const brw_inst * inst)488 is_unsupported_inst(const struct brw_isa_info *isa,
489 const brw_inst *inst)
490 {
491 return brw_inst_opcode(isa, inst) == BRW_OPCODE_ILLEGAL;
492 }
493
494 /**
495 * Returns whether a combination of two types would qualify as mixed float
496 * operation mode
497 */
498 static inline bool
types_are_mixed_float(enum brw_reg_type t0,enum brw_reg_type t1)499 types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
500 {
501 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
502 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
503 }
504
/**
 * Maps a register type to the type used for execution purposes: float
 * types keep their size (VF executes as F), 64-bit integers execute as Q,
 * 32-bit integers as D, and all 8/16-bit integer types (including the
 * packed vector immediates V/UV) as W.
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   unreachable("not reached");
}
536
537 /**
538 * Returns the execution type of an instruction \p inst
539 */
static enum brw_reg_type
execution_type(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = inst_dst_type(isa, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* A unary HF op can still be mixed-float via the destination. */
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   /* Any F/HF combination among operands makes the execution type F. */
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   /* The checks below are ordered by promotion priority: NF dominates,
    * then (on old parts) F, then the widest integer type, then DF.
    */
   if (src0_exec_type == BRW_REGISTER_TYPE_NF ||
       src1_exec_type == BRW_REGISTER_TYPE_NF)
      return BRW_REGISTER_TYPE_NF;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->ver < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}
600
601 /**
602 * Returns whether a region is packed
603 *
604 * A region is packed if its elements are adjacent in memory, with no
605 * intervening space, no overlap, and no replicated values.
606 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   /* <1;1,0> is the packed scalar case; otherwise the horizontal stride
    * must be exactly one element.
    */
   return vstride == 1 ? hstride == 0 : hstride == 1;
}
620
621 /**
622 * Returns whether a region is linear
623 *
624 * A region is linear if its elements do not overlap and are not replicated.
625 * Unlike a packed region, intervening space (i.e. strided values) is allowed.
626 */
static bool
is_linear(unsigned vstride, unsigned width, unsigned hstride)
{
   /* A replicated scalar (<0;1,0>) is trivially linear. */
   if (hstride == 0 && width == 1)
      return true;

   return vstride == width * hstride;
}
633
634 /**
635 * Returns whether an instruction is an explicit or implicit conversion
636 * to/from half-float.
637 */
static bool
is_half_float_conversion(const struct brw_isa_info *isa,
                         const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   /* A conversion exists when the destination type differs from a source
    * type and one side of that pair is HF.
    */
   if (dst_type != src0_type &&
       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
      return true;
   } else if (num_sources > 1) {
      /* src0 matched the destination; check src1 the same way. */
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (dst_type == BRW_REGISTER_TYPE_HF ||
             src1_type == BRW_REGISTER_TYPE_HF);
   }

   return false;
}
661
662 /*
663 * Returns whether an instruction is using mixed float operation mode
664 */
static bool
is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Mixed-float mode only exists on Gfx8+. */
   if (devinfo->ver < 8)
      return false;

   /* Sends don't have real operand types to mix. */
   if (inst_is_send(isa, inst))
      return false;

   /* Instructions without a destination can't qualify. */
   unsigned opcode = brw_inst_opcode(isa, inst);
   const struct opcode_desc *desc = brw_opcode_desc(isa, opcode);
   if (desc->ndst == 0)
      return false;

   /* FIXME: support 3-src instructions */
   unsigned num_sources = num_sources_from_inst(isa, inst);
   assert(num_sources < 3);

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   if (num_sources == 1)
      return types_are_mixed_float(src0_type, dst_type);

   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);

   /* Any F/HF pair among dst, src0, src1 makes the instruction mixed. */
   return types_are_mixed_float(src0_type, src1_type) ||
          types_are_mixed_float(src0_type, dst_type) ||
          types_are_mixed_float(src1_type, dst_type);
}
697
698 /**
699 * Returns whether an instruction is an explicit or implicit conversion
700 * to/from byte.
701 */
static bool
is_byte_conversion(const struct brw_isa_info *isa,
                   const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = num_sources_from_inst(isa, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   /* A byte conversion exists when the destination type differs from a
    * source type and one side of that pair is a 1-byte type (B/UB).
    */
   if (dst_type != src0_type &&
       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
      return true;
   } else if (num_sources > 1) {
      /* src0 matched the destination; check src1 the same way. */
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
   }

   return false;
}
724
725 /**
726 * Checks restrictions listed in "General Restrictions Based on Operand Types"
727 * in the "Register Region Restrictions" section.
728 */
729 static struct string
general_restrictions_based_on_operand_types(const struct brw_isa_info * isa,const brw_inst * inst)730 general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
731 const brw_inst *inst)
732 {
733 const struct intel_device_info *devinfo = isa->devinfo;
734
735 const struct opcode_desc *desc =
736 brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
737 unsigned num_sources = num_sources_from_inst(isa, inst);
738 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
739 struct string error_msg = { .str = NULL, .len = 0 };
740
741 if (inst_is_send(isa, inst))
742 return error_msg;
743
744 if (devinfo->ver >= 11) {
745 if (num_sources == 3) {
746 ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
747 brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
748 "Byte data type is not supported for src1/2 register regioning. This includes "
749 "byte broadcast as well.");
750 }
751 if (num_sources == 2) {
752 ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
753 "Byte data type is not supported for src1 register regioning. This includes "
754 "byte broadcast as well.");
755 }
756 }
757
758 enum brw_reg_type dst_type;
759
760 if (num_sources == 3) {
761 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
762 dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
763 else
764 dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
765 } else {
766 dst_type = inst_dst_type(isa, inst);
767 }
768
769 ERROR_IF(dst_type == BRW_REGISTER_TYPE_DF &&
770 !devinfo->has_64bit_float,
771 "64-bit float destination, but platform does not support it");
772
773 ERROR_IF((dst_type == BRW_REGISTER_TYPE_Q ||
774 dst_type == BRW_REGISTER_TYPE_UQ) &&
775 !devinfo->has_64bit_int,
776 "64-bit int destination, but platform does not support it");
777
778 for (unsigned s = 0; s < num_sources; s++) {
779 enum brw_reg_type src_type;
780 if (num_sources == 3) {
781 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
782 switch (s) {
783 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
784 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
785 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
786 default: unreachable("invalid src");
787 }
788 } else {
789 src_type = brw_inst_3src_a16_src_type(devinfo, inst);
790 }
791 } else {
792 switch (s) {
793 case 0: src_type = brw_inst_src0_type(devinfo, inst); break;
794 case 1: src_type = brw_inst_src1_type(devinfo, inst); break;
795 default: unreachable("invalid src");
796 }
797 }
798
799 ERROR_IF(src_type == BRW_REGISTER_TYPE_DF &&
800 !devinfo->has_64bit_float,
801 "64-bit float source, but platform does not support it");
802
803 ERROR_IF((src_type == BRW_REGISTER_TYPE_Q ||
804 src_type == BRW_REGISTER_TYPE_UQ) &&
805 !devinfo->has_64bit_int,
806 "64-bit int source, but platform does not support it");
807 }
808
809 if (num_sources == 3)
810 return error_msg;
811
812 if (exec_size == 1)
813 return error_msg;
814
815 if (desc->ndst == 0)
816 return error_msg;
817
818 /* The PRMs say:
819 *
820 * Where n is the largest element size in bytes for any source or
821 * destination operand type, ExecSize * n must be <= 64.
822 *
823 * But we do not attempt to enforce it, because it is implied by other
824 * rules:
825 *
826 * - that the destination stride must match the execution data type
827 * - sources may not span more than two adjacent GRF registers
828 * - destination may not span more than two adjacent GRF registers
829 *
830 * In fact, checking it would weaken testing of the other rules.
831 */
832
833 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
834 bool dst_type_is_byte =
835 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_B ||
836 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_UB;
837
838 if (dst_type_is_byte) {
839 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
840 if (!inst_is_raw_move(isa, inst))
841 ERROR("Only raw MOV supports a packed-byte destination");
842 return error_msg;
843 }
844 }
845
846 unsigned exec_type = execution_type(isa, inst);
847 unsigned exec_type_size = brw_reg_type_to_size(exec_type);
848 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
849
850 /* On IVB/BYT, region parameters and execution size for DF are in terms of
851 * 32-bit elements, so they are doubled. For evaluating the validity of an
852 * instruction, we halve them.
853 */
854 if (devinfo->verx10 == 70 &&
855 exec_type_size == 8 && dst_type_size == 4)
856 dst_type_size = 8;
857
858 if (is_byte_conversion(isa, inst)) {
859 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
860 *
861 * "There is no direct conversion from B/UB to DF or DF to B/UB.
862 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
863 *
864 * Even if these restrictions are listed for the MOV instruction, we
865 * validate this more generally, since there is the possibility
866 * of implicit conversions from other instructions.
867 */
868 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
869 enum brw_reg_type src1_type = num_sources > 1 ?
870 brw_inst_src1_type(devinfo, inst) : 0;
871
872 ERROR_IF(type_sz(dst_type) == 1 &&
873 (type_sz(src0_type) == 8 ||
874 (num_sources > 1 && type_sz(src1_type) == 8)),
875 "There are no direct conversions between 64-bit types and B/UB");
876
877 ERROR_IF(type_sz(dst_type) == 8 &&
878 (type_sz(src0_type) == 1 ||
879 (num_sources > 1 && type_sz(src1_type) == 1)),
880 "There are no direct conversions between 64-bit types and B/UB");
881 }
882
883 if (is_half_float_conversion(isa, inst)) {
884 /**
885 * A helper to validate used in the validation of the following restriction
886 * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
887 *
888 * "There is no direct conversion from HF to DF or DF to HF.
889 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
890 *
891 * Even if these restrictions are listed for the MOV instruction, we
892 * validate this more generally, since there is the possibility
893 * of implicit conversions from other instructions, such us implicit
894 * conversion from integer to HF with the ADD instruction in SKL+.
895 */
896 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
897 enum brw_reg_type src1_type = num_sources > 1 ?
898 brw_inst_src1_type(devinfo, inst) : 0;
899 ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
900 (type_sz(src0_type) == 8 ||
901 (num_sources > 1 && type_sz(src1_type) == 8)),
902 "There are no direct conversions between 64-bit types and HF");
903
904 ERROR_IF(type_sz(dst_type) == 8 &&
905 (src0_type == BRW_REGISTER_TYPE_HF ||
906 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
907 "There are no direct conversions between 64-bit types and HF");
908
909 /* From the BDW+ PRM:
910 *
911 * "Conversion between Integer and HF (Half Float) must be
912 * DWord-aligned and strided by a DWord on the destination."
913 *
914 * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
915 *
916 * "There is a relaxed alignment rule for word destinations. When
917 * the destination type is word (UW, W, HF), destination data types
918 * can be aligned to either the lowest word or the second lowest
919 * word of the execution channel. This means the destination data
920 * words can be either all in the even word locations or all in the
921 * odd word locations."
922 *
923 * We do not implement the second rule as is though, since empirical
924 * testing shows inconsistencies:
925 * - It suggests that packed 16-bit is not allowed, which is not true.
926 * - It suggests that conversions from Q/DF to W (which need to be
927 * 64-bit aligned on the destination) are not possible, which is
928 * not true.
929 *
930 * So from this rule we only validate the implication that conversions
931 * from F to HF need to be DWord strided (except in Align1 mixed
932 * float mode where packed fp16 destination is allowed so long as the
933 * destination is oword-aligned).
934 *
935 * Finally, we only validate this for Align1 because Align16 always
936 * requires packed destinations, so these restrictions can't possibly
937 * apply to Align16 mode.
938 */
939 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
940 if ((dst_type == BRW_REGISTER_TYPE_HF &&
941 (brw_reg_type_is_integer(src0_type) ||
942 (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
943 (brw_reg_type_is_integer(dst_type) &&
944 (src0_type == BRW_REGISTER_TYPE_HF ||
945 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
946 ERROR_IF(dst_stride * dst_type_size != 4,
947 "Conversions between integer and half-float must be "
948 "strided by a DWord on the destination");
949
950 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
951 ERROR_IF(subreg % 4 != 0,
952 "Conversions between integer and half-float must be "
953 "aligned to a DWord on the destination");
954 } else if ((devinfo->platform == INTEL_PLATFORM_CHV ||
955 devinfo->ver >= 9) &&
956 dst_type == BRW_REGISTER_TYPE_HF) {
957 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
958 ERROR_IF(dst_stride != 2 &&
959 !(is_mixed_float(isa, inst) &&
960 dst_stride == 1 && subreg % 16 == 0),
961 "Conversions to HF must have either all words in even "
962 "word locations or all words in odd word locations or "
963 "be mixed-float with Oword-aligned packed destination");
964 }
965 }
966 }
967
968 /* There are special regioning rules for mixed-float mode in CHV and SKL that
969 * override the general rule for the ratio of sizes of the destination type
970 * and the execution type. We will add validation for those in a later patch.
971 */
972 bool validate_dst_size_and_exec_size_ratio =
973 !is_mixed_float(isa, inst) ||
974 !(devinfo->platform == INTEL_PLATFORM_CHV || devinfo->ver >= 9);
975
976 if (validate_dst_size_and_exec_size_ratio &&
977 exec_type_size > dst_type_size) {
978 if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) {
979 ERROR_IF(dst_stride * dst_type_size != exec_type_size,
980 "Destination stride must be equal to the ratio of the sizes "
981 "of the execution data type to the destination type");
982 }
983
984 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
985
986 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
987 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
988 /* The i965 PRM says:
989 *
990 * Implementation Restriction: The relaxed alignment rule for byte
991 * destination (#10.5) is not supported.
992 */
993 if (devinfo->verx10 >= 45 && dst_type_is_byte) {
994 ERROR_IF(subreg % exec_type_size != 0 &&
995 subreg % exec_type_size != 1,
996 "Destination subreg must be aligned to the size of the "
997 "execution data type (or to the next lowest byte for byte "
998 "destinations)");
999 } else {
1000 ERROR_IF(subreg % exec_type_size != 0,
1001 "Destination subreg must be aligned to the size of the "
1002 "execution data type");
1003 }
1004 }
1005 }
1006
1007 return error_msg;
1008 }
1009
1010 /**
1011 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
1012 * in the "Register Region Restrictions" section.
1013 */
static struct string
general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
                                          const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned num_sources = num_sources_from_inst(isa, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These checks are not applied to 3-src instructions. */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      /* In Align16 mode we only validate the destination horizontal stride
       * and the limited set of legal source vertical strides (0/2/4 on
       * HSW+, 0/4 on earlier gens); the function then returns early since
       * the Align1 regioning rules below do not apply.
       */
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         if (devinfo->verx10 >= 75) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->verx10 >= 75) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      return error_msg;
   }

   /* Align1 mode: validate the general regioning rules for each source. */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* Load the region parameters of source i. Immediates have no region,
       * so they skip to the next source via 'continue'.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->verx10 == 70 &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
               "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       *
       * The loop builds a byte-access mask (one bit per byte, 64 bits
       * covering two adjacent GRFs) for each row of the region.  If a single
       * row sets bits in both the low and the high 32-bit halves, that row
       * crossed a GRF boundary without using VertStride, which is illegal.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << (offset % 64);
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
1174
1175 static struct string
special_restrictions_for_mixed_float_mode(const struct brw_isa_info * isa,const brw_inst * inst)1176 special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
1177 const brw_inst *inst)
1178 {
1179 const struct intel_device_info *devinfo = isa->devinfo;
1180
1181 struct string error_msg = { .str = NULL, .len = 0 };
1182
1183 const unsigned opcode = brw_inst_opcode(isa, inst);
1184 const unsigned num_sources = num_sources_from_inst(isa, inst);
1185 if (num_sources >= 3)
1186 return error_msg;
1187
1188 if (!is_mixed_float(isa, inst))
1189 return error_msg;
1190
1191 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1192 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1193
1194 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1195 enum brw_reg_type src1_type = num_sources > 1 ?
1196 brw_inst_src1_type(devinfo, inst) : 0;
1197 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1198
1199 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1200 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1201
1202 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1203 * Float Operations:
1204 *
1205 * "Indirect addressing on source is not supported when source and
1206 * destination data types are mixed float."
1207 */
1208 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1209 (num_sources > 1 &&
1210 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1211 "Indirect addressing on source is not supported when source and "
1212 "destination data types are mixed float");
1213
1214 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1215 * Float Operations:
1216 *
1217 * "No SIMD16 in mixed mode when destination is f32. Instruction
1218 * execution size must be no more than 8."
1219 */
1220 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
1221 "Mixed float mode with 32-bit float destination is limited "
1222 "to SIMD8");
1223
1224 if (is_align16) {
1225 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1226 * Float Operations:
1227 *
1228 * "In Align16 mode, when half float and float data types are mixed
1229 * between source operands OR between source and destination operands,
1230 * the register content are assumed to be packed."
1231 *
1232 * Since Align16 doesn't have a concept of horizontal stride (or width),
1233 * it means that vertical stride must always be 4, since 0 and 2 would
1234 * lead to replicated data, and any other value is disallowed in Align16.
1235 */
1236 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1237 "Align16 mixed float mode assumes packed data (vstride must be 4");
1238
1239 ERROR_IF(num_sources >= 2 &&
1240 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1241 "Align16 mixed float mode assumes packed data (vstride must be 4");
1242
1243 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1244 * Float Operations:
1245 *
1246 * "For Align16 mixed mode, both input and output packed f16 data
1247 * must be oword aligned, no oword crossing in packed f16."
1248 *
1249 * The previous rule requires that Align16 operands are always packed,
1250 * and since there is only one bit for Align16 subnr, which represents
1251 * offsets 0B and 16B, this rule is always enforced and we don't need to
1252 * validate it.
1253 */
1254
1255 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1256 * Float Operations:
1257 *
1258 * "No SIMD16 in mixed mode when destination is packed f16 for both
1259 * Align1 and Align16."
1260 *
1261 * And:
1262 *
1263 * "In Align16 mode, when half float and float data types are mixed
1264 * between source operands OR between source and destination operands,
1265 * the register content are assumed to be packed."
1266 *
1267 * Which implies that SIMD16 is not available in Align16. This is further
1268 * confirmed by:
1269 *
1270 * "For Align16 mixed mode, both input and output packed f16 data
1271 * must be oword aligned, no oword crossing in packed f16"
1272 *
1273 * Since oword-aligned packed f16 data would cross oword boundaries when
1274 * the execution size is larger than 8.
1275 */
1276 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1277
1278 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1279 * Float Operations:
1280 *
1281 * "No accumulator read access for Align16 mixed float."
1282 */
1283 ERROR_IF(inst_uses_src_acc(isa, inst),
1284 "No accumulator read access for Align16 mixed float");
1285 } else {
1286 assert(!is_align16);
1287
1288 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1289 * Float Operations:
1290 *
1291 * "No SIMD16 in mixed mode when destination is packed f16 for both
1292 * Align1 and Align16."
1293 */
1294 ERROR_IF(exec_size > 8 && dst_is_packed &&
1295 dst_type == BRW_REGISTER_TYPE_HF,
1296 "Align1 mixed float mode is limited to SIMD8 when destination "
1297 "is packed half-float");
1298
1299 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1300 * Float Operations:
1301 *
1302 * "Math operations for mixed mode:
1303 * - In Align1, f16 inputs need to be strided"
1304 */
1305 if (opcode == BRW_OPCODE_MATH) {
1306 if (src0_type == BRW_REGISTER_TYPE_HF) {
1307 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1308 "Align1 mixed mode math needs strided half-float inputs");
1309 }
1310
1311 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
1312 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1313 "Align1 mixed mode math needs strided half-float inputs");
1314 }
1315 }
1316
1317 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
1318 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1319 * Float Operations:
1320 *
1321 * "In Align1, destination stride can be smaller than execution
1322 * type. When destination is stride of 1, 16 bit packed data is
1323 * updated on the destination. However, output packed f16 data
1324 * must be oword aligned, no oword crossing in packed f16."
1325 *
1326 * The requirement of not crossing oword boundaries for 16-bit oword
1327 * aligned data means that execution size is limited to 8.
1328 */
1329 unsigned subreg;
1330 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1331 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1332 else
1333 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1334 ERROR_IF(subreg % 16 != 0,
1335 "Align1 mixed mode packed half-float output must be "
1336 "oword aligned");
1337 ERROR_IF(exec_size > 8,
1338 "Align1 mixed mode packed half-float output must not "
1339 "cross oword boundaries (max exec size is 8)");
1340
1341 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1342 * Float Operations:
1343 *
1344 * "When source is float or half float from accumulator register and
1345 * destination is half float with a stride of 1, the source must
1346 * register aligned. i.e., source must have offset zero."
1347 *
1348 * Align16 mixed float mode doesn't allow accumulator access on sources,
1349 * so we only need to check this for Align1.
1350 */
1351 if (src0_is_acc(devinfo, inst) &&
1352 (src0_type == BRW_REGISTER_TYPE_F ||
1353 src0_type == BRW_REGISTER_TYPE_HF)) {
1354 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1355 "Mixed float mode requires register-aligned accumulator "
1356 "source reads when destination is packed half-float");
1357
1358 }
1359
1360 if (num_sources > 1 &&
1361 src1_is_acc(devinfo, inst) &&
1362 (src1_type == BRW_REGISTER_TYPE_F ||
1363 src1_type == BRW_REGISTER_TYPE_HF)) {
1364 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1365 "Mixed float mode requires register-aligned accumulator "
1366 "source reads when destination is packed half-float");
1367 }
1368 }
1369
1370 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1371 * Float Operations:
1372 *
1373 * "No swizzle is allowed when an accumulator is used as an implicit
1374 * source or an explicit source in an instruction. i.e. when
1375 * destination is half float with an implicit accumulator source,
1376 * destination stride needs to be 2."
1377 *
1378 * FIXME: it is not quite clear what the first sentence actually means
1379 * or its link to the implication described after it, so we only
1380 * validate the explicit implication, which is clearly described.
1381 */
1382 if (dst_type == BRW_REGISTER_TYPE_HF &&
1383 inst_uses_src_acc(isa, inst)) {
1384 ERROR_IF(dst_stride != 2,
1385 "Mixed float mode with implicit/explicit accumulator "
1386 "source and half-float destination requires a stride "
1387 "of 2 on the destination");
1388 }
1389 }
1390
1391 return error_msg;
1392 }
1393
1394 /**
1395 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
1396 *
1397 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
1398 * a bitmask of bytes accessed by the region.
1399 *
1400 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
1401 * instruction would be
1402 *
1403 * access_mask[0] = 0x00000000000000F0
1404 * access_mask[1] = 0x000000000000F000
1405 * access_mask[2] = 0x0000000000F00000
1406 * access_mask[3] = 0x00000000F0000000
1407 * access_mask[4-31] = 0
1408 *
1409 * because the first execution channel accesses bytes 7-4 and the second
1410 * execution channel accesses bytes 15-12, etc.
1411 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One bit per byte covered by a single element of the region. */
   const uint64_t element_mask = (1ULL << element_size) - 1;
   unsigned row_start = subreg;
   unsigned chan = 0;

   /* Walk the region row by row (vstride) and element by element within a
    * row (hstride), recording the byte footprint of each execution channel.
    */
   for (unsigned row = 0; row < exec_size / width; row++) {
      unsigned byte_offset = row_start;

      for (unsigned col = 0; col < width; col++) {
         access_mask[chan++] = element_mask << (byte_offset % 64);
         byte_offset += hstride * element_size;
      }

      row_start += vstride * element_size;
   }

   assert(chan == 0 || chan == exec_size);
}
1434
1435 /**
1436 * Returns the number of registers accessed according to the \p access_mask
1437 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int touched_low = 0;

   for (unsigned slot = 0; slot < 32; slot++) {
      /* Any bit above byte 31 means the channel reaches into the second
       * GRF, so two registers are accessed.
       */
      if (access_mask[slot] >> 32)
         return 2;

      if (access_mask[slot])
         touched_low = 1;
   }

   return touched_low;
}
1453
1454 /**
1455 * Checks restrictions listed in "Region Alignment Rules" in the "Register
1456 * Region Restrictions" section.
1457 */
static struct string
region_alignment_rules(const struct brw_isa_info *isa,
                       const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned num_sources = num_sources_from_inst(isa, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   /* Per-channel byte-access masks (see align1_access_mask): one uint64_t
    * per execution channel, one bit per byte over two adjacent GRFs.
    */
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These rules are only checked for 1- and 2-src Align1 non-send
    * instructions.
    */
   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(isa, inst))
      return (struct string){};

   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* Load the region parameters of source i and build its access mask.
       * Indirect and immediate sources have no (checkable) region and skip
       * to the next source via 'continue'.
       */
#define DO_SRC(n)                                                       \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=          \
          BRW_ADDRESS_DIRECT)                                           \
         continue;                                                      \
                                                                        \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==              \
          BRW_IMMEDIATE_VALUE)                                          \
         continue;                                                      \
                                                                        \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));   \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));        \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));   \
      type = brw_inst_src ## n ## _type(devinfo, inst);                 \
      element_size = brw_reg_type_to_size(type);                        \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);      \
      align1_access_mask(src ## n ## _access_mask,                      \
                         exec_size, element_size, subreg,               \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* The byte offset of the last element read must stay within the
       * 64 bytes of two adjacent GRFs.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(isa, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* The mask-based checks below assume the spans validated above; bail out
    * early if anything already failed.
    */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->verx10 == 70 &&
       element_size == 8)
      element_size = 4;

   /* Destinations are a single row: width = exec_size, hstride = stride. */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->ver <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bits above byte 15 are the upper OWord of the register. */
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->ver <= 8 ||
       brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bits above byte 31 belong to the second GRF. */
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Validate rules (2) and (3) for source i; sources contained in a
          * single register are exempt and skip via 'continue'.
          */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "    \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         /* Find the starting byte offset within the second register: the   \
          * first channel whose mask reaches past byte 31.                   \
          */                                                                  \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented but the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gfx6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = inst_dst_type(isa, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
#define DO_SRC(n)                                                                  \
         unsigned vstride, width, hstride;                                         \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
         bool src ## n ## _is_packed_word =                                        \
            is_packed(vstride, width, hstride) &&                                  \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
                                                                                   \
         ERROR_IF(src ## n ## _regs == 1 &&                                        \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
                  "When the destination spans two registers, the source must "     \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}
1772
1773 static struct string
vector_immediate_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)1774 vector_immediate_restrictions(const struct brw_isa_info *isa,
1775 const brw_inst *inst)
1776 {
1777 const struct intel_device_info *devinfo = isa->devinfo;
1778
1779 unsigned num_sources = num_sources_from_inst(isa, inst);
1780 struct string error_msg = { .str = NULL, .len = 0 };
1781
1782 if (num_sources == 3 || num_sources == 0)
1783 return (struct string){};
1784
1785 unsigned file = num_sources == 1 ?
1786 brw_inst_src0_reg_file(devinfo, inst) :
1787 brw_inst_src1_reg_file(devinfo, inst);
1788 if (file != BRW_IMMEDIATE_VALUE)
1789 return (struct string){};
1790
1791 enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1792 unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1793 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1794 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1795 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1796 enum brw_reg_type type = num_sources == 1 ?
1797 brw_inst_src0_type(devinfo, inst) :
1798 brw_inst_src1_type(devinfo, inst);
1799
1800 /* The PRMs say:
1801 *
1802 * When an immediate vector is used in an instruction, the destination
1803 * must be 128-bit aligned with destination horizontal stride equivalent
1804 * to a word for an immediate integer vector (v) and equivalent to a
1805 * DWord for an immediate float vector (vf).
1806 *
1807 * The text has not been updated for the addition of the immediate unsigned
1808 * integer vector type (uv) on SNB, but presumably the same restriction
1809 * applies.
1810 */
1811 switch (type) {
1812 case BRW_REGISTER_TYPE_V:
1813 case BRW_REGISTER_TYPE_UV:
1814 case BRW_REGISTER_TYPE_VF:
1815 ERROR_IF(dst_subreg % (128 / 8) != 0,
1816 "Destination must be 128-bit aligned in order to use immediate "
1817 "vector types");
1818
1819 if (type == BRW_REGISTER_TYPE_VF) {
1820 ERROR_IF(dst_type_size * dst_stride != 4,
1821 "Destination must have stride equivalent to dword in order "
1822 "to use the VF type");
1823 } else {
1824 ERROR_IF(dst_type_size * dst_stride != 2,
1825 "Destination must have stride equivalent to word in order "
1826 "to use the V or UV type");
1827 }
1828 break;
1829 default:
1830 break;
1831 }
1832
1833 return error_msg;
1834 }
1835
/**
 * Validates the extra restrictions that apply when an instruction touches
 * 64-bit data: a QWord/DF destination or execution type, or an integer
 * DWord multiply (which the hardware computes with a 64-bit intermediate).
 *
 * Most rules are quoted from the CHV/BXT PRMs (assumed to apply to GLK as
 * well) plus the Gfx12.5+ regioning rules; see the comment at each check.
 */
static struct string
special_requirements_for_handling_double_precision_data_types(
                                        const struct brw_isa_info *isa,
                                        const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Only 1- and 2-source ALU instructions are checked here. */
   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Split sends don't have types so there's no doubles there. */
   if (inst_is_split_send(isa, inst))
      return (struct string){};

   enum brw_reg_type exec_type = execution_type(isa, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);

   /* Destination properties, read once up front. */
   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
   enum brw_reg_type dst_type = inst_dst_type(isa, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);

   /* A DWord x DWord integer MUL on Gfx8+ produces a 64-bit intermediate,
    * so it is subject to the same restrictions as explicit 64-bit types.
    */
   bool is_integer_dword_multiply =
      devinfo->ver >= 8 &&
      brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL &&
      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);

   const bool is_double_precision =
      dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
      bool is_scalar_region;
      enum brw_reg_file file;
      enum brw_reg_type type;

      /* Loads the region/type/register fields of source n into the locals
       * declared above.  Immediate sources have no region, so they are
       * skipped entirely (note the `continue` inside the macro).
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      type_size = brw_reg_type_to_size(type);                                  \
      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Element strides in bytes.  When hstride is 0 the elements advance
       * by vstride instead, hence the fallback.
       */
      const unsigned src_stride = (hstride ? hstride : vstride) * type_size;
      const unsigned dst_stride = dst_hstride * dst_type_size;

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
       *    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
         ERROR_IF(!is_scalar_region &&
                  (src_stride % 8 != 0 ||
                   dst_stride % 8 != 0 ||
                   src_stride != dst_stride),
                  "Source and destination horizontal stride must equal and a "
                  "multiple of a qword when the execution type is 64-bit");

         ERROR_IF(vstride != width * hstride,
                  "Vstride must be Width * Hstride when the execution type is "
                  "64-bit");

         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
                  "Source and destination offset must be the same when the "
                  "execution type is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
                  "Indirect addressing is not allowed when the execution type "
                  "is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       *
       * We assume that the restriction applies to GLK as well.
       *
       * We assume that the restriction does not apply to the null register.
       */
      if (is_double_precision &&
          (devinfo->platform == INTEL_PLATFORM_CHV ||
           intel_device_info_is_9lp(devinfo))) {
         /* MAC and AccWrEn use the accumulator (an ARF) implicitly, so they
          * are rejected along with explicit non-null ARF operands.
          */
         ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC ||
                  brw_inst_acc_wr_control(devinfo, inst) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
                   reg != BRW_ARF_NULL) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
                   dst_reg != BRW_ARF_NULL),
                  "Architecture registers cannot be used when the execution "
                  "type is 64-bit");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * There are two rules:
       *
       * "In case of all floating point data types used in destination:" and
       *
       * "In case where source or destination datatype is 64b or operation is
       *  integer DWord multiply:"
       *
       * both of which list the same restrictions:
       *
       *  "1. Register Regioning patterns where register data bit location
       *      of the LSB of the channels are changed between source and
       *      destination are not supported on Src0 and Src1 except for
       *      broadcast of a scalar.
       *
       *   2. Explicit ARF registers except null and accumulator must not be
       *      used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(dst_type) ||
           is_double_precision)) {
         ERROR_IF(!is_scalar_region &&
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER != address_mode &&
                  (!is_linear(vstride, width, hstride) ||
                   src_stride != dst_stride ||
                   subreg != dst_subreg),
                  "Register Regioning patterns where register data bit "
                  "location of the LSB of the channels are changed between "
                  "source and destination are not supported except for "
                  "broadcast of a scalar.");

         /* Source side accepts the whole accumulator range (acc# up to but
          * not including the flag registers); destination only acc itself.
          */
         ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||
                  (dst_file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR),
                  "Explicit ARF registers except null and accumulator must not "
                  "be used.");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
       *  Quad-Word data must not be used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) {
         ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
                  vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
                  "Vx1 and VxH indirect addressing for Float, Half-Float, "
                  "Double-Float and Quad-Word data must not be used");
      }
   }

   /* The PRMs say that for BDW, SKL:
    *
    *    If Align16 is required for an operation with QW destination and non-QW
    *    source datatypes, the execution size cannot exceed 2.
    *
    * We assume that the restriction applies to all Gfx8+ parts.
    */
   if (is_double_precision && devinfo->ver >= 8) {
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      /* For 1-source instructions, treat src1 as having src0's type. */
      enum brw_reg_type src1_type =
         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
      unsigned src1_type_size = brw_reg_type_to_size(src1_type);

      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
               dst_type_size == 8 &&
               (src0_type_size != 8 || src1_type_size != 8) &&
               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
               "In Align16 exec size cannot exceed 2 with a QWord destination "
               "and a non-QWord source");
   }

   /* The PRMs say that for CHV, BXT:
    *
    *    When source or destination datatype is 64b or operation is integer
    *    DWord multiply, DepCtrl must not be used.
    *
    * We assume that the restriction applies to GLK as well.
    */
   if (is_double_precision &&
       (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) {
      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
               brw_inst_no_dd_clear(devinfo, inst),
               "DepCtrl is not allowed when the execution type is 64-bit");
   }

   return error_msg;
}
2068
2069 static struct string
instruction_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)2070 instruction_restrictions(const struct brw_isa_info *isa,
2071 const brw_inst *inst)
2072 {
2073 const struct intel_device_info *devinfo = isa->devinfo;
2074 struct string error_msg = { .str = NULL, .len = 0 };
2075
2076 /* From Wa_1604601757:
2077 *
2078 * "When multiplying a DW and any lower precision integer, source modifier
2079 * is not supported."
2080 */
2081 if (devinfo->ver >= 12 &&
2082 brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
2083 enum brw_reg_type exec_type = execution_type(isa, inst);
2084 const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
2085 brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
2086 !(brw_inst_src0_negate(devinfo, inst) ||
2087 brw_inst_src0_abs(devinfo, inst));
2088 const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
2089 brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
2090 !(brw_inst_src1_negate(devinfo, inst) ||
2091 brw_inst_src1_abs(devinfo, inst));
2092
2093 ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&
2094 type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
2095 "When multiplying a DW and any lower precision integer, source "
2096 "modifier is not supported.");
2097 }
2098
2099 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP ||
2100 brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) {
2101 if (devinfo->ver <= 7) {
2102 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
2103 * ISA) says:
2104 *
2105 * Accumulator cannot be destination, implicit or explicit. The
2106 * destination must be a general register or the null register.
2107 *
2108 * Page 77 of the Haswell PRM Volume 2b contains the same text. The
2109 * 965G PRMs contain similar text.
2110 *
2111 * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
2112 *
2113 * For the cmp and cmpn instructions, remove the accumulator
2114 * restrictions.
2115 */
2116 ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
2117 brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
2118 "Accumulator cannot be destination, implicit or explicit.");
2119 }
2120
2121 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
2122 * says:
2123 *
2124 * If the destination is the null register, the {Switch} instruction
2125 * option must be used.
2126 *
2127 * Page 77 of the Haswell PRM Volume 2b contains the same text.
2128 */
2129 if (devinfo->ver == 7) {
2130 ERROR_IF(dst_is_null(devinfo, inst) &&
2131 brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
2132 "If the destination is the null register, the {Switch} "
2133 "instruction option must be used.");
2134 }
2135 }
2136
2137 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
2138 unsigned math_function = brw_inst_math_function(devinfo, inst);
2139 switch (math_function) {
2140 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
2141 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
2142 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: {
2143 /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
2144 * INT DIV function does not support source modifiers.
2145 * Bspec 6647 extends it back to Ivy Bridge.
2146 */
2147 bool src0_valid = !brw_inst_src0_negate(devinfo, inst) &&
2148 !brw_inst_src0_abs(devinfo, inst);
2149 bool src1_valid = !brw_inst_src1_negate(devinfo, inst) &&
2150 !brw_inst_src1_abs(devinfo, inst);
2151 ERROR_IF(!src0_valid || !src1_valid,
2152 "INT DIV function does not support source modifiers.");
2153 break;
2154 }
2155 default:
2156 break;
2157 }
2158 }
2159
2160 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DP4A) {
2161 /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
2162 *
2163 * Only one of src0 or src1 operand may be an the (sic) accumulator
2164 * register (acc#).
2165 */
2166 ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
2167 "Only one of src0 or src1 operand may be an accumulator "
2168 "register (acc#).");
2169
2170 }
2171
2172 return error_msg;
2173 }
2174
/**
 * Validates immediate send-message descriptors: LSC transpose restrictions
 * and the legality of URB message opcodes on the current generation.
 *
 * Descriptors held in a register cannot be inspected statically, so any
 * send whose descriptor is not an immediate is accepted as-is.
 */
static struct string
send_descriptor_restrictions(const struct brw_isa_info *isa,
                             const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(isa, inst)) {
      /* We can only validate immediate descriptors */
      if (brw_inst_send_sel_reg32_desc(devinfo, inst))
         return error_msg;
   } else if (inst_is_send(isa, inst)) {
      /* We can only validate immediate descriptors */
      if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE)
         return error_msg;
   } else {
      /* Not a send instruction at all: nothing to check. */
      return error_msg;
   }

   const uint32_t desc = brw_inst_send_desc(devinfo, inst);

   /* LSC (load/store cache) shared functions exist on Gfx12.5+ only. */
   switch (brw_inst_sfid(devinfo, inst)) {
   case GFX12_SFID_TGM:
   case GFX12_SFID_SLM:
   case GFX12_SFID_UGM:
      ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");

      ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
               lsc_msg_desc_transpose(devinfo, desc) &&
               brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
               "Transposed vectors are restricted to Exec_Mask = 1.");
      break;

   default:
      break;
   }

   if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB) {
      /* Gfx4 doesn't have a "header present" bit in the SEND message. */
      ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst),
               "Header must be present for all URB messages.");

      switch (brw_inst_urb_opcode(devinfo, inst)) {
      case BRW_URB_OPCODE_WRITE_HWORD:
         break;

      /* case FF_SYNC: */
      case BRW_URB_OPCODE_WRITE_OWORD:
         /* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the
          * same opcode value.
          */
         if (devinfo->ver == 5 || devinfo->ver == 6) {
            /* FF_SYNC requires most descriptor fields to be zero. */
            ERROR_IF(brw_inst_urb_global_offset(devinfo, inst) != 0,
                     "FF_SYNC global offset must be zero.");
            ERROR_IF(brw_inst_urb_swizzle_control(devinfo, inst) != 0,
                     "FF_SYNC swizzle control must be zero.");
            ERROR_IF(brw_inst_urb_used(devinfo, inst) != 0,
                     "FF_SYNC used must be zero.");
            ERROR_IF(brw_inst_urb_complete(devinfo, inst) != 0,
                     "FF_SYNC complete must be zero.");

            /* Volume 4 part 2 of the Sandybridge PRM (page 28) says:
             *
             *    A message response (writeback) length of 1 GRF will be
             *    indicated on the ‘send’ instruction if the thread requires
             *    response data and/or synchronization.
             */
            ERROR_IF((unsigned)brw_inst_rlen(devinfo, inst) > 1,
                     "FF_SYNC read length must be 0 or 1.");
         } else {
            ERROR_IF(devinfo->ver < 7,
                     "URB OWORD write messages only valid on gfx >= 7");
         }
         break;

      case BRW_URB_OPCODE_READ_HWORD:
      case BRW_URB_OPCODE_READ_OWORD:
         ERROR_IF(devinfo->ver < 7,
                  "URB read messages only valid on gfx >= 7");
         break;

      case GFX7_URB_OPCODE_ATOMIC_MOV:
      case GFX7_URB_OPCODE_ATOMIC_INC:
         ERROR_IF(devinfo->ver < 7,
                  "URB atomic move and increment messages only valid on gfx >= 7");
         break;

      case GFX8_URB_OPCODE_ATOMIC_ADD:
         /* The Haswell PRM lists this opcode as valid on page 317. */
         ERROR_IF(devinfo->verx10 < 75,
                  "URB atomic add message only valid on gfx >= 7.5");
         break;

      case GFX8_URB_OPCODE_SIMD8_READ:
         ERROR_IF(brw_inst_rlen(devinfo, inst) == 0,
                  "URB SIMD8 read message must read some data.");
         FALLTHROUGH;

      case GFX8_URB_OPCODE_SIMD8_WRITE:
         ERROR_IF(devinfo->ver < 8,
                  "URB SIMD8 messages only valid on gfx >= 8");
         break;

      case GFX125_URB_OPCODE_FENCE:
         ERROR_IF(devinfo->verx10 < 125,
                  "URB fence message only valid on gfx >= 12.5");
         break;

      default:
         ERROR_IF(true, "Invalid URB message");
         break;
      }
   }

   return error_msg;
}
2291
2292 bool
brw_validate_instruction(const struct brw_isa_info * isa,const brw_inst * inst,int offset,unsigned inst_size,struct disasm_info * disasm)2293 brw_validate_instruction(const struct brw_isa_info *isa,
2294 const brw_inst *inst, int offset,
2295 unsigned inst_size,
2296 struct disasm_info *disasm)
2297 {
2298 struct string error_msg = { .str = NULL, .len = 0 };
2299
2300 if (is_unsupported_inst(isa, inst)) {
2301 ERROR("Instruction not supported on this Gen");
2302 } else {
2303 CHECK(invalid_values);
2304
2305 if (error_msg.str == NULL) {
2306 CHECK(sources_not_null);
2307 CHECK(send_restrictions);
2308 CHECK(alignment_supported);
2309 CHECK(general_restrictions_based_on_operand_types);
2310 CHECK(general_restrictions_on_region_parameters);
2311 CHECK(special_restrictions_for_mixed_float_mode);
2312 CHECK(region_alignment_rules);
2313 CHECK(vector_immediate_restrictions);
2314 CHECK(special_requirements_for_handling_double_precision_data_types);
2315 CHECK(instruction_restrictions);
2316 CHECK(send_descriptor_restrictions);
2317 }
2318 }
2319
2320 if (error_msg.str && disasm) {
2321 disasm_insert_error(disasm, offset, inst_size, error_msg.str);
2322 }
2323 free(error_msg.str);
2324
2325 return error_msg.len == 0;
2326 }
2327
2328 bool
brw_validate_instructions(const struct brw_isa_info * isa,const void * assembly,int start_offset,int end_offset,struct disasm_info * disasm)2329 brw_validate_instructions(const struct brw_isa_info *isa,
2330 const void *assembly, int start_offset, int end_offset,
2331 struct disasm_info *disasm)
2332 {
2333 const struct intel_device_info *devinfo = isa->devinfo;
2334 bool valid = true;
2335
2336 for (int src_offset = start_offset; src_offset < end_offset;) {
2337 const brw_inst *inst = assembly + src_offset;
2338 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
2339 unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
2340 : sizeof(brw_inst);
2341 brw_inst uncompacted;
2342
2343 if (is_compact) {
2344 brw_compact_inst *compacted = (void *)inst;
2345 brw_uncompact_instruction(isa, &uncompacted, compacted);
2346 inst = &uncompacted;
2347 }
2348
2349 bool v = brw_validate_instruction(isa, inst, src_offset,
2350 inst_size, disasm);
2351 valid = valid && v;
2352
2353 src_offset += inst_size;
2354 }
2355
2356 return valid;
2357 }
2358