/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file
 *
 * Implements a pass that validates various invariants of the IR.  The
 * current pass only validates that GRFs are used sanely.  More can be
 * added later.
 */

#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_eu.h"

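/* Validation assertion helpers.  Each macro assumes that the fs_visitor "s"
 * and the fs_inst "inst" being checked are in scope at the expansion site;
 * on failure it prints the offending instruction and aborts.
 */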
#define fsv_assert(assertion)                                           \
   {                                                                    \
      if (!(assertion)) {                                               \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: '%s' failed\n", __FILE__, __LINE__, #assertion); \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_eq(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a != b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A == B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_ne(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a == b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A != B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_lte(A, B)                                            \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a > b) {                                                      \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A <= B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }

#ifndef NDEBUG
static inline bool
is_ud_imm(const brw_reg &reg)
{
   return reg.file == IMM && reg.type == BRW_TYPE_UD;
}

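/* Validate the logical memory load/store/atomic instructions: the
 * descriptor-like sources must be UD immediates, and the data sources,
 * destination, and flags must be consistent with the LSC opcode and data
 * size encoded in the instruction.
 */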
static void
validate_memory_logical(const fs_visitor &s, const fs_inst *inst)
{
   const intel_device_info *devinfo = s.devinfo;

   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));

   enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
   enum memory_flags flags = (memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud;
   bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
   bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
   enum memory_logical_mode mode =
      (memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud;

   enum lsc_data_size data_size =
      (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
   unsigned data_size_B = lsc_data_size_bytes(data_size);

   if (!devinfo->has_lsc) {
      fsv_assert(data_size == LSC_DATA_SIZE_D8U32 ||
                 data_size == LSC_DATA_SIZE_D16U32 ||
                 data_size == LSC_DATA_SIZE_D32 ||
                 data_size == LSC_DATA_SIZE_D64);

      if (transpose) {
         const unsigned min_alignment =
            mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
         fsv_assert(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud >= min_alignment);
      }
   }

   fsv_assert(!transpose || !include_helpers);
   fsv_assert(!transpose || lsc_opcode_has_transpose(op));

   if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
      fsv_assert(inst->src[MEMORY_LOGICAL_BINDING].file == BAD_FILE);

   if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA1));

      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].type ==
                 inst->src[MEMORY_LOGICAL_DATA1].type);
   }

   if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA0));

      fsv_assert(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type) ==
                 data_size_B);
   }

   if (inst->dst.file != BAD_FILE)
      fsv_assert(brw_type_size_bytes(inst->dst.type) == data_size_B);

   switch (inst->opcode) {
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      fsv_assert(lsc_opcode_is_store(op));
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
      fsv_assert(lsc_opcode_is_atomic(op));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 1));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 2));
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == 1);
      fsv_assert(!include_helpers);
      break;
   default:
      unreachable("invalid opcode");
   }
}

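/* Human-readable phase names, used in the error message printed when an
 * instruction outlives the phase where it should have been lowered.
 */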
static const char *
brw_shader_phase_to_string(enum brw_shader_phase phase)
{
   switch (phase) {
   case BRW_SHADER_PHASE_INITIAL:                return "INITIAL";
   case BRW_SHADER_PHASE_AFTER_NIR:              return "AFTER_NIR";
   case BRW_SHADER_PHASE_AFTER_OPT_LOOP:         return "AFTER_OPT_LOOP";
   case BRW_SHADER_PHASE_AFTER_EARLY_LOWERING:   return "AFTER_EARLY_LOWERING";
   case BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING:  return "AFTER_MIDDLE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_LATE_LOWERING:    return "AFTER_LATE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_REGALLOC:         return "AFTER_REGALLOC";
   case BRW_SHADER_PHASE_INVALID:                break;
   }
   unreachable("invalid_phase");
   return NULL;
}

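/* Logical and other virtual opcodes are only valid until the lowering pass
 * that eliminates them has run.  Flag any instruction that survives past
 * the phase from which it is no longer allowed.
 */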
static void
brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
{
   enum brw_shader_phase invalid_from = BRW_SHADER_PHASE_INVALID;

   switch (inst->opcode) {
   case FS_OPCODE_FB_WRITE_LOGICAL:
   case FS_OPCODE_FB_READ_LOGICAL:
   case SHADER_OPCODE_TEX_LOGICAL:
   case SHADER_OPCODE_TXD_LOGICAL:
   case SHADER_OPCODE_TXF_LOGICAL:
   case SHADER_OPCODE_TXL_LOGICAL:
   case SHADER_OPCODE_TXS_LOGICAL:
   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
   case FS_OPCODE_TXB_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
   case SHADER_OPCODE_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_LOGICAL:
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
   case SHADER_OPCODE_GET_BUFFER_SIZE:
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
   case RT_OPCODE_TRACE_RAY_LOGICAL:
   case SHADER_OPCODE_URB_READ_LOGICAL:
   case SHADER_OPCODE_URB_WRITE_LOGICAL:
   case SHADER_OPCODE_REDUCE:
   case SHADER_OPCODE_INCLUSIVE_SCAN:
   case SHADER_OPCODE_EXCLUSIVE_SCAN:
   case SHADER_OPCODE_VOTE_ANY:
   case SHADER_OPCODE_VOTE_ALL:
   case SHADER_OPCODE_VOTE_EQUAL:
   case SHADER_OPCODE_BALLOT:
   case SHADER_OPCODE_QUAD_SWAP:
   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
   case SHADER_OPCODE_READ_FROM_CHANNEL:
      invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
      break;

   case SHADER_OPCODE_LOAD_PAYLOAD:
      invalid_from = BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING;
      break;

   default:
      /* Nothing to do. */
      break;
   }

   assert(s.phase < BRW_SHADER_PHASE_INVALID);
   if (s.phase >= invalid_from) {
      fprintf(stderr, "INVALID INSTRUCTION IN PHASE: %s\n",
              brw_shader_phase_to_string(s.phase));
      brw_print_instruction(s, inst, stderr);
      abort();
   }
}

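/* Entry point of the pass.  Nothing is validated at or before the AFTER_NIR
 * phase; after that the CFG is checked and then every instruction in every
 * block is validated against the invariants below.
 */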
void
brw_validate(const fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;

   if (s.phase <= BRW_SHADER_PHASE_AFTER_NIR)
      return;

   s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));

   foreach_block(block, s.cfg) {
      /* Track the last used address register.  Usage of the address
       * register in the IR should be limited to within a block, otherwise
       * we would be unable to schedule some instructions without spilling
       * the address register to a VGRF.
       *
       * Another pattern we stick to when using the address register in the
       * IR is that we write and read the register in pairs of instructions.
       */
      uint32_t last_used_address_register[16] = {};

      foreach_inst_in_block (fs_inst, inst, block) {
         brw_validate_instruction_phase(s, inst);

         switch (inst->opcode) {
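         /* The descriptor sources of SEND messages (src[0] and src[1]) must
          * be uniform.
          */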
         case SHADER_OPCODE_SEND:
            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            break;

         case SHADER_OPCODE_SEND_GATHER:
            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            fsv_assert(devinfo->ver >= 30);
            break;

         case BRW_OPCODE_MOV:
            fsv_assert(inst->sources == 1);
            break;

         case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
         case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
         case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
            validate_memory_logical(s, inst);
            break;

         default:
            break;
         }

         /* On Xe2, the "write the accumulator in addition to the explicit
          * destination" bit no longer exists.  Try to catch uses of this
          * feature earlier in the process.
          */
         if (devinfo->ver >= 20 && inst->writes_accumulator) {
            fsv_assert(inst->dst.is_accumulator() ||
                       inst->opcode == BRW_OPCODE_ADDC ||
                       inst->opcode == BRW_OPCODE_MACH ||
                       inst->opcode == BRW_OPCODE_SUBB);
         }

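         /* 3-source instructions must use either all-integer or all-float
          * source types, and their allowed source regions differ by
          * hardware generation, as checked below.
          */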
         if (inst->is_3src(s.compiler)) {
            const unsigned integer_sources =
               brw_type_is_int(inst->src[0].type) +
               brw_type_is_int(inst->src[1].type) +
               brw_type_is_int(inst->src[2].type);
            const unsigned float_sources =
               brw_type_is_float(inst->src[0].type) +
               brw_type_is_float(inst->src[1].type) +
               brw_type_is_float(inst->src[2].type);

            fsv_assert((integer_sources == 3 && float_sources == 0) ||
                       (integer_sources == 0 && float_sources == 3));

            if (devinfo->ver >= 10) {
               for (unsigned i = 0; i < 3; i++) {
                  if (inst->src[i].file == IMM)
                     continue;

                  switch (inst->src[i].vstride) {
                  case BRW_VERTICAL_STRIDE_0:
                  case BRW_VERTICAL_STRIDE_4:
                  case BRW_VERTICAL_STRIDE_8:
                  case BRW_VERTICAL_STRIDE_16:
                     break;

                  case BRW_VERTICAL_STRIDE_1:
                     fsv_assert_lte(12, devinfo->ver);
                     break;

                  case BRW_VERTICAL_STRIDE_2:
                     fsv_assert_lte(devinfo->ver, 11);
                     break;

                  default:
                     fsv_assert(!"invalid vstride");
                     break;
                  }
               }
            } else if (s.grf_used != 0) {
               /* Only perform the pre-Gfx10 checks after register
                * allocation has occurred.
                *
                * Many passes (e.g., constant copy propagation) will
                * generate invalid 3-source instructions with the
                * expectation that later passes (e.g., combine constants)
                * will fix them.
                */
               for (unsigned i = 0; i < 3; i++) {
                  fsv_assert_ne(inst->src[i].file, IMM);

                  /* A stride of 1 (the usual case) or 0, with a special
                   * "repctrl" bit, is allowed.  The repctrl bit doesn't
                   * work for 64-bit datatypes, so if the source type is
                   * 64-bit then only a stride of 1 is allowed.  From the
                   * Broadwell PRM, Volume 7 "3D Media GPGPU", page 944:
                   *
                   *    This is applicable to 32b datatypes and 16b datatype.
                   *    64b datatypes cannot use the replicate control.
                   */
                  const unsigned stride_in_bytes = byte_stride(inst->src[i]);
                  const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
                  if (stride_in_bytes == 0) {
                     /* If the source is_scalar, then the stride will be
                      * converted to <4;4,1> in brw_lower_scalar_fp64_MAD
                      * after SIMD splitting.
                      */
                     if (!inst->src[i].is_scalar)
                        fsv_assert_lte(size_in_bytes, 4);
                  } else {
                     fsv_assert_eq(stride_in_bytes, size_in_bytes);
                  }
               }
            }
         }

         if (inst->dst.file == VGRF) {
            fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
                           s.alloc.sizes[inst->dst.nr]);
            if (inst->exec_size > 1)
               fsv_assert_ne(inst->dst.stride, 0);
         } else if (inst->dst.is_address()) {
            fsv_assert(inst->dst.nr != 0);
         }

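         /* VGRF sources must stay within the bounds of their allocation;
          * address-register sources must read slots written earlier in this
          * block.
          */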
         bool read_address_reg = false;
         for (unsigned i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == VGRF) {
               fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
                              s.alloc.sizes[inst->src[i].nr]);
            } else if (inst->src[i].is_address()) {
               fsv_assert(inst->src[i].nr != 0);
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) {
                  fsv_assert_eq(inst->src[i].nr,
                                last_used_address_register[inst->src[i].address_slot(hw)]);
               }
               read_address_reg = true;
            }
         }

         /* Accumulator Registers, bspec 47251:
          *
          *    "When destination is accumulator with offset 0, destination
          *     horizontal stride must be 1."
          */
         if (intel_needs_workaround(devinfo, 14014617373) &&
             inst->dst.is_accumulator() &&
             phys_subnr(devinfo, inst->dst) == 0) {
            fsv_assert_eq(inst->dst.hstride, 1);
         }

         if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
            /* Wa_22016140776:
             *
             *    Scalar broadcast on HF math (packed or unpacked) must not
             *    be used.  Compiler must use a mov instruction to expand
             *    the scalar value to a vector before using in a HF (packed
             *    or unpacked) math operation.
             *
             * Since copy propagation knows about this restriction, nothing
             * should be able to generate these invalid source strides.
             * Detect potential problems sooner rather than later.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               fsv_assert(inst->src[i].is_scalar ||
                          !is_uniform(inst->src[i]) ||
                          inst->src[i].type != BRW_TYPE_HF);
            }
         }

         /* Update the last used address register. */
         if (read_address_reg) {
            /* When an instruction only reads the address register, we
             * assume the read parts are never going to be used again.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               if (!inst->src[i].is_address())
                  continue;
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2)
                  last_used_address_register[inst->src[i].address_slot(hw)] = 0;
            }
         }
         if (inst->dst.is_address()) {
            /* Record the parts of the address register written by this
             * instruction.
             */
            for (unsigned hw = 0; hw < inst->size_written; hw += 2)
               last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr;
         } else if (inst->uses_address_register_implicitly()) {
            /* If the instruction is making use of the address register,
             * discard the entire thing.
             */
            memset(last_used_address_register, 0,
                   sizeof(last_used_address_register));
         }
      }
   }
}
#endif