/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file
 *
 * Implements a pass that validates various invariants of the IR.  The current
 * pass only validates that uses of GRFs are sane.  More can be added later.
 */

#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_eu.h"

#define fsv_assert(assertion)                                           \
   {                                                                    \
      if (!(assertion)) {                                               \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: '%s' failed\n", __FILE__, __LINE__, #assertion);  \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_eq(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a != b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A == B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_ne(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a == b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A != B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_lte(A, B)                                            \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a > b) {                                                      \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A <= B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, "  A = %s = %u\n", #A, a);                     \
         fprintf(stderr, "  B = %s = %u\n", #B, b);                     \
         abort();                                                       \
      }                                                                 \
   }
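
/* These macros expect an fs_visitor `s` and an fs_inst *inst to be in scope
 * at the point of use.  A minimal usage sketch (taken from the checks below):
 *
 *    fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
 *                   s.alloc.sizes[inst->dst.nr]);
 *
 * On failure, the shader stage and the offending instruction are printed
 * before abort(), so a report identifies both the instruction and the check.
 */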

#ifndef NDEBUG
static inline bool
is_ud_imm(const brw_reg &reg)
{
   return reg.file == IMM && reg.type == BRW_TYPE_UD;
}

static void
validate_memory_logical(const fs_visitor &s, const fs_inst *inst)
{
   const intel_device_info *devinfo = s.devinfo;

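   /* Every control source of a logical memory instruction must be an
    * immediate of type UD.
    */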
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));

   enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
   enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
   bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
   bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
   enum memory_logical_mode mode =
      (memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud;

   enum lsc_data_size data_size =
      (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
   unsigned data_size_B = lsc_data_size_bytes(data_size);

   if (!devinfo->has_lsc) {
      fsv_assert(data_size == LSC_DATA_SIZE_D8U32 ||
                 data_size == LSC_DATA_SIZE_D16U32 ||
                 data_size == LSC_DATA_SIZE_D32 ||
                 data_size == LSC_DATA_SIZE_D64);

      if (transpose) {
         const unsigned min_alignment =
            mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
         fsv_assert(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud >= min_alignment);
      }
   }

   fsv_assert(!transpose || !include_helpers);
   fsv_assert(!transpose || lsc_opcode_has_transpose(op));

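   /* Flat (stateless) addressing carries no surface binding. */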
   if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
      fsv_assert(inst->src[MEMORY_LOGICAL_BINDING].file == BAD_FILE);

   if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA1));

      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].type ==
                 inst->src[MEMORY_LOGICAL_DATA1].type);
   }

   if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA0));

      fsv_assert(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type) ==
                 data_size_B);
   }

   if (inst->dst.file != BAD_FILE)
      fsv_assert(brw_type_size_bytes(inst->dst.type) == data_size_B);

   switch (inst->opcode) {
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      fsv_assert(lsc_opcode_is_store(op));
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
      fsv_assert(lsc_opcode_is_atomic(op));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE)
                  == (lsc_op_num_data_values(op) < 1));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE)
                  == (lsc_op_num_data_values(op) < 2));
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == 1);
      fsv_assert(!include_helpers);
      break;
   default:
      unreachable("invalid opcode");
   }
}

static const char *
brw_shader_phase_to_string(enum brw_shader_phase phase)
{
   switch (phase) {
   case BRW_SHADER_PHASE_INITIAL:               return "INITIAL";
   case BRW_SHADER_PHASE_AFTER_NIR:             return "AFTER_NIR";
   case BRW_SHADER_PHASE_AFTER_OPT_LOOP:        return "AFTER_OPT_LOOP";
   case BRW_SHADER_PHASE_AFTER_EARLY_LOWERING:  return "AFTER_EARLY_LOWERING";
   case BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING: return "AFTER_MIDDLE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_LATE_LOWERING:   return "AFTER_LATE_LOWERING";
   case BRW_SHADER_PHASE_AFTER_REGALLOC:        return "AFTER_REGALLOC";
   case BRW_SHADER_PHASE_INVALID:               break;
   }
   unreachable("invalid phase");
   return NULL;
}

static void
brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
{
   enum brw_shader_phase invalid_from = BRW_SHADER_PHASE_INVALID;

   switch (inst->opcode) {
   case FS_OPCODE_FB_WRITE_LOGICAL:
   case FS_OPCODE_FB_READ_LOGICAL:
   case SHADER_OPCODE_TEX_LOGICAL:
   case SHADER_OPCODE_TXD_LOGICAL:
   case SHADER_OPCODE_TXF_LOGICAL:
   case SHADER_OPCODE_TXL_LOGICAL:
   case SHADER_OPCODE_TXS_LOGICAL:
   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
   case FS_OPCODE_TXB_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
   case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
   case SHADER_OPCODE_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_LOGICAL:
   case SHADER_OPCODE_TG4_BIAS_LOGICAL:
   case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
   case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
   case SHADER_OPCODE_GET_BUFFER_SIZE:
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
   case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
   case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
   case RT_OPCODE_TRACE_RAY_LOGICAL:
   case SHADER_OPCODE_URB_READ_LOGICAL:
   case SHADER_OPCODE_URB_WRITE_LOGICAL:
   case SHADER_OPCODE_REDUCE:
   case SHADER_OPCODE_INCLUSIVE_SCAN:
   case SHADER_OPCODE_EXCLUSIVE_SCAN:
   case SHADER_OPCODE_VOTE_ANY:
   case SHADER_OPCODE_VOTE_ALL:
   case SHADER_OPCODE_VOTE_EQUAL:
   case SHADER_OPCODE_BALLOT:
   case SHADER_OPCODE_QUAD_SWAP:
   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
   case SHADER_OPCODE_READ_FROM_CHANNEL:
      invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
      break;

   case SHADER_OPCODE_LOAD_PAYLOAD:
      invalid_from = BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING;
      break;

   default:
      /* Nothing to do. */
      break;
   }

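   /* The brw_shader_phase enum is declared in pipeline order, so a single
    * >= comparison is enough to flag an instruction that survived past the
    * last phase where it is legal.
    */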
   assert(s.phase < BRW_SHADER_PHASE_INVALID);
   if (s.phase >= invalid_from) {
      fprintf(stderr, "INVALID INSTRUCTION IN PHASE: %s\n",
              brw_shader_phase_to_string(s.phase));
      brw_print_instruction(s, inst, stderr);
      abort();
   }
}

void
brw_validate(const fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;

   if (s.phase <= BRW_SHADER_PHASE_AFTER_NIR)
      return;

   s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));

   foreach_block(block, s.cfg) {
      /* Track the last used address register. Usage of the address register
       * in the IR should be limited to within a block; otherwise we would be
       * unable to schedule some instructions without spilling the address
       * register to a VGRF.
       *
       * Another pattern we stick to when using the address register in the
       * IR is that we write and read the register in pairs of instructions.
       */
      uint32_t last_used_address_register[16] = {};
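      /* Each entry records the nr of the register that last wrote the
       * corresponding address subregister slot; 0 means the slot is not
       * live.
       */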

      foreach_inst_in_block (fs_inst, inst, block) {
         brw_validate_instruction_phase(s, inst);

         switch (inst->opcode) {
         case SHADER_OPCODE_SEND:
            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            break;

         case SHADER_OPCODE_SEND_GATHER:
            fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
            fsv_assert(devinfo->ver >= 30);
            break;

         case BRW_OPCODE_MOV:
            fsv_assert(inst->sources == 1);
            break;

         case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
         case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
         case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
            validate_memory_logical(s, inst);
            break;

         default:
            break;
         }

         /* On Xe2, the "write the accumulator in addition to the explicit
          * destination" bit no longer exists. Try to catch uses of this
          * feature earlier in the process.
          */
         if (devinfo->ver >= 20 && inst->writes_accumulator) {
            fsv_assert(inst->dst.is_accumulator() ||
                       inst->opcode == BRW_OPCODE_ADDC ||
                       inst->opcode == BRW_OPCODE_MACH ||
                       inst->opcode == BRW_OPCODE_SUBB);
         }

         if (inst->is_3src(s.compiler)) {
            const unsigned integer_sources =
               brw_type_is_int(inst->src[0].type) +
               brw_type_is_int(inst->src[1].type) +
               brw_type_is_int(inst->src[2].type);
            const unsigned float_sources =
               brw_type_is_float(inst->src[0].type) +
               brw_type_is_float(inst->src[1].type) +
               brw_type_is_float(inst->src[2].type);

            fsv_assert((integer_sources == 3 && float_sources == 0) ||
                       (integer_sources == 0 && float_sources == 3));

            if (devinfo->ver >= 10) {
               for (unsigned i = 0; i < 3; i++) {
                  if (inst->src[i].file == IMM)
                     continue;

                  switch (inst->src[i].vstride) {
                  case BRW_VERTICAL_STRIDE_0:
                  case BRW_VERTICAL_STRIDE_4:
                  case BRW_VERTICAL_STRIDE_8:
                  case BRW_VERTICAL_STRIDE_16:
                     break;

                  case BRW_VERTICAL_STRIDE_1:
                     fsv_assert_lte(12, devinfo->ver);
                     break;

                  case BRW_VERTICAL_STRIDE_2:
                     fsv_assert_lte(devinfo->ver, 11);
                     break;

                  default:
                     fsv_assert(!"invalid vstride");
                     break;
                  }
               }
            } else if (s.grf_used != 0) {
               /* Only perform the pre-Gfx10 checks after register allocation
                * has occurred.
                *
                * Many passes (e.g., constant copy propagation) will
                * generate invalid 3-source instructions with the
                * expectation that later passes (e.g., combine constants) will
                * fix them.
                */
               for (unsigned i = 0; i < 3; i++) {
                  fsv_assert_ne(inst->src[i].file, IMM);

                  /* A stride of 1 (the usual case) or 0, with a special
                   * "repctrl" bit, is allowed. The repctrl bit doesn't work
                   * for 64-bit datatypes, so if the source type is 64-bit
                   * then only a stride of 1 is allowed. From the Broadwell
                   * PRM, Volume 7 "3D Media GPGPU", page 944:
                   *
                   *    This is applicable to 32b datatypes and 16b datatype.
                   *    64b datatypes cannot use the replicate control.
                   */
                  const unsigned stride_in_bytes = byte_stride(inst->src[i]);
                  const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
                  if (stride_in_bytes == 0) {
                     /* If the source is_scalar, then the stride will be
                      * converted to <4;4,1> in brw_lower_scalar_fp64_MAD
                      * after SIMD splitting.
                      */
                     if (!inst->src[i].is_scalar)
                        fsv_assert_lte(size_in_bytes, 4);
                  } else {
                     fsv_assert_eq(stride_in_bytes, size_in_bytes);
                  }
               }
            }
         }

         if (inst->dst.file == VGRF) {
            fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
                           s.alloc.sizes[inst->dst.nr]);
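            /* A SIMD write with a zero stride would make all channels
             * collide on a single element.
             */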
            if (inst->exec_size > 1)
               fsv_assert_ne(inst->dst.stride, 0);
         } else if (inst->dst.is_address()) {
            fsv_assert(inst->dst.nr != 0);
         }

         bool read_address_reg = false;
         for (unsigned i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == VGRF) {
               fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(devinfo, inst, i),
                              s.alloc.sizes[inst->src[i].nr]);
            } else if (inst->src[i].is_address()) {
               fsv_assert(inst->src[i].nr != 0);
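               /* Each address subregister slot read here must have been
                * written last by this same register earlier in the block.
                */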
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2) {
                  fsv_assert_eq(inst->src[i].nr,
                                last_used_address_register[inst->src[i].address_slot(hw)]);
               }
               read_address_reg = true;
            }
         }

         /* Accumulator Registers, bspec 47251:
          *
          * "When destination is accumulator with offset 0, destination
          * horizontal stride must be 1."
          */
         if (intel_needs_workaround(devinfo, 14014617373) &&
             inst->dst.is_accumulator() &&
             phys_subnr(devinfo, inst->dst) == 0) {
            fsv_assert_eq(inst->dst.hstride, 1);
         }

         if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
            /* Wa_22016140776:
             *
             *    Scalar broadcast on HF math (packed or unpacked) must not be
             *    used. Compiler must use a mov instruction to expand the
             *    scalar value to a vector before using in a HF (packed or
             *    unpacked) math operation.
             *
             * Since copy propagation knows about this restriction, nothing
             * should be able to generate these invalid source strides. Detect
             * potential problems sooner rather than later.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               fsv_assert(inst->src[i].is_scalar ||
                          !is_uniform(inst->src[i]) ||
                          inst->src[i].type != BRW_TYPE_HF);
            }
         }

         /* Update the last used address register. */
         if (read_address_reg) {
            /* When an instruction only reads the address register, we assume
             * the read parts are never going to be used again.
             */
            for (unsigned i = 0; i < inst->sources; i++) {
               if (!inst->src[i].is_address())
                  continue;
               for (unsigned hw = 0; hw < inst->size_read(devinfo, i); hw += 2)
                  last_used_address_register[inst->src[i].address_slot(hw)] = 0;
            }
         }
         if (inst->dst.is_address()) {
            /* Record the written parts of the address register. */
            for (unsigned hw = 0; hw < inst->size_written; hw += 2)
               last_used_address_register[inst->dst.address_slot(hw)] = inst->dst.nr;
         } else if (inst->uses_address_register_implicitly()) {
            /* If the instruction is making implicit use of the address
             * register, discard the entire thing.
             */
            memset(last_used_address_register, 0,
                   sizeof(last_used_address_register));
         }
      }
   }
}
#endif