/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "ir3_ra.h"
#include "ir3_shader.h"

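/* Post-RA lowering of parallel copies.
 *
 * This pass rewrites the parallel-copy, collect, and split
 * meta-instructions left behind by register allocation into real mov, swz,
 * and xor instructions (phi meta-instructions are simply deleted at this
 * point). Each parallel copy's transfer graph is resolved by emitting
 * unblocked copies first and then breaking the remaining cycles with swaps.
 */
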
struct copy_src {
   unsigned flags;
   union {
      uint32_t imm;
      physreg_t reg;
      unsigned const_num;
   };
};

struct copy_entry {
   physreg_t dst;
   unsigned flags;
   bool done;

   struct copy_src src;
};

static unsigned
copy_entry_size(const struct copy_entry *entry)
{
   return (entry->flags & IR3_REG_HALF) ? 1 : 2;
}

static struct copy_src
get_copy_src(const struct ir3_register *reg, unsigned offset)
{
   if (reg->flags & IR3_REG_IMMED) {
      return (struct copy_src){
         .flags = IR3_REG_IMMED,
         .imm = reg->uim_val,
      };
   } else if (reg->flags & IR3_REG_CONST) {
      return (struct copy_src){
         .flags = IR3_REG_CONST,
         .const_num = reg->num,
      };
   } else {
      return (struct copy_src){
         .flags = 0,
         .reg = ra_reg_get_physreg(reg) + offset,
      };
   }
}

static void
do_xor(struct ir3_instruction *instr, unsigned dst_num, unsigned src1_num,
       unsigned src2_num, unsigned flags)
{
   struct ir3_instruction *xor
      = ir3_instr_create_at(ir3_before_instr(instr), OPC_XOR_B, 1, 2);
   ir3_dst_create(xor, dst_num, flags);
   ir3_src_create(xor, src1_num, flags);
   ir3_src_create(xor, src2_num, flags);
}

static void
do_swap(struct ir3_compiler *compiler, struct ir3_instruction *instr,
        const struct copy_entry *entry)
{
   assert(!entry->src.flags);

   if (entry->flags & IR3_REG_HALF) {
      const unsigned half_size =
         (entry->flags & IR3_REG_SHARED) ? RA_SHARED_HALF_SIZE : RA_HALF_SIZE;

      /* We currently make sure to never emit parallel copies where the
       * source/destination is a half-reg above the range accessible to half
       * registers. However, when a full-reg source overlaps a half-reg
       * destination or vice versa, it can be very, very complicated to come
       * up with a series of "legal" swaps and copies to resolve the
       * parallel copy. So here we provide a fallback to implement the
       * "illegal" swap instead. This may also be useful for implementing
       * "spilling" half-regs to the inaccessible space.
       */
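      /* In outline (a sketch of the three do_swap() steps below):
       *  1. swap the full register containing the out-of-range source with
       *     a scratch full register near physreg 0,
       *  2. do the original half-reg swap, with the source redirected to
       *     the scratch location (and the dest too, if it happened to live
       *     in the same full register),
       *  3. swap the scratch register back.
       */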
      if (entry->src.reg >= half_size) {
         /* Choose a temporary that doesn't overlap src or dst */
         physreg_t tmp = entry->dst < 2 ? 2 : 0;

         /* Swap src and the temporary */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->src.reg & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });

         /* If src and dst are within the same full register, then swapping src
          * with tmp above will also move dst to tmp. Account for that here.
          */
         unsigned dst =
            (entry->src.reg & ~1u) == (entry->dst & ~1u) ?
            tmp + (entry->dst & 1u) : entry->dst;

         /* Do the original swap with src replaced with tmp */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = tmp + (entry->src.reg & 1)},
                    .dst = dst,
                    .flags = entry->flags,
                 });

         /* Swap src and the temporary back */
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->src.reg & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });
         return;
      }

      /* If dst is not addressable, we only need to swap the arguments and
       * let the case above handle it.
       */
      if (entry->dst >= half_size) {
         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst},
                    .dst = entry->src.reg,
                    .flags = entry->flags,
                 });
         return;
      }
   }

   unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
   unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

   /* a5xx+ is known to support swz, which enables us to swap two registers
    * in-place. If unsupported (or for shared registers, which also take the
    * xor path), we emulate the swap using the xor trick.
    */
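   /* The xor trick is the standard in-place swap (a ^= b; b ^= a; a ^= b):
    * e.g. with a = 0b0101 and b = 0b0011, the three steps give a = 0b0110,
    * then b = 0b0101, then a = 0b0011, leaving the values exchanged.
    */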
   if (compiler->gen < 5 || (entry->flags & IR3_REG_SHARED)) {
      do_xor(instr, dst_num, dst_num, src_num, entry->flags);
      do_xor(instr, src_num, src_num, dst_num, entry->flags);
      do_xor(instr, dst_num, dst_num, src_num, entry->flags);
   } else {
      struct ir3_instruction *swz =
         ir3_instr_create_at(ir3_before_instr(instr), OPC_SWZ, 2, 2);
      ir3_dst_create(swz, dst_num, entry->flags);
      ir3_dst_create(swz, src_num, entry->flags);
      ir3_src_create(swz, src_num, entry->flags);
      ir3_src_create(swz, dst_num, entry->flags);
      swz->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
      swz->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
      swz->repeat = 1;
   }
}

static void
do_copy(struct ir3_compiler *compiler, struct ir3_instruction *instr,
        const struct copy_entry *entry)
{
   if (entry->flags & IR3_REG_HALF) {
      /* See do_swap() for why this is here. */
      const unsigned half_size =
         (entry->flags & IR3_REG_SHARED) ? RA_SHARED_HALF_SIZE : RA_HALF_SIZE;
      if (entry->dst >= half_size) {
         /* TODO: is there a hw instruction we can use for this case? */
         physreg_t tmp = !entry->src.flags && entry->src.reg < 2 ? 2 : 0;

         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });

         /* As in do_swap(), account for src being swapped with tmp if
          * src and dst are in the same register.
          */
         struct copy_src src = entry->src;
         if (!src.flags && (src.reg & ~1u) == (entry->dst & ~1u))
            src.reg = tmp + (src.reg & 1u);

         do_copy(compiler, instr,
                 &(struct copy_entry){
                    .src = src,
                    .dst = tmp + (entry->dst & 1),
                    .flags = entry->flags,
                 });

         do_swap(compiler, instr,
                 &(struct copy_entry){
                    .src = {.reg = entry->dst & ~1u},
                    .dst = tmp,
                    .flags = entry->flags & ~IR3_REG_HALF,
                 });
         return;
      }

      if (!entry->src.flags && entry->src.reg >= half_size) {
         unsigned src_num = ra_physreg_to_num(entry->src.reg & ~1u,
                                              entry->flags & ~IR3_REG_HALF);
         unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

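         /* The source is read as a full register: an even physreg holds the
          * value in its low 16 bits and an odd physreg in its high 16 bits,
          * so we either narrow with a cov or shift the high half down.
          */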
         if (entry->src.reg % 2 == 0) {
            /* cov.u32u16 dst, src */
            struct ir3_instruction *cov =
               ir3_instr_create_at(ir3_before_instr(instr), OPC_MOV, 1, 1);
            ir3_dst_create(cov, dst_num, entry->flags);
            ir3_src_create(cov, src_num, entry->flags & ~IR3_REG_HALF);
            cov->cat1.dst_type = TYPE_U16;
            cov->cat1.src_type = TYPE_U32;
         } else {
            /* shr.b dst, src, (16) */
            struct ir3_instruction *shr =
               ir3_instr_create_at(ir3_before_instr(instr), OPC_SHR_B, 1, 2);
            ir3_dst_create(shr, dst_num, entry->flags);
            ir3_src_create(shr, src_num, entry->flags & ~IR3_REG_HALF);
            ir3_src_create(shr, 0, IR3_REG_IMMED)->uim_val = 16;
         }
         return;
      }
   }

   unsigned src_num = ra_physreg_to_num(entry->src.reg, entry->flags);
   unsigned dst_num = ra_physreg_to_num(entry->dst, entry->flags);

   struct ir3_instruction *mov =
      ir3_instr_create_at(ir3_before_instr(instr), OPC_MOV, 1, 1);
   ir3_dst_create(mov, dst_num, entry->flags);
   if (entry->src.flags & (IR3_REG_IMMED | IR3_REG_CONST))
      ir3_src_create(mov, INVALID_REG,
                     (entry->flags & IR3_REG_HALF) | entry->src.flags);
   else
      ir3_src_create(mov, src_num, entry->flags);
   mov->cat1.dst_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
   mov->cat1.src_type = (entry->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
   if (entry->src.flags & IR3_REG_IMMED)
      mov->srcs[0]->uim_val = entry->src.imm;
   else if (entry->src.flags & IR3_REG_CONST)
      mov->srcs[0]->num = entry->src.const_num;
}

struct copy_ctx {
   /* For each physreg, the number of pending copy entries that use it as a
    * source. Once this drops to zero, the physreg is unblocked and can be
    * moved to.
    */
   unsigned physreg_use_count[RA_MAX_FILE_SIZE];

   /* For each physreg, the pending copy_entry that uses it as a dest. */
   struct copy_entry *physreg_dst[RA_MAX_FILE_SIZE];

   struct copy_entry entries[RA_MAX_FILE_SIZE];
   unsigned entry_count;
};

static bool
entry_blocked(struct copy_entry *entry, struct copy_ctx *ctx)
{
   for (unsigned i = 0; i < copy_entry_size(entry); i++) {
      if (ctx->physreg_use_count[entry->dst + i] != 0)
         return true;
   }

   return false;
}

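/* Convert a pending full-register copy into two half-register copies: the
 * existing entry becomes the low half and a new entry is appended for the
 * high half. Only meaningful with mergedregs, where both halves live in
 * the same file.
 */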
static void
split_32bit_copy(struct copy_ctx *ctx, struct copy_entry *entry)
{
   assert(!entry->done);
   assert(!(entry->src.flags & (IR3_REG_IMMED | IR3_REG_CONST)));
   assert(copy_entry_size(entry) == 2);
   struct copy_entry *new_entry = &ctx->entries[ctx->entry_count++];

   new_entry->dst = entry->dst + 1;
   new_entry->src.flags = entry->src.flags;
   new_entry->src.reg = entry->src.reg + 1;
   new_entry->done = false;
   entry->flags |= IR3_REG_HALF;
   new_entry->flags = entry->flags;
   ctx->physreg_dst[entry->dst + 1] = new_entry;
}

static void
_handle_copies(struct ir3_compiler *compiler, struct ir3_instruction *instr,
               struct copy_ctx *ctx)
{
   /* Set up the bookkeeping */
   memset(ctx->physreg_dst, 0, sizeof(ctx->physreg_dst));
   memset(ctx->physreg_use_count, 0, sizeof(ctx->physreg_use_count));

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct copy_entry *entry = &ctx->entries[i];
      for (unsigned j = 0; j < copy_entry_size(entry); j++) {
         if (!entry->src.flags)
            ctx->physreg_use_count[entry->src.reg + j]++;

         /* Copies should not have overlapping destinations. */
         assert(!ctx->physreg_dst[entry->dst + j]);
         ctx->physreg_dst[entry->dst + j] = entry;
      }
   }

   bool progress = true;
   while (progress) {
      progress = false;

      /* Step 1: resolve paths in the transfer graph. This means finding
       * copies whose destinations aren't blocked by something else and then
       * emitting them, continuing this process until every copy is blocked
       * and there are only cycles left.
       *
       * TODO: We should note that src is also available in dst to unblock
       * cycles that src is involved in.
       */
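      /* For example, with pending entries {r1 <- r0, r2 <- r1}, r2 is not
       * the source of any copy, so r2 <- r1 is unblocked and emitted first;
       * that drops r1's use count to zero and unblocks r1 <- r0 on the next
       * iteration.
       */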

      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct copy_entry *entry = &ctx->entries[i];
         if (!entry->done && !entry_blocked(entry, ctx)) {
            entry->done = true;
            progress = true;
            do_copy(compiler, instr, entry);
            for (unsigned j = 0; j < copy_entry_size(entry); j++) {
               if (!entry->src.flags)
                  ctx->physreg_use_count[entry->src.reg + j]--;
               ctx->physreg_dst[entry->dst + j] = NULL;
            }
         }
      }

      if (progress)
         continue;

      /* Step 2: Find partially blocked copies and split them. In the
       * mergedregs case, we can have 32-bit copies which are only blocked
       * on one 16-bit half, and splitting them helps get things moving.
       *
       * We can skip splitting copies if the source isn't a register,
       * however, because it does not unblock anything and therefore doesn't
       * contribute to making forward progress with step 1. These copies
       * should still be resolved eventually in step 1 because they can't be
       * part of a cycle.
       */
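      /* For example, if a full-reg copy's destination has one 16-bit half
       * that is still the source of a pending copy while the other half is
       * free, splitting it lets the free half be copied in step 1.
       */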
      for (unsigned i = 0; i < ctx->entry_count; i++) {
         struct copy_entry *entry = &ctx->entries[i];
         if (entry->done || entry->flags & IR3_REG_HALF)
            continue;

         if ((ctx->physreg_use_count[entry->dst] == 0 ||
              ctx->physreg_use_count[entry->dst + 1] == 0) &&
             !(entry->src.flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
            split_32bit_copy(ctx, entry);
            progress = true;
         }
      }
   }

   /* Step 3: resolve cycles through swapping.
    *
    * At this point, the transfer graph should consist of only cycles.
    * The reason is that, given any physreg n_1 that's the source of a
    * remaining entry, it has a destination n_2, which (because every
    * copy is blocked) is the source of some other copy whose destination
    * is n_3, and so we can follow the chain until we get a cycle. If we
    * reached some node other than n_1:
    *
    *  n_1 -> n_2 -> ... -> n_i
    *          ^             |
    *          |-------------|
    *
    *  then n_2 would be the destination of 2 copies, which is illegal
    *  (checked above in an assert). So n_1 must be part of a cycle:
    *
    *  n_1 -> n_2 -> ... -> n_i
    *  ^                     |
    *  |---------------------|
    *
    *  and this must be the only cycle n_1 is involved in, because any other
    *  path starting from n_1 would also have to end in n_1, resulting in
    *  a node somewhere along the way being the destination of 2 copies
    *  when the 2 paths merge.
    *
    *  The way we resolve the cycle is through picking a copy (n_1, n_2)
    *  and swapping n_1 and n_2. This moves n_1 to n_2, so n_2 is taken
    *  out of the cycle:
    *
    *  n_1 -> ... -> n_i
    *  ^              |
    *  |--------------|
    *
    *  and we can keep repeating this until the cycle is empty.
    */
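   /* For example, the two-entry cycle {r0 <- r1, r1 <- r0} is resolved by
    * a single swap of r0 and r1: the swapped entry is done, and the other
    * entry's source is rewritten to its own destination, making it a
    * trivial copy that is skipped below.
    */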

   for (unsigned i = 0; i < ctx->entry_count; i++) {
      struct copy_entry *entry = &ctx->entries[i];
      if (entry->done)
         continue;

      assert(!entry->src.flags);

      /* catch trivial copies */
      if (entry->dst == entry->src.reg) {
         entry->done = true;
         continue;
      }

      do_swap(compiler, instr, entry);

      /* Split any blocking copies whose sources are only partially
       * contained within our destination.
       */
      if (entry->flags & IR3_REG_HALF) {
         for (unsigned j = 0; j < ctx->entry_count; j++) {
            struct copy_entry *blocking = &ctx->entries[j];

            if (blocking->done)
               continue;

            if (blocking->src.reg <= entry->dst &&
                blocking->src.reg + 1 >= entry->dst &&
                !(blocking->flags & IR3_REG_HALF)) {
               split_32bit_copy(ctx, blocking);
            }
         }
      }

      /* Update sources of blocking copies.
       *
       * Note: at this point, every blocking copy's source should be
       * contained within our destination.
       */
      for (unsigned j = 0; j < ctx->entry_count; j++) {
         struct copy_entry *blocking = &ctx->entries[j];
         if (blocking->src.reg >= entry->dst &&
             blocking->src.reg < entry->dst + copy_entry_size(entry)) {
            blocking->src.reg =
               entry->src.reg + (blocking->src.reg - entry->dst);
         }
      }

      entry->done = true;
   }
}

static void
handle_copies(struct ir3_shader_variant *v, struct ir3_instruction *instr,
              struct copy_entry *entries, unsigned entry_count)
{
   struct copy_ctx ctx;

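   /* Shared registers live in their own register file, separate from the
    * normal half/full registers, so their copies never interact with the
    * rest and can be resolved independently.
    */
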
   /* handle shared copies first */
   ctx.entry_count = 0;
   for (unsigned i = 0; i < entry_count; i++) {
      if (entries[i].flags & IR3_REG_SHARED)
         ctx.entries[ctx.entry_count++] = entries[i];
   }
   _handle_copies(v->compiler, instr, &ctx);

   if (v->mergedregs) {
      /* Half regs and full regs are in the same file, so handle everything
       * at once.
       */
      ctx.entry_count = 0;
      for (unsigned i = 0; i < entry_count; i++) {
         if (!(entries[i].flags & IR3_REG_SHARED))
            ctx.entries[ctx.entry_count++] = entries[i];
      }
      _handle_copies(v->compiler, instr, &ctx);
   } else {
      /* There may be both half copies and full copies, so we have to handle
       * them separately, since the half and full files don't interfere.
       */
      ctx.entry_count = 0;
      for (unsigned i = 0; i < entry_count; i++) {
         if (entries[i].flags & IR3_REG_HALF)
            ctx.entries[ctx.entry_count++] = entries[i];
      }
      _handle_copies(v->compiler, instr, &ctx);

      ctx.entry_count = 0;
      for (unsigned i = 0; i < entry_count; i++) {
         if (!(entries[i].flags & (IR3_REG_HALF | IR3_REG_SHARED)))
            ctx.entries[ctx.entry_count++] = entries[i];
      }
      _handle_copies(v->compiler, instr, &ctx);
   }
}

void
ir3_lower_copies(struct ir3_shader_variant *v)
{
   DECLARE_ARRAY(struct copy_entry, copies);
   copies_count = copies_sz = 0;
   copies = NULL;

   foreach_block (block, &v->ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PARALLEL_COPY) {
            copies_count = 0;
            for (unsigned i = 0; i < instr->dsts_count; i++) {
               struct ir3_register *dst = instr->dsts[i];
               struct ir3_register *src = instr->srcs[i];
               unsigned flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
               unsigned dst_physreg = ra_reg_get_physreg(dst);
               for (unsigned j = 0; j < reg_elems(dst); j++) {
                  array_insert(
                     NULL, copies,
                     (struct copy_entry){
                        .dst = dst_physreg + j * reg_elem_size(dst),
                        .src = get_copy_src(src, j * reg_elem_size(dst)),
                        .flags = flags,
                     });
               }
            }
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_COLLECT) {
            copies_count = 0;
            struct ir3_register *dst = instr->dsts[0];
            unsigned flags = dst->flags & (IR3_REG_HALF | IR3_REG_SHARED);
            for (unsigned i = 0; i < instr->srcs_count; i++) {
               struct ir3_register *src = instr->srcs[i];
               array_insert(NULL, copies,
                            (struct copy_entry){
                               .dst = ra_num_to_physreg(dst->num + i, flags),
                               .src = get_copy_src(src, 0),
                               .flags = flags,
                            });
            }
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_SPLIT) {
            copies_count = 0;
            struct ir3_register *dst = instr->dsts[0];
            struct ir3_register *src = instr->srcs[0];
            unsigned flags = src->flags & (IR3_REG_HALF | IR3_REG_SHARED);
            array_insert(NULL, copies,
                         (struct copy_entry){
                            .dst = ra_reg_get_physreg(dst),
                            .src = get_copy_src(
                               src, instr->split.off * reg_elem_size(dst)),
                            .flags = flags,
                         });
            handle_copies(v, instr, copies, copies_count);
            list_del(&instr->node);
         } else if (instr->opc == OPC_META_PHI) {
            list_del(&instr->node);
         } else if (instr->opc == OPC_MOV) {
            /* There seems to be a HW bug where a mov whose source is a
             * 16-bit non-shared register and whose destination is a 16-bit
             * shared register doesn't work when only fibers 64-127 are
             * active. We work around it by instead generating a narrowing
             * mov, which only works with even-numbered source registers
             * (i.e. .x and .z); for odd numbers we swap the components of
             * the source and its even neighbor beforehand and unswap them
             * afterwards to make it work for everything.
             */
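            /* For example, for an odd source hr3: the first swz exchanges
             * hr2 and hr3 so the wanted value sits in the even (low) half,
             * the narrowing mov then reads the containing full register r1
             * and writes its low 16 bits to the shared dest, and the second
             * swz restores hr2/hr3 afterwards.
             */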
            if ((instr->dsts[0]->flags & IR3_REG_SHARED) &&
                (instr->dsts[0]->flags & IR3_REG_HALF) &&
                !(instr->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_IMMED |
                                           IR3_REG_CONST)) &&
                (instr->srcs[0]->flags & IR3_REG_HALF)) {
               unsigned src_num = instr->srcs[0]->num;
               unsigned dst_num = instr->dsts[0]->num;

               for (unsigned i = 0; i <= instr->repeat; i++,
                    src_num++, dst_num++) {
                  if (src_num & 1) {
                     for (unsigned j = 0; j < 2; j++) {
                        struct ir3_cursor cursor = j == 0
                                                      ? ir3_before_instr(instr)
                                                      : ir3_after_instr(instr);
                        struct ir3_instruction *swz =
                           ir3_instr_create_at(cursor, OPC_SWZ, 2, 2);
                        ir3_dst_create(swz, src_num - 1, IR3_REG_HALF);
                        ir3_dst_create(swz, src_num, IR3_REG_HALF);
                        ir3_src_create(swz, src_num, IR3_REG_HALF);
                        ir3_src_create(swz, src_num - 1, IR3_REG_HALF);
                        swz->cat1.dst_type = TYPE_U16;
                        swz->cat1.src_type = TYPE_U16;
                        swz->repeat = 1;
                     }
                  }

                  struct ir3_instruction *mov = ir3_instr_create_at(
                     ir3_before_instr(instr), OPC_MOV, 1, 1);

                  ir3_dst_create(mov, dst_num, instr->dsts[0]->flags);
                  ir3_src_create(mov, src_num / 2,
                                 instr->srcs[0]->flags & ~IR3_REG_HALF);

                  /* Float conversions are banned in this case in
                   * ir3_valid_flags(), so we only have to worry about normal
                   * non-converting moves.
                   */
                  assert(instr->cat1.src_type == TYPE_U16 ||
                         instr->cat1.src_type == TYPE_S16);
                  mov->cat1.src_type = TYPE_U32;
                  mov->cat1.dst_type = TYPE_U16;
               }

               list_del(&instr->node);
            }
         }
      }
   }

   if (copies)
      ralloc_free(copies);
}