1 /*
2 * Copyright (C) 2019 Google.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "util/ralloc.h"
25
26 #include "ir3.h"
27
28 static bool
is_fp16_conv(struct ir3_instruction * instr)29 is_fp16_conv(struct ir3_instruction *instr)
30 {
31 if (instr->opc != OPC_MOV)
32 return false;
33
34 struct ir3_register *dst = instr->regs[0];
35 struct ir3_register *src = instr->regs[1];
36
37 /* disallow conversions that cannot be folded into
38 * alu instructions:
39 */
40 if (dst->flags & (IR3_REG_EVEN | IR3_REG_POS_INF))
41 return false;
42
43 if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
44 return false;
45 if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
46 return false;
47
48 if (instr->cat1.src_type == TYPE_F32 &&
49 instr->cat1.dst_type == TYPE_F16)
50 return true;
51
52 if (instr->cat1.src_type == TYPE_F16 &&
53 instr->cat1.dst_type == TYPE_F32)
54 return true;
55
56 return false;
57 }
58
59 static bool
all_uses_fp16_conv(struct ir3_instruction * conv_src)60 all_uses_fp16_conv(struct ir3_instruction *conv_src)
61 {
62 foreach_ssa_use (use, conv_src)
63 if (!is_fp16_conv(use))
64 return false;
65 return true;
66 }
67
68 /* For an instruction which has a conversion folded in, re-write the
69 * uses of *all* conv's that used that src to be a simple mov that
70 * cp can eliminate. This avoids invalidating the SSA uses, it just
71 * shifts the use to a simple mov.
72 */
73 static void
rewrite_src_uses(struct ir3_instruction * src)74 rewrite_src_uses(struct ir3_instruction *src)
75 {
76 foreach_ssa_use (use, src) {
77 assert(is_fp16_conv(use));
78
79 if (is_half(src)) {
80 use->regs[1]->flags |= IR3_REG_HALF;
81 } else {
82 use->regs[1]->flags &= ~IR3_REG_HALF;
83 }
84
85 use->cat1.src_type = use->cat1.dst_type;
86 }
87 }
88
89 static bool
try_conversion_folding(struct ir3_instruction * conv)90 try_conversion_folding(struct ir3_instruction *conv)
91 {
92 struct ir3_instruction *src;
93
94 if (!is_fp16_conv(conv))
95 return false;
96
97 /* NOTE: we can have non-ssa srcs after copy propagation: */
98 src = ssa(conv->regs[1]);
99 if (!src)
100 return false;
101
102 if (!is_alu(src))
103 return false;
104
105 /* avoid folding f2f32(f2f16) together, in cases where this is legal to
106 * do (glsl) nir should have handled that for us already:
107 */
108 if (is_fp16_conv(src))
109 return false;
110
111 switch (src->opc) {
112 case OPC_SEL_B32:
113 case OPC_SEL_B16:
114 case OPC_MAX_F:
115 case OPC_MIN_F:
116 case OPC_SIGN_F:
117 case OPC_ABSNEG_F:
118 return false;
119 case OPC_MOV:
120 /* if src is a "cov" and type doesn't match, then it can't be folded
121 * for example cov.u32u16+cov.f16f32 can't be folded to cov.u32f32
122 */
123 if (src->cat1.dst_type != src->cat1.src_type &&
124 conv->cat1.src_type != src->cat1.dst_type)
125 return false;
126 break;
127 default:
128 break;
129 }
130
131 if (!all_uses_fp16_conv(src))
132 return false;
133
134 if (src->opc == OPC_MOV) {
135 if (src->cat1.dst_type == src->cat1.src_type) {
136 /* If we're folding a conversion into a bitwise move, we need to
137 * change the dst type to F32 to get the right behavior, since we
138 * could be moving a float with a u32.u32 move.
139 */
140 src->cat1.dst_type = conv->cat1.dst_type;
141 src->cat1.src_type = conv->cat1.src_type;
142 } else {
143 /* Otherwise, for typechanging movs, we can just change the dst
144 * type to F16 to collaps the two conversions. For example
145 * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
146 */
147 src->cat1.dst_type = conv->cat1.dst_type;
148 }
149 }
150
151 ir3_set_dst_type(src, is_half(conv));
152 rewrite_src_uses(src);
153
154 return true;
155 }
156
157 bool
ir3_cf(struct ir3 * ir)158 ir3_cf(struct ir3 *ir)
159 {
160 void *mem_ctx = ralloc_context(NULL);
161 bool progress = false;
162
163 ir3_find_ssa_uses(ir, mem_ctx, false);
164
165 foreach_block (block, &ir->block_list) {
166 foreach_instr (instr, &block->instr_list) {
167 progress |= try_conversion_folding(instr);
168 }
169 }
170
171 ralloc_free(mem_ctx);
172
173 return progress;
174 }
175