• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Jonathan Marek <jonathan@marek.ca>
25  */
26 
27 #include "ir2_private.h"
28 
29 /* if an instruction has side effects, we should never kill it */
has_side_effects(struct ir2_instr * instr)30 static bool has_side_effects(struct ir2_instr *instr)
31 {
32 	if (instr->type == IR2_CF)
33 		return true;
34 	else if (instr->type == IR2_FETCH)
35 		return false;
36 
37 	switch (instr->alu.scalar_opc) {
38 	case PRED_SETEs ... KILLONEs:
39 		return true;
40 	default:
41 		break;
42 	}
43 
44 	switch (instr->alu.vector_opc) {
45 	case PRED_SETE_PUSHv ... KILLNEv:
46 		return true;
47 	default:
48 		break;
49 	}
50 
51 	return instr->alu.export >= 0;
52 }
53 
54 /* mark an instruction as required, and all its sources recursively */
set_need_emit(struct ir2_context * ctx,struct ir2_instr * instr)55 static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
56 {
57 	struct ir2_reg *reg;
58 
59 	/* don't repeat work already done */
60 	if (instr->need_emit)
61 		return;
62 
63 	instr->need_emit = true;
64 
65 	ir2_foreach_src(src, instr) {
66 		switch (src->type) {
67 		case IR2_SRC_SSA:
68 			set_need_emit(ctx, &ctx->instr[src->num]);
69 			break;
70 		case IR2_SRC_REG:
71 			/* slow ..  */
72 			reg = get_reg_src(ctx, src);
73 			ir2_foreach_instr(instr, ctx) {
74 				if (!instr->is_ssa && instr->reg == reg)
75 					set_need_emit(ctx, instr);
76 			}
77 		default:
78 			break;
79 		}
80 	}
81 }
82 
83 /* get current bit mask of allocated components for a register */
reg_mask(struct ir2_context * ctx,unsigned idx)84 static unsigned reg_mask(struct ir2_context *ctx, unsigned idx)
85 {
86 	return ctx->reg_state[idx/8] >> idx%8*4 & 0xf;
87 }
88 
reg_setmask(struct ir2_context * ctx,unsigned idx,unsigned c)89 static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
90 {
91 	idx = idx * 4 + c;
92 	ctx->reg_state[idx/32] |= 1 << idx%32;
93 }
94 
reg_freemask(struct ir2_context * ctx,unsigned idx,unsigned c)95 static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
96 {
97 	idx = idx * 4 + c;
98 	ctx->reg_state[idx/32] &= ~(1 << idx%32);
99 }
100 
ra_count_refs(struct ir2_context * ctx)101 void ra_count_refs(struct ir2_context *ctx)
102 {
103 	struct ir2_reg *reg;
104 
105 	/* mark instructions as needed
106 	 * need to do this because "substitutions" pass makes many movs not needed
107 	 */
108 	ir2_foreach_instr(instr, ctx) {
109 		if (has_side_effects(instr))
110 			set_need_emit(ctx, instr);
111 	}
112 
113 	/* compute ref_counts */
114 	ir2_foreach_instr(instr, ctx) {
115 		/* kill non-needed so they can be skipped */
116 		if (!instr->need_emit) {
117 			instr->type = IR2_NONE;
118 			continue;
119 		}
120 
121 		ir2_foreach_src(src, instr) {
122 			if (src->type == IR2_SRC_CONST)
123 				continue;
124 
125 			reg = get_reg_src(ctx, src);
126 			for (int i = 0; i < src_ncomp(instr); i++)
127 				reg->comp[swiz_get(src->swizzle, i)].ref_count++;
128 		}
129 	}
130 }
131 
/* allocate hardware register components for 'reg'.
 * force_idx >= 0 pins the register index; export mode only fixes the
 * component layout without consuming allocator state.
 */
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
	bool export, uint8_t export_writemask)
{
	/* for export, don't allocate anything but set component layout */
	if (export) {
		/* exports use the identity layout x,y,z,w */
		for (int i = 0; i < 4; i++)
			reg->comp[i].c = i;
		return;
	}

	unsigned idx = force_idx;

	/* TODO: allocate into the same register if theres room
	 * note: the blob doesn't do it, so verify that it is indeed better
	 * also, doing it would conflict with scalar mov insertion
	 */

	/* check if already allocated */
	for (int i = 0; i < reg->ncomp; i++) {
		if (reg->comp[i].alloc)
			return;
	}

	/* no pinned index: pick the first register with all 4 components free */
	if (force_idx < 0) {
		for (idx = 0; idx < 64; idx++) {
			if (reg_mask(ctx, idx) == 0)
				break;
		}
	}
	assert(idx != 64); /* TODO ran out of register space.. */

	/* update max_reg value */
	ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx);

	unsigned mask = reg_mask(ctx, idx);

	for (int i = 0; i < reg->ncomp; i++) {
		/* don't allocate never used values */
		if (reg->comp[i].ref_count == 0) {
			/* c = 7 is a "no component" sentinel for unused values */
			reg->comp[i].c = 7;
			continue;
		}

		/* TODO */
		/* NOTE(review): '1 ? i : ...' always takes the identity mapping;
		 * the ffs(~mask) packing alternative is deliberately disabled
		 * pending the TODO above
		 */
		unsigned c = 1 ? i : (ffs(~mask) - 1);
		mask |= 1 << c;
		reg->comp[i].c = c;
		reg_setmask(ctx, idx, c);
		reg->comp[i].alloc = true;
	}

	/* record the final index and make the register visible as live */
	reg->idx = idx;
	ctx->live_regs[reg->idx] = reg;
}
186 
187 /* reduce srcs ref_count and free if needed */
ra_src_free(struct ir2_context * ctx,struct ir2_instr * instr)188 void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
189 {
190 	struct ir2_reg *reg;
191 	struct ir2_reg_component *comp;
192 
193 	ir2_foreach_src(src, instr) {
194 		if (src->type == IR2_SRC_CONST)
195 			continue;
196 
197 		reg = get_reg_src(ctx, src);
198 		/* XXX use before write case */
199 
200 		for (int i = 0; i < src_ncomp(instr); i++) {
201 			comp = &reg->comp[swiz_get(src->swizzle, i)];
202 			if (!--comp->ref_count && reg->block_idx_free < 0) {
203 				reg_freemask(ctx, reg->idx, comp->c);
204 				comp->alloc = false;
205 			}
206 		}
207 	}
208 }
209 
210 /* free any regs left for a block */
ra_block_free(struct ir2_context * ctx,unsigned block)211 void ra_block_free(struct ir2_context *ctx, unsigned block)
212 {
213 	ir2_foreach_live_reg(reg, ctx) {
214 		if (reg->block_idx_free != block)
215 			continue;
216 
217 		for (int i = 0; i < reg->ncomp; i++) {
218 			if (!reg->comp[i].alloc) /* XXX should never be true? */
219 				continue;
220 
221 			reg_freemask(ctx, reg->idx, reg->comp[i].c);
222 			reg->comp[i].alloc = false;
223 		}
224 		ctx->live_regs[reg->idx] = NULL;
225 	}
226 }
227