/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "ir2_private.h"

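/*
 * Register allocation for ir2:
 *  - ra_count_refs marks the instructions that must be emitted (anything
 *    with side effects, plus their sources), kills the rest, and counts
 *    per-component references
 *  - ra_reg packs values into free hardware registers tracked by the
 *    reg_state bitmask; ra_src_free/ra_block_free release components
 *    once their last use is consumed
 */
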
/* if an instruction has side effects, we should never kill it */
static bool
has_side_effects(struct ir2_instr *instr)
{
   if (instr->type == IR2_CF)
      return true;
   else if (instr->type == IR2_FETCH)
      return false;

   switch (instr->alu.scalar_opc) {
   case PRED_SETEs ... KILLONEs:
      return true;
   default:
      break;
   }

   switch (instr->alu.vector_opc) {
   case PRED_SETE_PUSHv ... KILLNEv:
      return true;
   default:
      break;
   }

   return instr->alu.export >= 0;
}

/* mark an instruction as required, and all its sources recursively */
static void
set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)
{
   struct ir2_reg *reg;

   /* don't repeat work already done */
   if (instr->need_emit)
      return;

   instr->need_emit = true;

   ir2_foreach_src (src, instr) {
      switch (src->type) {
      case IR2_SRC_SSA:
         set_need_emit(ctx, &ctx->instr[src->num]);
         break;
      case IR2_SRC_REG:
         /* slow: scan all instructions for writes to this reg */
         reg = get_reg_src(ctx, src);
         ir2_foreach_instr (instr, ctx) {
            if (!instr->is_ssa && instr->reg == reg)
               set_need_emit(ctx, instr);
         }
         break;
      default:
         break;
      }
   }
}

/* get the current bitmask of allocated components for a register:
 * reg_state packs a 4-bit component mask per register, eight registers
 * per 32-bit word
 */
static unsigned
reg_mask(struct ir2_context *ctx, unsigned idx)
{
   return (ctx->reg_state[idx / 8] >> (idx % 8) * 4) & 0xf;
}

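/* mark component c of register idx as allocated in reg_state */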
static void
reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
   idx = idx * 4 + c;
   ctx->reg_state[idx / 32] |= 1 << idx % 32;
}

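/* mark component c of register idx as free in reg_state */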
static void
reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)
{
   idx = idx * 4 + c;
   ctx->reg_state[idx / 32] &= ~(1 << idx % 32);
}

void
ra_count_refs(struct ir2_context *ctx)
{
   struct ir2_reg *reg;

   /* mark instructions as needed
    * need to do this because "substitutions" pass makes many movs not needed
    */
   ir2_foreach_instr (instr, ctx) {
      if (has_side_effects(instr))
         set_need_emit(ctx, instr);
   }

   /* compute ref_counts */
   ir2_foreach_instr (instr, ctx) {
      /* kill non-needed so they can be skipped */
      if (!instr->need_emit) {
         instr->type = IR2_NONE;
         continue;
      }

      ir2_foreach_src (src, instr) {
         if (src->type == IR2_SRC_CONST)
            continue;

         reg = get_reg_src(ctx, src);
         for (int i = 0; i < src_ncomp(instr); i++)
            reg->comp[swiz_get(src->swizzle, i)].ref_count++;
      }
   }
}

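/* allocate a hardware register and component layout for reg;
 * force_idx >= 0 pins the register index, and exports only get
 * their component layout set (no register is allocated)
 */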
void
ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,
       uint8_t export_writemask)
{
   /* for export, don't allocate anything but set component layout */
   if (export) {
      for (int i = 0; i < 4; i++)
         reg->comp[i].c = i;
      return;
   }

   unsigned idx = force_idx;

   /* TODO: allocate into the same register if there's room
    * note: the blob doesn't do it, so verify that it is indeed better
    * also, doing it would conflict with scalar mov insertion
    */

   /* check if already allocated */
   for (int i = 0; i < reg->ncomp; i++) {
      if (reg->comp[i].alloc)
         return;
   }

   if (force_idx < 0) {
      for (idx = 0; idx < 64; idx++) {
         if (reg_mask(ctx, idx) == 0)
            break;
      }
   }
   assert(idx != 64); /* TODO ran out of register space.. */

   /* update max_reg value */
   ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);

   unsigned mask = reg_mask(ctx, idx);

   for (int i = 0; i < reg->ncomp; i++) {
      /* don't allocate never-used values */
      if (reg->comp[i].ref_count == 0) {
         reg->comp[i].c = 7;
         continue;
      }

      /* TODO: component packing is disabled (the ternary always picks i);
       * the ffs(~mask) path would pack into the lowest free component
       */
      unsigned c = 1 ? i : (ffs(~mask) - 1);
      mask |= 1 << c;
      reg->comp[i].c = c;
      reg_setmask(ctx, idx, c);
      reg->comp[i].alloc = true;
   }

   reg->idx = idx;
   ctx->live_regs[reg->idx] = reg;
}

/* reduce each src's ref_count and free components that reach zero */
void
ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)
{
   struct ir2_reg *reg;
   struct ir2_reg_component *comp;

   ir2_foreach_src (src, instr) {
      if (src->type == IR2_SRC_CONST)
         continue;

      reg = get_reg_src(ctx, src);
      /* XXX use before write case */

      for (int i = 0; i < src_ncomp(instr); i++) {
         comp = &reg->comp[swiz_get(src->swizzle, i)];
         if (!--comp->ref_count && reg->block_idx_free < 0) {
            reg_freemask(ctx, reg->idx, comp->c);
            comp->alloc = false;
         }
      }
   }
}

/* free any regs whose freeing was deferred to the end of this block
 * (ra_src_free skips freeing while block_idx_free is set)
 */
void
ra_block_free(struct ir2_context *ctx, unsigned block)
{
   ir2_foreach_live_reg (reg, ctx) {
      if (reg->block_idx_free != block)
         continue;

      for (int i = 0; i < reg->ncomp; i++) {
         if (!reg->comp[i].alloc) /* XXX should never be true? */
            continue;

         reg_freemask(ctx, reg->idx, reg->comp[i].c);
         reg->comp[i].alloc = false;
      }
      ctx->live_regs[reg->idx] = NULL;
   }
}