• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 
26 #include "aco_ir.h"
27 
28 #include <map>
29 
30 namespace aco {
31 namespace {
32 
33 /* map: block-id -> pair (dest, src) to store phi information */
34 typedef std::map<uint32_t, std::vector<std::pair<Definition, Operand>>> phi_info;
35 
36 struct ssa_elimination_ctx {
37    phi_info logical_phi_info;
38    phi_info linear_phi_info;
39    std::vector<bool> empty_blocks;
40    Program* program;
41 
ssa_elimination_ctxaco::__anon164aafed0111::ssa_elimination_ctx42    ssa_elimination_ctx(Program* program) : empty_blocks(program->blocks.size(), true), program(program) {}
43 };
44 
collect_phi_info(ssa_elimination_ctx & ctx)45 void collect_phi_info(ssa_elimination_ctx& ctx)
46 {
47    for (Block& block : ctx.program->blocks) {
48       for (aco_ptr<Instruction>& phi : block.instructions) {
49          if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
50             break;
51 
52          for (unsigned i = 0; i < phi->operands.size(); i++) {
53             if (phi->operands[i].isUndefined())
54                continue;
55             if (phi->operands[i].isTemp() && phi->operands[i].physReg() == phi->definitions[0].physReg())
56                continue;
57 
58             std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
59             phi_info& info = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info : ctx.linear_phi_info;
60             const auto result = info.emplace(preds[i], std::vector<std::pair<Definition, Operand>>());
61             assert(phi->definitions[0].size() == phi->operands[i].size());
62             result.first->second.emplace_back(phi->definitions[0], phi->operands[i]);
63             ctx.empty_blocks[preds[i]] = false;
64          }
65       }
66    }
67 }
68 
insert_parallelcopies(ssa_elimination_ctx & ctx)69 void insert_parallelcopies(ssa_elimination_ctx& ctx)
70 {
71    /* insert the parallelcopies from logical phis before p_logical_end */
72    for (auto&& entry : ctx.logical_phi_info) {
73       Block& block = ctx.program->blocks[entry.first];
74       unsigned idx = block.instructions.size() - 1;
75       while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) {
76          assert(idx > 0);
77          idx--;
78       }
79 
80       std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
81       aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
82       unsigned i = 0;
83       for (std::pair<Definition, Operand>& pair : entry.second)
84       {
85          pc->definitions[i] = pair.first;
86          pc->operands[i] = pair.second;
87          i++;
88       }
89       /* this shouldn't be needed since we're only copying vgprs */
90       pc->tmp_in_scc = false;
91       block.instructions.insert(it, std::move(pc));
92    }
93 
94    /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
95    for (auto&& entry : ctx.linear_phi_info) {
96       Block& block = ctx.program->blocks[entry.first];
97       std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
98       --it;
99       assert((*it)->format == Format::PSEUDO_BRANCH);
100       aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
101       unsigned i = 0;
102       for (std::pair<Definition, Operand>& pair : entry.second)
103       {
104          pc->definitions[i] = pair.first;
105          pc->operands[i] = pair.second;
106          i++;
107       }
108       pc->tmp_in_scc = block.scc_live_out;
109       pc->scratch_sgpr = block.scratch_sgpr;
110       block.instructions.insert(it, std::move(pc));
111    }
112 }
113 
is_empty_block(Block * block,bool ignore_exec_writes)114 bool is_empty_block(Block* block, bool ignore_exec_writes)
115 {
116    /* check if this block is empty and the exec mask is not needed */
117    for (aco_ptr<Instruction>& instr : block->instructions) {
118       switch (instr->opcode) {
119          case aco_opcode::p_linear_phi:
120          case aco_opcode::p_phi:
121          case aco_opcode::p_logical_start:
122          case aco_opcode::p_logical_end:
123          case aco_opcode::p_branch:
124             break;
125          case aco_opcode::p_parallelcopy:
126             for (unsigned i = 0; i < instr->definitions.size(); i++) {
127                if (ignore_exec_writes && instr->definitions[i].physReg() == exec)
128                   continue;
129                if (instr->definitions[i].physReg() != instr->operands[i].physReg())
130                   return false;
131             }
132             break;
133          case aco_opcode::s_andn2_b64:
134          case aco_opcode::s_andn2_b32:
135             if (ignore_exec_writes && instr->definitions[0].physReg() == exec)
136                break;
137          default:
138             return false;
139       }
140    }
141    return true;
142 }
143 
try_remove_merge_block(ssa_elimination_ctx & ctx,Block * block)144 void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
145 {
146    /* check if the successor is another merge block which restores exec */
147    // TODO: divergent loops also restore exec
148    if (block->linear_succs.size() != 1 ||
149        !(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
150       return;
151 
152    /* check if this block is empty */
153    if (!is_empty_block(block, true))
154       return;
155 
156    /* keep the branch instruction and remove the rest */
157    aco_ptr<Instruction> branch = std::move(block->instructions.back());
158    block->instructions.clear();
159    block->instructions.emplace_back(std::move(branch));
160 }
161 
try_remove_invert_block(ssa_elimination_ctx & ctx,Block * block)162 void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
163 {
164    assert(block->linear_succs.size() == 2);
165    /* only remove this block if the successor got removed as well */
166    if (block->linear_succs[0] != block->linear_succs[1])
167       return;
168 
169    /* check if block is otherwise empty */
170    if (!is_empty_block(block, true))
171       return;
172 
173    unsigned succ_idx = block->linear_succs[0];
174    assert(block->linear_preds.size() == 2);
175    for (unsigned i = 0; i < 2; i++) {
176       Block *pred = &ctx.program->blocks[block->linear_preds[i]];
177       pred->linear_succs[0] = succ_idx;
178       ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
179 
180       Pseudo_branch_instruction *branch = static_cast<Pseudo_branch_instruction*>(pred->instructions.back().get());
181       assert(branch->format == Format::PSEUDO_BRANCH);
182       branch->target[0] = succ_idx;
183       branch->target[1] = succ_idx;
184    }
185 
186    block->instructions.clear();
187    block->linear_preds.clear();
188    block->linear_succs.clear();
189 }
190 
try_remove_simple_block(ssa_elimination_ctx & ctx,Block * block)191 void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
192 {
193    if (!is_empty_block(block, false))
194       return;
195 
196    Block& pred = ctx.program->blocks[block->linear_preds[0]];
197    Block& succ = ctx.program->blocks[block->linear_succs[0]];
198    Pseudo_branch_instruction* branch = static_cast<Pseudo_branch_instruction*>(pred.instructions.back().get());
199    if (branch->opcode == aco_opcode::p_branch) {
200       branch->target[0] = succ.index;
201       branch->target[1] = succ.index;
202    } else if (branch->target[0] == block->index) {
203       branch->target[0] = succ.index;
204    } else if (branch->target[0] == succ.index) {
205       assert(branch->target[1] == block->index);
206       branch->target[1] = succ.index;
207       branch->opcode = aco_opcode::p_branch;
208    } else if (branch->target[1] == block->index) {
209       /* check if there is a fall-through path from block to succ */
210       bool falls_through = block->index < succ.index;
211       for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
212          assert(ctx.program->blocks[j].index == j);
213          if (!ctx.program->blocks[j].instructions.empty())
214             falls_through = false;
215       }
216       if (falls_through) {
217          branch->target[1] = succ.index;
218       } else {
219          /* check if there is a fall-through path for the alternative target */
220          if (block->index >= branch->target[0])
221             return;
222          for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
223             if (!ctx.program->blocks[j].instructions.empty())
224                return;
225          }
226 
227          /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
228          if (branch->opcode == aco_opcode::p_cbranch_z)
229             branch->opcode = aco_opcode::p_cbranch_nz;
230          else if (branch->opcode == aco_opcode::p_cbranch_nz)
231             branch->opcode = aco_opcode::p_cbranch_z;
232          else
233             assert(false);
234          /* also invert the linear successors */
235          pred.linear_succs[0] = pred.linear_succs[1];
236          pred.linear_succs[1] = succ.index;
237          branch->target[1] = branch->target[0];
238          branch->target[0] = succ.index;
239       }
240    } else {
241       assert(false);
242    }
243 
244    if (branch->target[0] == branch->target[1])
245       branch->opcode = aco_opcode::p_branch;
246 
247    for (unsigned i = 0; i < pred.linear_succs.size(); i++)
248       if (pred.linear_succs[i] == block->index)
249          pred.linear_succs[i] = succ.index;
250 
251    for (unsigned i = 0; i < succ.linear_preds.size(); i++)
252       if (succ.linear_preds[i] == block->index)
253          succ.linear_preds[i] = pred.index;
254 
255    block->instructions.clear();
256    block->linear_preds.clear();
257    block->linear_succs.clear();
258 }
259 
jump_threading(ssa_elimination_ctx & ctx)260 void jump_threading(ssa_elimination_ctx& ctx)
261 {
262    for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
263       Block* block = &ctx.program->blocks[i];
264 
265       if (!ctx.empty_blocks[i])
266          continue;
267 
268       if (block->kind & block_kind_invert) {
269          try_remove_invert_block(ctx, block);
270          continue;
271       }
272 
273       if (block->linear_succs.size() > 1)
274          continue;
275 
276       if (block->kind & block_kind_merge ||
277           block->kind & block_kind_loop_exit)
278          try_remove_merge_block(ctx, block);
279 
280       if (block->linear_preds.size() == 1)
281          try_remove_simple_block(ctx, block);
282    }
283 }
284 
285 } /* end namespace */
286 
287 
ssa_elimination(Program * program)288 void ssa_elimination(Program* program)
289 {
290    ssa_elimination_ctx ctx(program);
291 
292    /* Collect information about every phi-instruction */
293    collect_phi_info(ctx);
294 
295    /* eliminate empty blocks */
296    jump_threading(ctx);
297 
298    /* insert parallelcopies from SSA elimination */
299    insert_parallelcopies(ctx);
300 
301 }
302 }
303