• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "aco_builder.h"
26 #include "aco_ir.h"
27 
28 #include <vector>
29 
30 namespace aco {
31 namespace {
32 
/* Kinds of hard clause distinguished by this pass. Consecutive instructions are
 * only grouped together when get_type() assigns them the same kind.
 *
 * There can also be LDS and VALU clauses, but those are not considered
 * interesting here and are not formed.
 */
enum clause_type {
   /* SMEM instructions that have operands. */
   clause_smem,
   /* Everything that is not grouped: such instructions are emitted directly. */
   clause_other,
   /* Returned by get_type() before GFX11: */
   clause_vmem,
   clause_flat,
   /* Returned by get_type() on GFX11 and newer, where loads, stores, atomics
    * (and image samples / BVH intersections) are kept in separate kinds: */
   clause_mimg_load,
   clause_mimg_store,
   clause_mimg_atomic,
   clause_mimg_sample,
   clause_vmem_load,
   clause_vmem_store,
   clause_vmem_atomic,
   clause_flat_load,
   clause_flat_store,
   clause_flat_atomic,
   clause_bvh,
};
53 
54 void
emit_clause(Builder & bld,unsigned num_instrs,aco_ptr<Instruction> * instrs)55 emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction>* instrs)
56 {
57    unsigned start = 0;
58    unsigned end = num_instrs;
59 
60    if (bld.program->gfx_level < GFX11) {
61       /* skip any stores at the start */
62       for (; (start < num_instrs) && instrs[start]->definitions.empty(); start++)
63          bld.insert(std::move(instrs[start]));
64 
65       for (end = start; (end < num_instrs) && !instrs[end]->definitions.empty(); end++)
66          ;
67    }
68 
69    unsigned clause_size = end - start;
70    if (clause_size > 1)
71       bld.sopp(aco_opcode::s_clause, -1, clause_size - 1);
72 
73    for (unsigned i = start; i < num_instrs; i++)
74       bld.insert(std::move(instrs[i]));
75 }
76 
77 clause_type
get_type(Program * program,aco_ptr<Instruction> & instr)78 get_type(Program* program, aco_ptr<Instruction>& instr)
79 {
80    if (instr->isSMEM() && !instr->operands.empty())
81       return clause_smem;
82 
83    if (program->gfx_level >= GFX11) {
84       if (instr->isMIMG()) {
85          switch (instr->opcode) {
86          case aco_opcode::image_bvh_intersect_ray:
87          case aco_opcode::image_bvh64_intersect_ray: return clause_bvh;
88          case aco_opcode::image_atomic_swap:
89          case aco_opcode::image_atomic_cmpswap:
90          case aco_opcode::image_atomic_add:
91          case aco_opcode::image_atomic_sub:
92          case aco_opcode::image_atomic_rsub:
93          case aco_opcode::image_atomic_smin:
94          case aco_opcode::image_atomic_umin:
95          case aco_opcode::image_atomic_smax:
96          case aco_opcode::image_atomic_umax:
97          case aco_opcode::image_atomic_and:
98          case aco_opcode::image_atomic_or:
99          case aco_opcode::image_atomic_xor:
100          case aco_opcode::image_atomic_inc:
101          case aco_opcode::image_atomic_dec:
102          case aco_opcode::image_atomic_fcmpswap:
103          case aco_opcode::image_atomic_fmin:
104          case aco_opcode::image_atomic_fmax: return clause_mimg_atomic;
105          default:
106             if (instr->definitions.empty())
107                return clause_mimg_store;
108             else
109                return !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4
110                          ? clause_mimg_sample
111                          : clause_mimg_load;
112          }
113       } else if (instr->isMTBUF() || instr->isScratch()) {
114          return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
115       } else if (instr->isMUBUF()) {
116          switch (instr->opcode) {
117          case aco_opcode::buffer_atomic_add:
118          case aco_opcode::buffer_atomic_and_x2:
119          case aco_opcode::buffer_atomic_rsub:
120          case aco_opcode::buffer_atomic_umax:
121          case aco_opcode::buffer_atomic_dec:
122          case aco_opcode::buffer_atomic_smax:
123          case aco_opcode::buffer_atomic_fmax:
124          case aco_opcode::buffer_atomic_rsub_x2:
125          case aco_opcode::buffer_atomic_smin:
126          case aco_opcode::buffer_atomic_sub:
127          case aco_opcode::buffer_atomic_sub_x2:
128          case aco_opcode::buffer_atomic_xor_x2:
129          case aco_opcode::buffer_atomic_add_f32:
130          case aco_opcode::buffer_atomic_inc:
131          case aco_opcode::buffer_atomic_swap_x2:
132          case aco_opcode::buffer_atomic_cmpswap:
133          case aco_opcode::buffer_atomic_fmin_x2:
134          case aco_opcode::buffer_atomic_umin:
135          case aco_opcode::buffer_atomic_or:
136          case aco_opcode::buffer_atomic_umax_x2:
137          case aco_opcode::buffer_atomic_smin_x2:
138          case aco_opcode::buffer_atomic_umin_x2:
139          case aco_opcode::buffer_atomic_cmpswap_x2:
140          case aco_opcode::buffer_atomic_add_x2:
141          case aco_opcode::buffer_atomic_swap:
142          case aco_opcode::buffer_atomic_and:
143          case aco_opcode::buffer_atomic_fmin:
144          case aco_opcode::buffer_atomic_fcmpswap_x2:
145          case aco_opcode::buffer_atomic_or_x2:
146          case aco_opcode::buffer_atomic_fcmpswap:
147          case aco_opcode::buffer_atomic_xor:
148          case aco_opcode::buffer_atomic_dec_x2:
149          case aco_opcode::buffer_atomic_fmax_x2:
150          case aco_opcode::buffer_atomic_csub:
151          case aco_opcode::buffer_atomic_inc_x2:
152          case aco_opcode::buffer_atomic_smax_x2: return clause_vmem_atomic;
153          default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
154          }
155       } else if (instr->isGlobal()) {
156          switch (instr->opcode) {
157          case aco_opcode::global_atomic_swap:
158          case aco_opcode::global_atomic_umax:
159          case aco_opcode::global_atomic_cmpswap:
160          case aco_opcode::global_atomic_and_x2:
161          case aco_opcode::global_atomic_fmax:
162          case aco_opcode::global_atomic_smax_x2:
163          case aco_opcode::global_atomic_fmax_x2:
164          case aco_opcode::global_atomic_dec:
165          case aco_opcode::global_atomic_dec_x2:
166          case aco_opcode::global_atomic_umin:
167          case aco_opcode::global_atomic_fcmpswap_x2:
168          case aco_opcode::global_atomic_inc:
169          case aco_opcode::global_atomic_and:
170          case aco_opcode::global_atomic_fmin:
171          case aco_opcode::global_atomic_fcmpswap:
172          case aco_opcode::global_atomic_or_x2:
173          case aco_opcode::global_atomic_smax:
174          case aco_opcode::global_atomic_sub:
175          case aco_opcode::global_atomic_xor:
176          case aco_opcode::global_atomic_swap_x2:
177          case aco_opcode::global_atomic_umax_x2:
178          case aco_opcode::global_atomic_umin_x2:
179          case aco_opcode::global_atomic_xor_x2:
180          case aco_opcode::global_atomic_inc_x2:
181          case aco_opcode::global_atomic_fmin_x2:
182          case aco_opcode::global_atomic_add_f32:
183          case aco_opcode::global_atomic_add:
184          case aco_opcode::global_atomic_or:
185          case aco_opcode::global_atomic_add_x2:
186          case aco_opcode::global_atomic_smin_x2:
187          case aco_opcode::global_atomic_smin:
188          case aco_opcode::global_atomic_csub:
189          case aco_opcode::global_atomic_sub_x2:
190          case aco_opcode::global_atomic_cmpswap_x2: return clause_vmem_atomic;
191          default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
192          }
193       } else if (instr->isFlat()) {
194          switch (instr->opcode) {
195          case aco_opcode::flat_atomic_smax:
196          case aco_opcode::flat_atomic_fcmpswap_x2:
197          case aco_opcode::flat_atomic_inc_x2:
198          case aco_opcode::flat_atomic_dec:
199          case aco_opcode::flat_atomic_fmin:
200          case aco_opcode::flat_atomic_umax_x2:
201          case aco_opcode::flat_atomic_add_f32:
202          case aco_opcode::flat_atomic_or:
203          case aco_opcode::flat_atomic_smax_x2:
204          case aco_opcode::flat_atomic_umin:
205          case aco_opcode::flat_atomic_sub:
206          case aco_opcode::flat_atomic_swap:
207          case aco_opcode::flat_atomic_swap_x2:
208          case aco_opcode::flat_atomic_cmpswap_x2:
209          case aco_opcode::flat_atomic_fcmpswap:
210          case aco_opcode::flat_atomic_add:
211          case aco_opcode::flat_atomic_umin_x2:
212          case aco_opcode::flat_atomic_xor_x2:
213          case aco_opcode::flat_atomic_smin:
214          case aco_opcode::flat_atomic_fmax_x2:
215          case aco_opcode::flat_atomic_cmpswap:
216          case aco_opcode::flat_atomic_dec_x2:
217          case aco_opcode::flat_atomic_sub_x2:
218          case aco_opcode::flat_atomic_add_x2:
219          case aco_opcode::flat_atomic_umax:
220          case aco_opcode::flat_atomic_xor:
221          case aco_opcode::flat_atomic_and_x2:
222          case aco_opcode::flat_atomic_inc:
223          case aco_opcode::flat_atomic_and:
224          case aco_opcode::flat_atomic_fmin_x2:
225          case aco_opcode::flat_atomic_smin_x2:
226          case aco_opcode::flat_atomic_or_x2:
227          case aco_opcode::flat_atomic_fmax: return clause_flat_atomic;
228          default: return instr->definitions.empty() ? clause_flat_store : clause_flat_load;
229          }
230       }
231    } else {
232       if (instr->isVMEM() && !instr->operands.empty()) {
233          if (program->gfx_level == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
234             return clause_other;
235          else
236             return clause_vmem;
237       } else if (instr->isScratch() || instr->isGlobal()) {
238          return clause_vmem;
239       } else if (instr->isFlat()) {
240          return clause_flat;
241       }
242    }
243    return clause_other;
244 }
245 
246 } /* end namespace */
247 
248 void
form_hard_clauses(Program * program)249 form_hard_clauses(Program* program)
250 {
251    /* The ISA documentation says 63 is the maximum for GFX11/12, but according to
252     * LLVM there are HW bugs with more than 32 instructions.
253     */
254    const unsigned max_clause_length = program->gfx_level >= GFX11 ? 32 : 63;
255    for (Block& block : program->blocks) {
256       unsigned num_instrs = 0;
257       aco_ptr<Instruction> current_instrs[63];
258       clause_type current_type = clause_other;
259 
260       std::vector<aco_ptr<Instruction>> new_instructions;
261       new_instructions.reserve(block.instructions.size());
262       Builder bld(program, &new_instructions);
263 
264       for (unsigned i = 0; i < block.instructions.size(); i++) {
265          aco_ptr<Instruction>& instr = block.instructions[i];
266 
267          clause_type type = get_type(program, instr);
268          if (type != current_type || num_instrs == max_clause_length ||
269              (num_instrs && !should_form_clause(current_instrs[0].get(), instr.get()))) {
270             emit_clause(bld, num_instrs, current_instrs);
271             num_instrs = 0;
272             current_type = type;
273          }
274 
275          if (type == clause_other) {
276             bld.insert(std::move(instr));
277             continue;
278          }
279 
280          current_instrs[num_instrs++] = std::move(instr);
281       }
282 
283       emit_clause(bld, num_instrs, current_instrs);
284 
285       block.instructions = std::move(new_instructions);
286    }
287 }
288 } // namespace aco
289