1 /*
2 * Copyright (c) 2019 Connor Abbott <cwabbott0@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26 #include "nir_builder.h"
27 #include "lima_ir.h"
28
29 /* This pass clones certain input intrinsics, creating a copy for each user.
30 * Inputs are relatively cheap, since in both PP and GP one input can be
31 * loaded "for free" in each instruction bundle. In GP especially, if there is
32 * a load instruction with multiple uses in different basic blocks, we need to
33 * split it in NIR so that we don't generate a register write and reads for
34 * it, which is almost certainly more expensive than splitting. Hence this
35 * pass is more aggressive than nir_opt_move, which just moves the intrinsic
36 * down but won't split it.
37 */
38
39 static nir_ssa_def *
clone_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)40 clone_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
41 {
42 nir_intrinsic_instr *new_intrin =
43 nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
44
45 assert(new_intrin->dest.is_ssa);
46
47 unsigned num_srcs = nir_intrinsic_infos[new_intrin->intrinsic].num_srcs;
48 for (unsigned i = 0; i < num_srcs; i++) {
49 assert(new_intrin->src[i].is_ssa);
50 }
51
52 nir_builder_instr_insert(b, &new_intrin->instr);
53
54 return &new_intrin->dest.ssa;
55 }
56
57 static bool
replace_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)58 replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
59 {
60 if (!intrin->dest.is_ssa)
61 return false;
62
63 if (intrin->intrinsic != nir_intrinsic_load_input &&
64 intrin->intrinsic != nir_intrinsic_load_uniform)
65 return false;
66
67 if (!intrin->src[0].is_ssa)
68 return false;
69
70 if (intrin->src[0].ssa->parent_instr->type == nir_instr_type_load_const)
71 return false;
72
73 struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
74
75 nir_foreach_use_safe(src, &intrin->dest.ssa) {
76 struct hash_entry *entry =
77 _mesa_hash_table_search(visited_instrs, src->parent_instr);
78 if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
79 nir_ssa_def *def = entry->data;
80 nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
81 continue;
82 }
83 b->cursor = nir_before_src(src, false);
84 nir_ssa_def *new = clone_intrinsic(b, intrin);
85 nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
86 _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
87 }
88 nir_foreach_if_use_safe(src, &intrin->dest.ssa) {
89 b->cursor = nir_before_src(src, true);
90 nir_if_rewrite_condition(src->parent_if,
91 nir_src_for_ssa(clone_intrinsic(b, intrin)));
92 }
93
94 nir_instr_remove(&intrin->instr);
95 _mesa_hash_table_destroy(visited_instrs, NULL);
96 return true;
97 }
98
99 static void
replace_load_const(nir_builder * b,nir_load_const_instr * load_const)100 replace_load_const(nir_builder *b, nir_load_const_instr *load_const)
101 {
102 struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
103
104 nir_foreach_use_safe(src, &load_const->def) {
105 struct hash_entry *entry =
106 _mesa_hash_table_search(visited_instrs, src->parent_instr);
107 if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
108 nir_ssa_def *def = entry->data;
109 nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
110 continue;
111 }
112 b->cursor = nir_before_src(src, false);
113 nir_ssa_def *new = nir_build_imm(b, load_const->def.num_components,
114 load_const->def.bit_size,
115 load_const->value);
116 nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
117 _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
118 }
119
120 nir_instr_remove(&load_const->instr);
121 _mesa_hash_table_destroy(visited_instrs, NULL);
122 }
123
124 bool
lima_nir_split_loads(nir_shader * shader)125 lima_nir_split_loads(nir_shader *shader)
126 {
127 bool progress = false;
128
129 nir_foreach_function(function, shader) {
130 if (function->impl) {
131 nir_builder b;
132 nir_builder_init(&b, function->impl);
133
134 nir_foreach_block_reverse(block, function->impl) {
135 nir_foreach_instr_reverse_safe(instr, block) {
136 if (instr->type == nir_instr_type_load_const) {
137 replace_load_const(&b, nir_instr_as_load_const(instr));
138 progress = true;
139 } else if (instr->type == nir_instr_type_intrinsic) {
140 progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr));
141 }
142 }
143 }
144 }
145 }
146
147 return progress;
148 }
149
150