/*
 * Copyright © 2017 Ilia Mirkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ir3_nir.h"
#include "compiler/nir/nir_builder.h"

/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the
 * gather results, rather than before. As a result, it must be emulated with
 * direct texture calls.
 */

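/* Each tg4 is lowered to four txl instructions at an explicit LOD of 0, one
 * per gathered texel. The first three add the texel offsets (0, 1), (1, 1)
 * and (1, 0) to the gather location (folded into the tg4's offset source if
 * it has one), and the fourth samples the unmodified location. The requested
 * component of each result is then packed into a vec4 that replaces the
 * original tg4 destination.
 */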
static bool
lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx)
{
	bool progress = false;

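	/* Texel offsets for the first three emitted samples, in the order the
	 * texels appear in a gather result: (i, j+1), (i+1, j+1), (i+1, j). The
	 * fourth texel is the unoffset (i, j), so it needs no table entry.
	 */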
	static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} };

	nir_foreach_instr_safe(instr, block) {
		if (instr->type != nir_instr_type_tex)
			continue;

		nir_tex_instr *tg4 = (nir_tex_instr *)instr;

		if (tg4->op != nir_texop_tg4)
			continue;

		b->cursor = nir_before_instr(&tg4->instr);

		nir_ssa_def *results[4];
		int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset);
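		/* Emit one txl per gathered texel, copying all of the tg4's sources.
		 * The per-texel offset is either appended as a new offset source or
		 * added to the tg4's existing one.
		 */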
		for (int i = 0; i < 4; i++) {
			int num_srcs = tg4->num_srcs + 1 /* lod */;
			if (offset_index < 0 && i < 3)
				num_srcs++;

			nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs);
			tex->op = nir_texop_txl;
			tex->sampler_dim = tg4->sampler_dim;
			tex->coord_components = tg4->coord_components;
			tex->is_array = tg4->is_array;
			tex->is_shadow = tg4->is_shadow;
			tex->is_new_style_shadow = tg4->is_new_style_shadow;
			tex->texture_index = tg4->texture_index;
			tex->texture = nir_deref_var_clone(tg4->texture, tex);
			tex->sampler_index = tg4->sampler_index;
			tex->sampler = nir_deref_var_clone(tg4->sampler, tex);
			tex->dest_type = tg4->dest_type;

			for (int j = 0; j < tg4->num_srcs; j++) {
				nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex);
				tex->src[j].src_type = tg4->src[j].src_type;
			}
			if (i != 3) {
				nir_ssa_def *offset =
					nir_vec2(b, nir_imm_int(b, offsets[i][0]),
						 nir_imm_int(b, offsets[i][1]));
				if (offset_index < 0) {
					tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset);
					tex->src[tg4->num_srcs].src_type = nir_tex_src_offset;
				} else {
					assert(nir_tex_instr_src_size(tex, offset_index) == 2);
					nir_ssa_def *orig = nir_ssa_for_src(
							b, tex->src[offset_index].src, 2);
					tex->src[offset_index].src =
						nir_src_for_ssa(nir_iadd(b, orig, offset));
				}
			}
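			/* Gathers sample the base level, so give the txl an explicit
			 * LOD of zero.
			 */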
			tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0));
			tex->src[num_srcs - 1].src_type = nir_tex_src_lod;

			nir_ssa_dest_init(&tex->instr, &tex->dest,
					  nir_tex_instr_dest_size(tex), 32, NULL);
			nir_builder_instr_insert(b, &tex->instr);

			results[i] = nir_channel(b, &tex->dest.ssa, tg4->component);
		}

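		/* Pack the selected component of each sample into a vec4, rewrite
		 * all uses of the tg4 to it, and remove the tg4.
		 */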
		nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]);
		nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result));

		nir_instr_remove(&tg4->instr);

		progress = true;
	}

	return progress;
}

static bool
lower_tg4_func(nir_function_impl *impl)
{
	void *mem_ctx = ralloc_parent(impl);
	nir_builder b;
	nir_builder_init(&b, impl);

	bool progress = false;
	nir_foreach_block_safe(block, impl) {
		progress |= lower_tg4(block, &b, mem_ctx);
	}

	if (progress)
		nir_metadata_preserve(impl, nir_metadata_block_index |
				nir_metadata_dominance);

	return progress;
}

bool
ir3_nir_lower_tg4_to_tex(nir_shader *shader)
{
	bool progress = false;

	nir_foreach_function(function, shader) {
		if (function->impl)
			progress |= lower_tg4_func(function->impl);
	}

	return progress;
}