1 /*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "bi_test.h"
25 #include "bi_builder.h"
26 #include "va_compiler.h"
27 #include "valhall_enums.h"
28
29 #include <gtest/gtest.h>
30
31 static void
strip_nops(bi_context * ctx)32 strip_nops(bi_context *ctx)
33 {
34 bi_foreach_instr_global_safe(ctx, I) {
35 if (I->op == BI_OPCODE_NOP)
36 bi_remove_instruction(I);
37 }
38 }
39
/*
 * CASE(shader_stage, test): run one flow-control insertion test case.
 *
 * The statement(s) in `test` are expanded twice, each time with a local
 * `b` bound to a fresh builder allocated from the fixture's mem_ctx:
 *
 *   - Builder A: `test` is emitted, all NOPs are stripped from the result,
 *     and va_insert_flow_control_nops() is run on what is left.
 *   - Builder B: `test` is emitted and left untouched, so it is the expected
 *     program, including the NOPs written with flow().
 *
 * The two shaders are then compared with ASSERT_SHADER_EQUAL. The shader
 * stage of both is set to MESA_SHADER_<shader_stage> before `test` runs.
 */
#define CASE(shader_stage, test)                                               \
   do {                                                                        \
      bi_builder *A = bit_builder(mem_ctx);                                    \
      bi_builder *B = bit_builder(mem_ctx);                                    \
                                                                               \
      {                                                                        \
         A->shader->stage = MESA_SHADER_##shader_stage;                        \
         UNUSED bi_builder *b = A;                                             \
         test;                                                                 \
      }                                                                        \
                                                                               \
      strip_nops(A->shader);                                                   \
      va_insert_flow_control_nops(A->shader);                                  \
                                                                               \
      {                                                                        \
         B->shader->stage = MESA_SHADER_##shader_stage;                        \
         UNUSED bi_builder *b = B;                                             \
         test;                                                                 \
      }                                                                        \
                                                                               \
      ASSERT_SHADER_EQUAL(A->shader, B->shader);                               \
   } while (0)
57
/*
 * flow(f): emit a NOP through the in-scope builder `b` and set its flow
 * field to VA_FLOW_<f>. Intended to be used as a statement inside CASE().
 */
#define flow(f) (bi_nop(b)->flow = VA_FLOW_##f)
59
60 class InsertFlow : public testing::Test {
61 protected:
InsertFlow()62 InsertFlow() {
63 mem_ctx = ralloc_context(NULL);
64 }
65
~InsertFlow()66 ~InsertFlow() {
67 ralloc_free(mem_ctx);
68 }
69
70 void *mem_ctx;
71 };
72
/*
 * An empty fragment shader body: the expected program is empty too, so the
 * pass must leave the shader without any instructions.
 */
TEST_F(InsertFlow, PreserveEmptyShader)
{
   CASE(FRAGMENT, {});
}
76
/*
 * Three fragment cases that differ only in the tilebuffer instruction used
 * (BLEND, ST_TILE, LD_TILE). In each one the expected program starts with a
 * DISCARD nop, has a WAIT nop directly before the tilebuffer instruction and
 * finishes with an END nop.
 */
TEST_F(InsertFlow, TilebufferWait7)
{
   /* BLEND */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT);
      bi_blend_to(b,
                  bi_register(0),
                  bi_register(4), bi_register(5), bi_register(6),
                  bi_register(7), bi_register(8),
                  BI_REGISTER_FORMAT_AUTO, 4, 4);

      flow(END);
   });

   /* ST_TILE */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT);
      bi_st_tile(b,
                 bi_register(0),
                 bi_register(4), bi_register(5), bi_register(6),
                 BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);

      flow(END);
   });

   /* LD_TILE */
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT);
      bi_ld_tile_to(b,
                    bi_register(0),
                    bi_register(4), bi_register(5), bi_register(6),
                    BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);

      flow(END);
   });
}
106
/*
 * ATEST in a fragment shader: the expected program has a WAIT0126 nop
 * directly before the ATEST and a WAIT0 nop directly after it, followed by
 * the END nop.
 */
TEST_F(InsertFlow, AtestWait6AndWait0After)
{
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT0126);
      bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
      flow(WAIT0);

      flow(END);
   });
}
117
/*
 * ZS_EMIT in a fragment shader: the expected program has a WAIT0126 nop
 * directly before the ZS_EMIT instruction.
 */
TEST_F(InsertFlow, ZSEmitWait6)
{
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT0126);
      bi_zs_emit_to(b,
                    bi_register(0),
                    bi_register(4), bi_register(5), bi_register(6),
                    true, true);

      flow(END);
   });
}
128
/*
 * A vertex shader loads an attribute into a vector starting at r16, executes
 * an FADD that only touches r0, then executes an FADD that reads r19. The
 * expected WAIT0 nop sits between the two FADDs, i.e. directly before the
 * instruction that reads r19, not before the unrelated one.
 */
TEST_F(InsertFlow, LoadThenUnrelatedThenUse)
{
   CASE(VERTEX, {
      bi_ld_attr_imm_to(b,
                        bi_register(16),
                        bi_register(60), bi_register(61),
                        BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);

      /* Only r0 is involved here */
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      flow(WAIT0);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));

      flow(END);
   });
}
139
/*
 * A fragment shader whose only real instruction is a single LD_VAR_BUF_IMM
 * with BI_UPDATE_RETRIEVE: the expected program has DISCARD before it and
 * WAIT0 followed by END after it.
 */
TEST_F(InsertFlow, SingleLdVar)
{
   CASE(FRAGMENT, {
      flow(DISCARD);

      bi_ld_var_buf_imm_f16_to(b,
                               bi_register(2), bi_register(61),
                               BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                               BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
                               BI_VECSIZE_V4, 0);

      flow(WAIT0);
      flow(END);
   });
}
151
/*
 * Three LD_VAR_BUF_IMM instructions in a fragment shader, in the order
 * STORE, RETRIEVE, STORE. The expected program has no nop between the first
 * STORE and the RETRIEVE, a WAIT0 nop between the RETRIEVE and the second
 * STORE, and another WAIT0 nop before END.
 */
TEST_F(InsertFlow, SerializeLdVars)
{
   CASE(FRAGMENT, {
      flow(DISCARD);

      bi_ld_var_buf_imm_f16_to(b,
                               bi_register(16), bi_register(61),
                               BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                               BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
                               BI_VECSIZE_V4, 0);

      bi_ld_var_buf_imm_f16_to(b,
                               bi_register(2), bi_register(61),
                               BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                               BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
                               BI_VECSIZE_V4, 0);

      flow(WAIT0);

      bi_ld_var_buf_imm_f16_to(b,
                               bi_register(8), bi_register(61),
                               BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                               BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
                               BI_VECSIZE_V4, 1);

      flow(WAIT0);
      flow(END);
   });
}
172
/*
 * A CLPER in a fragment shader: the expected DISCARD nop is placed directly
 * after the CLPER instruction rather than at the start of the program.
 */
TEST_F(InsertFlow, Clper)
{
   CASE(FRAGMENT, {
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      bi_clper_i32_to(b,
                      bi_register(0), bi_register(4), bi_register(8),
                      BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                      BI_SUBGROUP_SUBGROUP4);
      flow(DISCARD);

      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      flow(END);
   });
}
184
/*
 * A TEX_SINGLE using BI_VA_LOD_MODE_COMPUTED_LOD in a fragment shader: the
 * expected DISCARD nop comes directly after the texture instruction,
 * followed by WAIT0 before the FADD that comes next.
 */
TEST_F(InsertFlow, TextureImplicit)
{
   CASE(FRAGMENT, {
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      bi_tex_single_to(b,
                       bi_register(0), bi_register(4), bi_register(8),
                       bi_register(12),
                       false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32,
                       false, false,
                       BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
      flow(DISCARD);
      flow(WAIT0);

      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      flow(END);
   });
}
198
/*
 * A TEX_SINGLE using BI_VA_LOD_MODE_ZERO_LOD in a fragment shader: the
 * expected DISCARD nop is at the very start of the program, and only a WAIT0
 * nop follows the texture instruction.
 */
TEST_F(InsertFlow, TextureExplicit)
{
   CASE(FRAGMENT, {
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));

      bi_tex_single_to(b,
                       bi_register(0), bi_register(4), bi_register(8),
                       bi_register(12),
                       false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32,
                       false, false,
                       BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
      flow(WAIT0);

      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      flow(END);
   });
}
212
/*
 * Control flow graph used by the DiamondCFG test:
 *
 *      A
 *     / \
 *    B   C
 *     \ /
 *      D
 */
/*
 * Four blocks wired up as a diamond: A branches to B and C, and both of
 * those fall into D. Block A receives no instructions.
 */
TEST_F(InsertFlow, DiamondCFG)
{
   CASE(FRAGMENT, {
      bi_block *A = bi_start_block(&b->shader->blocks);
      bi_block *B = bit_block(b->shader);
      bi_block *C = bit_block(b->shader);
      bi_block *D = bit_block(b->shader);

      /* A -> { B, C } */
      bi_block_add_successor(A, B);
      bi_block_add_successor(A, C);

      /* { B, C } -> D */
      bi_block_add_successor(B, D);
      bi_block_add_successor(C, D);

      /* B is the only block that uses helper invocations.
       *
       * Consequently both B and C have to discard helpers: B after its
       * CLPER, C at its start.
       */
      b->cursor = bi_after_block(B);
      bi_clper_i32_to(b,
                      bi_register(0), bi_register(4), bi_register(8),
                      BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                      BI_SUBGROUP_SUBGROUP4);
      flow(DISCARD);
      flow(RECONVERGE);

      b->cursor = bi_after_block(C);
      flow(DISCARD);
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      flow(RECONVERGE);

      b->cursor = bi_after_block(D);
      flow(END);
   });
}
252
/*
 * A kernel with a store assigned to slot 2, an FADD, and then a barrier.
 * The expected program has a WAIT2 nop directly before the barrier and a
 * WAIT nop directly after it, followed by END.
 */
TEST_F(InsertFlow, BarrierBug)
{
   CASE(KERNEL, {
      bi_instr *store = bi_store_i32(b,
                                     bi_register(0), bi_register(2),
                                     bi_register(4),
                                     BI_SEG_NONE, 0);
      store->slot = 2;

      bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));

      flow(WAIT2);
      bi_barrier(b);
      flow(WAIT);

      flow(END);
   });
}
265