/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include "nir.h"
#include "nir_builder.h"

/* This is a macro so you get good line numbers */
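/* EXPECT_INSTR_SWIZZLES(mov, load, swz) checks that the mov recorded for a
 * load id now reads &load->dest.ssa with the swizzle string swz (e.g. "xy").
 */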
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle) \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa); \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   void *mem_ctx;

   nir_builder *b;
   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   mem_ctx = ralloc_context(NULL);
   static const nir_shader_compiler_options options = { };
   b = rzalloc(mem_ctx, nir_builder);
   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(mem_ctx);

   glsl_type_singleton_decref();
}

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

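/* Lowers shared variables to explicit types, runs nir_opt_load_store_vectorize
 * and, if it made progress, validates the shader and runs a few cleanup passes
 * (optionally CSE, then copy-prop, algebraic and constant folding) so the
 * tests can match the rewritten sources directly.
 */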
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

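/* Returns (and caches per binding) a vulkan_resource_index for descriptor set
 * 0, so repeated loads/stores of the same binding share one resource def.
 */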
nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7 /*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6 /*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

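/* Emits a UBO/SSBO/push-constant load at the given offset and records a mov
 * of its result under `id`, so tests can later check which vectorized load
 * the mov reads from and with which swizzle.
 */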
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, byte_size, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

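/* Emits an SSBO or shared store of an immediate vector whose component i is
 * (id << 4) | i, so the constants in merged store values can be identified in
 * the tests.
 */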
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   store->src[1] = nir_src_for_ssa(value);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

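/* The callback returns bit_size / 8, which is nonzero (and so converts to
 * true) for every bit size these tests use, i.e. every candidate pair the
 * pass finds is allowed to be merged.
 */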
bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   return bit_size / 8;
}

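/* Size/align callback for nir_lower_vars_to_explicit_types: vectors and
 * scalars are packed with natural component alignment, and booleans take
 * 4 bytes per component.
 */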
void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

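/* Each test below builds a handful of loads/stores, runs the vectorizer and
 * then checks how many intrinsics remain and how the surviving one is
 * addressed, sized and swizzled.
 */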
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

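/* The next tests check barrier handling: read-only UBO loads may still be
 * combined across a device-scope barrier, SSBO loads may not, and a control
 * barrier alone does not prevent vectorization.
 */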
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

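/* Stores are not combined across a discard or a demote, and loads are not
 * combined across a discard; only a demote (which keeps the invocation
 * running as a helper) still allows adjacent loads to be merged.
 */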
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard)->instr);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_demote)->instr);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_demote)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 8);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   nir_ssa_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "zw");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 64);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(mov, load, "y");

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(unpack, load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_ssa_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

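/* The shared-memory tests below operate on derefs rather than explicit byte
 * offsets; after vectorization the merged access sits behind a cast deref
 * whose parent is the original array or struct deref.
 */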
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent)1185 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1186 {
1187 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1188 nir_deref_instr *deref = nir_build_deref_var(b, var);
1189
1190 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1191 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1192
1193 nir_validate_shader(b->shader, NULL);
1194 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1195
1196 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1197
1198 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1199
1200 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1201 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1202 ASSERT_EQ(load->dest.ssa.num_components, 2);
1203
1204 deref = nir_src_as_deref(load->src[0]);
1205 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1206
1207 deref = nir_deref_instr_parent(deref);
1208 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1209 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1210
1211 deref = nir_deref_instr_parent(deref);
1212 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1213 ASSERT_EQ(deref->var, var);
1214
1215 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1216 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1217 }
1218
TEST_F(nir_load_store_vectorize_test,shared_load_distant_64bit)1219 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1220 {
1221 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1222 nir_deref_instr *deref = nir_build_deref_var(b, var);
1223 nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
1224
1225 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1226 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1227
1228 nir_validate_shader(b->shader, NULL);
1229 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1230
1231 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1232
1233 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1234 }
1235
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect)1236 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1237 {
1238 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1239 nir_deref_instr *deref = nir_build_deref_var(b, var);
1240 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1241
1242 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1243 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1244
1245 nir_validate_shader(b->shader, NULL);
1246 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1247
1248 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1249
1250 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1251
1252 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1253 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1254 ASSERT_EQ(load->dest.ssa.num_components, 2);
1255
1256 deref = nir_src_as_deref(load->src[0]);
1257 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1258
1259 deref = nir_deref_instr_parent(deref);
1260 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1261 ASSERT_EQ(deref->arr.index.ssa, index_base);
1262
1263 deref = nir_deref_instr_parent(deref);
1264 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1265 ASSERT_EQ(deref->var, var);
1266
1267 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1268 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1269 }
1270
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect_sub)1271 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1272 {
1273 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1274 nir_deref_instr *deref = nir_build_deref_var(b, var);
1275 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1276 nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1277
1278 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1279 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1280
1281 nir_validate_shader(b->shader, NULL);
1282 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1283
1284 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1285
1286 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1287
1288 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1289 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1290 ASSERT_EQ(load->dest.ssa.num_components, 2);
1291
1292 deref = nir_src_as_deref(load->src[0]);
1293 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1294
1295 deref = nir_deref_instr_parent(deref);
1296 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1297 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1298
1299 deref = nir_deref_instr_parent(deref);
1300 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1301 ASSERT_EQ(deref->var, var);
1302
1303 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1304 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1305 }
1306
TEST_F(nir_load_store_vectorize_test,shared_load_struct)1307 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1308 {
1309 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1310 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1311
1312 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1313 nir_deref_instr *deref = nir_build_deref_var(b, var);
1314
1315 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1316 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1317
1318 nir_validate_shader(b->shader, NULL);
1319 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1320
1321 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1322
1323 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1324
1325 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1326 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1327 ASSERT_EQ(load->dest.ssa.num_components, 2);
1328
1329 deref = nir_src_as_deref(load->src[0]);
1330 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1331
1332 deref = nir_deref_instr_parent(deref);
1333 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1334 ASSERT_EQ(deref->strct.index, 0);
1335
1336 deref = nir_deref_instr_parent(deref);
1337 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1338 ASSERT_EQ(deref->var, var);
1339
1340 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1341 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1342 }
1343
TEST_F(nir_load_store_vectorize_test,shared_load_identical_store_adjacent)1344 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1345 {
1346 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1347 nir_deref_instr *deref = nir_build_deref_var(b, var);
1348
1349 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1350 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1351 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1352
1353 nir_validate_shader(b->shader, NULL);
1354 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1355 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1356
1357 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1358
1359 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1360 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1361
1362 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1363 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1364 ASSERT_EQ(load->dest.ssa.num_components, 1);
1365
1366 deref = nir_src_as_deref(load->src[0]);
1367 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1368 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1369
1370 deref = nir_deref_instr_parent(deref);
1371 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1372 ASSERT_EQ(deref->var, var);
1373
1374 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1375 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1376 }
1377
1378 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1379 {
1380 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1381 nir_deref_instr *deref = nir_build_deref_var(b, var);
1382
1383 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1384 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1385 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
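/* The store targets the same element as both loads, so the second load may
 * observe the stored value; the loads must not be combined and the pass is
 * expected to report no progress. */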
1386
1387 nir_validate_shader(b->shader, NULL);
1388 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1389
1390 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1391
1392 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1393 }
1394
1395 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1396 {
1397 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1398 nir_deref_instr *deref = nir_build_deref_var(b, var);
1399
1400 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1401 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1402 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
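/* The store only overwrites element 0 while the second load reads element 1,
 * so both loads can still be merged into the vec2 load checked below. */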
1403
1404 nir_validate_shader(b->shader, NULL);
1405 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1406 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1407
1408 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1409
1410 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1411 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1412
1413 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1414 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1415 ASSERT_EQ(load->dest.ssa.num_components, 2);
1416
1417 deref = nir_src_as_deref(load->src[0]);
1418 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1419
1420 deref = nir_deref_instr_parent(deref);
1421 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1422 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1423
1424 deref = nir_deref_instr_parent(deref);
1425 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1426 ASSERT_EQ(deref->var, var);
1427
1428 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1429 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1430 }
1431
1432 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1433 {
1434 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1435 nir_deref_instr *deref = nir_build_deref_var(b, var);
1436
1437 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1438 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
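/* Both loads are 1-bit booleans; the assertions below expect them to be
 * fetched as a single 32-bit vec2 and converted back with i2b1. */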
1439
1440 nir_validate_shader(b->shader, NULL);
1441 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1442
1443 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1444
1445 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1446
1447 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1448 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1449 ASSERT_EQ(load->dest.ssa.num_components, 2);
1450
1451 deref = nir_src_as_deref(load->src[0]);
1452 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1453
1454 deref = nir_deref_instr_parent(deref);
1455 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1456 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1457
1458 deref = nir_deref_instr_parent(deref);
1459 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1460 ASSERT_EQ(deref->var, var);
1461
1462 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1463 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1464 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1465 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1466 }
1467
1468 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1469 {
1470 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1471 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1472
1473 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1474 nir_deref_instr *deref = nir_build_deref_var(b, var);
1475
1476 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1477 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1478
1479 nir_validate_shader(b->shader, NULL);
1480 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1481
1482 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1483
1484 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1485
1486 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1487 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1488 ASSERT_EQ(load->dest.ssa.num_components, 2);
1489
1490 deref = nir_src_as_deref(load->src[0]);
1491 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1492
1493 deref = nir_deref_instr_parent(deref);
1494 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1495 ASSERT_EQ(deref->strct.index, 0);
1496
1497 deref = nir_deref_instr_parent(deref);
1498 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1499 ASSERT_EQ(deref->var, var);
1500
1501 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1502 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1503
1504 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1505 }
1506
1507 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1508 {
1509 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1510 nir_deref_instr *deref = nir_build_deref_var(b, var);
1511
1512 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1513 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1514
1515 nir_validate_shader(b->shader, NULL);
1516 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1517
1518 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1519
1520 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1521
1522 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1523 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1524 nir_ssa_def *val = store->src[1].ssa;
1525 ASSERT_EQ(val->bit_size, 32);
1526 ASSERT_EQ(val->num_components, 2);
1527 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1528 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1529 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1530
1531 deref = nir_src_as_deref(store->src[0]);
1532 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1533
1534 deref = nir_deref_instr_parent(deref);
1535 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1536 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1537
1538 deref = nir_deref_instr_parent(deref);
1539 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1540 ASSERT_EQ(deref->var, var);
1541 }
1542
1543 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1544 {
1545 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1546 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1547
1548 nir_validate_shader(b->shader, NULL);
1549 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1550
1551 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1552
1553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1554 }
1555
1556 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1557 {
1558 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1559 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1560
1561 nir_validate_shader(b->shader, NULL);
1562 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1563
1564 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1565
1566 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1567 }
1568
1569 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1570 {
1571 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1572 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1573 create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1574
1575 nir_validate_shader(b->shader, NULL);
1576 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1577
1578 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1579
1580 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1581 }
1582
1583 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1584 {
1585 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1586 create_indirect_load(nir_var_mem_push_const, 0,
1587 nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1588 create_indirect_load(nir_var_mem_push_const, 0,
1589 nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
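/* The two offsets always differ by exactly 16 bytes, so the 4-byte loads are
 * too far apart to combine and both are expected to remain. */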
1590
1591 nir_validate_shader(b->shader, NULL);
1592 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1593
1594 EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1595
1596 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1597 }
1598
1599 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1600 {
1601 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1602 //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1603 nir_ssa_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1604 nir_ssa_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
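/* low == 16*i + 12 and high == 16*(i + 1) == 16*i + 16, so the two loads are
 * adjacent 4-byte slots and should merge into the vec2 load checked below. */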
1605 create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1606 create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1607
1608 nir_validate_shader(b->shader, NULL);
1609 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1610
1611 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1612
1613 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1614
1615 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1616 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1617 ASSERT_EQ(load->dest.ssa.num_components, 2);
1618 ASSERT_EQ(load->src[0].ssa, low);
1619 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1620 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1621 }
1622
1623 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1624 {
1625 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1626 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1627 create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1628 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
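/* The store's offset is fully dynamic and may alias offset 0, so the two
 * loads must not be merged across it and both are expected to remain. */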
1629
1630 nir_validate_shader(b->shader, NULL);
1631 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1632
1633 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1634
1635 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1636 }
1637
1638 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1639 {
1640 nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1641 nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1642 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1643 create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1644 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1645
1646 nir_validate_shader(b->shader, NULL);
1647 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1648
1649 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1650
1651 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1652 }
1653
1654 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1655 {
1656 /* TODO: try to combine these loads */
1657 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1658 nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1659 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1660 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1661 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1662
1663 nir_validate_shader(b->shader, NULL);
1664 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1665
1666 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1667
1668 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1669
1670 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1671 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1672 ASSERT_EQ(load->dest.ssa.num_components, 1);
1673 ASSERT_EQ(load->src[1].ssa, offset);
1674 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1675 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1676 }
1677
1678 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1679 {
1680 /* these loads can't be combined because if index_base == 268435455, then
1681 * the offset computation wraps around to 0 and may alias the store at
1682 * offset 0. they could be combined if nir_alu_instr::no_unsigned_wrap was set. */
1683 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1684 nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
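/* Concretely: 268435455 * 16 == 0xfffffff0, and 0xfffffff0 + 16 wraps to 0 in
 * 32 bits, which is exactly the offset written by the store below. */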
1685 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1686 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1687 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1688
1689 nir_validate_shader(b->shader, NULL);
1690 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1691
1692 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1693
1694 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1695 }
1696
1697 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1698 {
1699 /* TODO: try to combine these loads */
1700 nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1701 nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1702 nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
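/* With no_unsigned_wrap the addition is known not to wrap, so offset is
 * provably >= 16 and cannot alias the store at offset 0; this disabled test
 * expects the two loads around the store to be combined. */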
1703 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1704 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1705 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1706
1707 nir_validate_shader(b->shader, NULL);
1708 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1709
1710 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1711
1712 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1713
1714 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1715 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1716 ASSERT_EQ(load->dest.ssa.num_components, 1);
1717 ASSERT_EQ(load->src[1].ssa, offset);
1718 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1719 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1720 }
1721
1722 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1723 {
1724 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1725 create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1726 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1727
1728 nir_validate_shader(b->shader, NULL);
1729 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1730
1731 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1732
1733 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1734 }
1735
1736 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1737 {
1738 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1739 create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1740 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1741
1742 nir_validate_shader(b->shader, NULL);
1743 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1744
1745 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1746
1747 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1748
1749 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1750 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1751 ASSERT_EQ(load->dest.ssa.num_components, 1);
1752 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1753 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1754 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1755 }
1756
1757 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1758 {
1759 /* TODO: implement type-based alias analysis so that these loads can be
1760 * combined. this is made a bit more difficult than simply using
1761 * nir_compare_derefs() because the vectorizer creates loads/stores with
1762 * casted derefs. The solution would probably be to keep multiple derefs for
1763 * an entry (one for each load/store combined into it). */
1764 glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1765 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1766
1767 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1768 nir_deref_instr *deref = nir_build_deref_var(b, var);
1769
1770 nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1771 nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1772 nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1773
1774 create_shared_load(load_deref, 0x1);
1775 create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1776 create_shared_load(load_deref, 0x3);
1777
1778 nir_validate_shader(b->shader, NULL);
1779 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1780
1781 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1782
1783 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1784
1785 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1786 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1787 ASSERT_EQ(load->dest.ssa.num_components, 1);
1788 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1789 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1790 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1791 }
1792
1793 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1794 {
1795 nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1796 nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1797 nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1798
1799 create_shared_load(load_deref, 0x1);
1800 create_shared_store(nir_build_deref_var(b, var1), 0x2);
1801 create_shared_load(load_deref, 0x3);
1802
1803 nir_validate_shader(b->shader, NULL);
1804 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1805
1806 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1807
1808 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1809
1810 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1811 ASSERT_EQ(load->dest.ssa.bit_size, 32);
1812 ASSERT_EQ(load->dest.ssa.num_components, 1);
1813 ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1814 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1815 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1816 }
1817
1818 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1819 {
1820 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1821 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
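/* The two constant addresses are more than 4 GiB apart, so they cannot be
 * covered by a single load and are expected to stay separate. */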
1822
1823 nir_validate_shader(b->shader, NULL);
1824 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1825
1826 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1827
1828 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1829 }
1830
1831 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1832 {
1833 nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1834 nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1835 nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1836 create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1837 create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1838
1839 nir_validate_shader(b->shader, NULL);
1840 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1841
1842 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1843
1844 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1845 }
1846
1847 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1848 {
1849 create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1850 create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
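/* The offsets 0xfffffffc and 0x0 are adjacent only through 32-bit
 * wrap-around; with robust access enabled for SSBOs the merge across the
 * overflow must not happen, so both loads are expected to remain. */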
1851
1852 nir_validate_shader(b->shader, NULL);
1853 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1854
1855 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1856
1857 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1858 }
1859
1860 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1861 {
1862 nir_ssa_def *offset = nir_load_local_invocation_index(b);
1863 offset = nir_imul_imm(b, offset, 16);
1864 offset = nir_iadd_imm(b, offset, 4);
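/* offset == 16 * invocation_index + 4, so the best known alignment is
 * align_mul == 16 with align_offset == 4, as checked below. */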
1865 nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1866 0x1);
1867
1868 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1869 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1870 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1871 }
1872
1873 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1874 {
1875 nir_ssa_def *offset = nir_load_local_invocation_index(b);
1876 offset = nir_iadd_imm(b, offset, 1);
1877 offset = nir_imul_imm(b, offset, 16);
1878 offset = nir_iadd_imm(b, offset, 4);
1879 nir_intrinsic_instr *load =
1880 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1881
1882 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1883 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1884 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1885 }
1886
1887 /* Check offset % mul != 0 */
1888 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
1889 {
1890 nir_ssa_def *offset = nir_load_local_invocation_index(b);
1891 offset = nir_imul_imm(b, offset, 16);
1892 offset = nir_iadd_imm(b, offset, 20);
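/* offset == 16 * invocation_index + 20; the constant part is reduced modulo
 * the stride, giving align_mul == 16 and align_offset == 20 % 16 == 4. */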
1893 nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1894 0x1);
1895
1896 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1897 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1898 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1899 }
1900
1901 /* Check that we don't upgrade to non-power-of-two alignments. */
1902 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
1903 {
1904 nir_ssa_def *offset = nir_load_local_invocation_index(b);
1905 offset = nir_imul_imm(b, offset, 24);
1906 offset = nir_iadd_imm(b, offset, 4);
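/* The stride 24 is not a power of two; its largest power-of-two divisor is 8,
 * so the expected result is align_mul == 8 with align_offset == 4. */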
1907 nir_intrinsic_instr *load =
1908 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1909
1910 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1911 EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
1912 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1913 }
1914
1915 /* Check that with multiple indirect terms the alignment is the smallest stride. */
1916 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
1917 {
1918 nir_ssa_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
1919 nir_ssa_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
1920 nir_ssa_def *offset = nir_iadd(b, x, y);
1921 offset = nir_iadd_imm(b, offset, 8);
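/* offset == 64*x + 16*y + 8; the smaller stride (16) bounds the provable
 * alignment, so align_mul == 16 and align_offset == 8 are expected below. */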
1922 nir_intrinsic_instr *load =
1923 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1924
1925 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1926 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1927 EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
1928 }
1929
1930 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
1931 {
1932 nir_intrinsic_instr *load =
1933 create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
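/* A constant offset is maximally aligned, so align_mul is expected to be
 * NIR_ALIGN_MUL_MAX with the full constant (100) kept as align_offset. */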
1934
1935 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1936 EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
1937 EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
1938 }
1939