1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir_test.h"
25
26 /* This is a macro so that failures report useful line numbers */
27 #define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle) \
28 EXPECT_EQ((instr)->src[0].src.ssa, &(load)->def); \
29 EXPECT_EQ(swizzle(instr, 0), expected_swizzle);
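/* Typical usage after vectorization (see the tests below): check which
 * components of the merged load each original value now reads, e.g.
 *   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
 *   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
 */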
30
31 namespace {
32
33 class nir_load_store_vectorize_test : public nir_test {
34 protected:
35 nir_load_store_vectorize_test()
36 : nir_test::nir_test("nir_load_store_vectorize_test")
37 {
38 }
39
40 unsigned count_intrinsics(nir_intrinsic_op intrinsic);
41
42 nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
43 unsigned index);
44
45 bool run_vectorizer(nir_variable_mode modes, bool cse=false,
46 nir_variable_mode robust_modes = (nir_variable_mode)0);
47
48 nir_def *get_resource(uint32_t binding, bool ssbo);
49
50 nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
51 uint32_t id, unsigned bit_size=32, unsigned components=1,
52 unsigned access=0);
53 void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
54 uint32_t id, unsigned bit_size=32, unsigned components=1,
55 unsigned wrmask=0xf, unsigned access=0);
56
57 nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
58 uint32_t id, unsigned bit_size=32, unsigned components=1,
59 unsigned access=0);
60 void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
61 uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
62 unsigned access=0);
63
64 void create_shared_load(nir_deref_instr *deref, uint32_t id,
65 unsigned bit_size=32, unsigned components=1);
66 void create_shared_store(nir_deref_instr *deref, uint32_t id,
67 unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);
68
69 bool test_alu(nir_instr *instr, nir_op op);
70 bool test_alu_def(nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle=0);
71
72 static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
73 unsigned bit_size,
74 unsigned num_components, int64_t hole_size,
75 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
76 void *data);
77 static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
78
79 std::string swizzle(nir_alu_instr *instr, int src);
80
81 std::map<unsigned, nir_alu_instr*> movs;
82 std::map<unsigned, nir_alu_src*> loads;
83 std::map<unsigned, nir_def*> res_map;
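/* Knobs consumed by mem_vectorize_callback() through opts.cb_data. */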
84 unsigned max_components = 4;
85 bool overfetch = false;
86 int64_t max_hole_size = 0;
87 };
88
89 std::string
90 nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
91 {
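   /* Map each source component index to a letter, e.g. a source reading
    * components {1, 2} of a vec4 yields "yz". */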
92 std::string swizzle;
93 for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
94 swizzle += "xyzwefghijklmnop"[instr->src[src].swizzle[i]];
95 }
96
97 return swizzle;
98 }
99
100 unsigned
101 nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
102 {
103 unsigned count = 0;
104 nir_foreach_block(block, b->impl) {
105 nir_foreach_instr(instr, block) {
106 if (instr->type != nir_instr_type_intrinsic)
107 continue;
108 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
109 if (intrin->intrinsic == intrinsic)
110 count++;
111 }
112 }
113 return count;
114 }
115
116 nir_intrinsic_instr *
117 nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
118 unsigned index)
119 {
120 nir_foreach_block(block, b->impl) {
121 nir_foreach_instr(instr, block) {
122 if (instr->type != nir_instr_type_intrinsic)
123 continue;
124 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
125 if (intrin->intrinsic == intrinsic) {
126 if (index == 0)
127 return intrin;
128 index--;
129 }
130 }
131 }
132 return NULL;
133 }
134
135 bool
136 nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
137 bool cse,
138 nir_variable_mode robust_modes)
139 {
140 if (modes & nir_var_mem_shared)
141 nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
142
143 nir_load_store_vectorize_options opts = { };
144 opts.callback = mem_vectorize_callback;
145 opts.modes = modes;
146 opts.robust_modes = robust_modes;
147 opts.cb_data = this;
148 bool progress = nir_opt_load_store_vectorize(b->shader, &opts);
149
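   /* On progress, run a few cleanup passes: copy propagation, algebraic
    * optimization and constant folding reduce the extra movs and pack/unpack
    * ALU ops the vectorizer emits to the simple patterns the tests match. */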
150 if (progress) {
151 nir_validate_shader(b->shader, NULL);
152 if (cse)
153 nir_opt_cse(b->shader);
154 nir_copy_prop(b->shader);
155 nir_opt_algebraic(b->shader);
156 nir_opt_constant_folding(b->shader);
157 }
158 return progress;
159 }
160
161 nir_def *
162 nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
163 {
164 if (res_map.count(binding))
165 return res_map[binding];
166
167 nir_intrinsic_instr *res = nir_intrinsic_instr_create(
168 b->shader, nir_intrinsic_vulkan_resource_index);
169 nir_def_init(&res->instr, &res->def, 1, 32);
170 res->num_components = 1;
171 res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
172 nir_intrinsic_set_desc_type(
173 res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
174 nir_intrinsic_set_desc_set(res, 0);
175 nir_intrinsic_set_binding(res, binding);
176 nir_builder_instr_insert(b, &res->instr);
177 res_map[binding] = &res->def;
178 return &res->def;
179 }
180
181 nir_intrinsic_instr *
182 nir_load_store_vectorize_test::create_indirect_load(
183 nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
184 unsigned bit_size, unsigned components, unsigned access)
185 {
186 nir_intrinsic_op intrinsic;
187 nir_def *res = NULL;
188 switch (mode) {
189 case nir_var_mem_ubo:
190 intrinsic = nir_intrinsic_load_ubo;
191 res = get_resource(binding, false);
192 break;
193 case nir_var_mem_ssbo:
194 intrinsic = nir_intrinsic_load_ssbo;
195 res = get_resource(binding, true);
196 break;
197 case nir_var_mem_push_const:
198 intrinsic = nir_intrinsic_load_push_constant;
199 break;
200 default:
201 return NULL;
202 }
203 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
204 nir_def_init(&load->instr, &load->def, components, bit_size);
205 load->num_components = components;
206 if (res) {
207 load->src[0] = nir_src_for_ssa(res);
208 load->src[1] = nir_src_for_ssa(offset);
209 } else {
210 load->src[0] = nir_src_for_ssa(offset);
211 }
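   /* 1-bit booleans are treated as 32-bit values for alignment and range
    * purposes. */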
212 int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;
213
214 nir_intrinsic_set_align(load, byte_size, 0);
215 if (mode != nir_var_mem_push_const) {
216 nir_intrinsic_set_access(load, (gl_access_qualifier)access);
217 }
218
219 if (nir_intrinsic_has_range_base(load)) {
220 uint32_t range = byte_size * components;
221 int offset_src = res ? 1 : 0;
222
223 if (nir_src_is_const(load->src[offset_src])) {
224 nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
225 nir_intrinsic_set_range(load, range);
226 } else {
227 /* Unknown range */
228 nir_intrinsic_set_range_base(load, 0);
229 nir_intrinsic_set_range(load, ~0);
230 }
231 }
232
233 nir_builder_instr_insert(b, &load->instr);
234 nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
235 movs[id] = mov;
236 loads[id] = &mov->src[0];
237
238 return load;
239 }
240
241 void
242 nir_load_store_vectorize_test::create_indirect_store(
243 nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
244 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
245 {
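   /* Component i of store `id` gets the value (id << 4) | i (e.g. store 0x2
    * writes 0x20, 0x21, ...), so the tests can recognize merged constants
    * after vectorization. */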
246 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
247 for (unsigned i = 0; i < components; i++)
248 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
249 nir_def *value = nir_build_imm(b, components, bit_size, values);
250
251 nir_intrinsic_op intrinsic;
252 nir_def *res = NULL;
253 switch (mode) {
254 case nir_var_mem_ssbo:
255 intrinsic = nir_intrinsic_store_ssbo;
256 res = get_resource(binding, true);
257 break;
258 case nir_var_mem_shared:
259 intrinsic = nir_intrinsic_store_shared;
260 break;
261 default:
262 return;
263 }
264 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
265 nir_def_init(&store->instr, &store->def, components, bit_size);
266 store->num_components = components;
267 if (res) {
268 store->src[0] = nir_src_for_ssa(value);
269 store->src[1] = nir_src_for_ssa(res);
270 store->src[2] = nir_src_for_ssa(offset);
271 } else {
272 store->src[0] = nir_src_for_ssa(value);
273 store->src[1] = nir_src_for_ssa(offset);
274 }
275 nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
276 nir_intrinsic_set_access(store, (gl_access_qualifier)access);
277 nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
278 nir_builder_instr_insert(b, &store->instr);
279 }
280
281 nir_intrinsic_instr *
282 nir_load_store_vectorize_test::create_load(
283 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
284 unsigned bit_size, unsigned components, unsigned access)
285 {
286 return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
287 }
288
289 void
290 nir_load_store_vectorize_test::create_store(
291 nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
292 unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
293 {
294 create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
295 }
296
297 void nir_load_store_vectorize_test::create_shared_load(
298 nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
299 {
300 nir_def *load = nir_load_deref(b, deref);
301 nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
302 movs[id] = mov;
303 loads[id] = &mov->src[0];
304 }
305
306 void nir_load_store_vectorize_test::create_shared_store(
307 nir_deref_instr *deref, uint32_t id,
308 unsigned bit_size, unsigned components, unsigned wrmask)
309 {
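   /* Uses the same (id << 4) | i value encoding as create_indirect_store(). */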
310 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
311 for (unsigned i = 0; i < components; i++)
312 values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
313 nir_def *value = nir_build_imm(b, components, bit_size, values);
314
315 nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
316 }
317
318 bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
319 {
320 return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
321 }
322
323 bool nir_load_store_vectorize_test::test_alu_def(
324 nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle)
325 {
326 if (instr->type != nir_instr_type_alu)
327 return false;
328
329 nir_alu_instr *alu = nir_instr_as_alu(instr);
330
331 if (index >= nir_op_infos[alu->op].num_inputs)
332 return false;
333 if (alu->src[index].src.ssa != def)
334 return false;
335 if (alu->src[index].swizzle[0] != swizzle)
336 return false;
337
338 return true;
339 }
340
341 bool nir_load_store_vectorize_test::mem_vectorize_callback(
342 unsigned align_mul, unsigned align_offset, unsigned bit_size,
343 unsigned num_components, int64_t hole_size,
344 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
345 void *data)
346 {
347 nir_load_store_vectorize_test *test = (nir_load_store_vectorize_test *)data;
348
349 if (hole_size > test->max_hole_size ||
350 (!test->overfetch && !nir_num_components_valid(num_components)))
351 return false;
352
353 /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
354 uint32_t align = align_mul;
355 if (align_offset)
356 align = 1 << (ffs(align_offset) - 1);
357
358 /* Require scalar alignment and at most max_components components (unless overfetch is enabled). */
359 return align % (bit_size / 8) == 0 &&
360 (test->overfetch || num_components <= test->max_components);
361 }
362
363 void nir_load_store_vectorize_test::shared_type_info(
364 const struct glsl_type *type, unsigned *size, unsigned *align)
365 {
366 assert(glsl_type_is_vector_or_scalar(type));
367
368 uint32_t comp_size = glsl_type_is_boolean(type)
369 ? 4 : glsl_get_bit_size(type) / 8;
370 unsigned length = glsl_get_vector_elements(type);
371 *size = comp_size * length;
372 *align = comp_size;
373 }
374 } // namespace
375
376 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
377 {
378 create_load(nir_var_mem_ubo, 0, 0, 0x1);
379 create_load(nir_var_mem_ubo, 0, 4, 0x2);
380
381 nir_validate_shader(b->shader, NULL);
382 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
383
384 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
385
386 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
387
388 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
389 ASSERT_EQ(load->def.bit_size, 32);
390 ASSERT_EQ(load->def.num_components, 2);
391 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
392 ASSERT_EQ(nir_intrinsic_range(load), 8);
393 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
394 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
395 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
396 }
397
398 TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
399 {
400 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
401 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
402
403 nir_validate_shader(b->shader, NULL);
404 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
405
406 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
407
408 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
409
410 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
411 ASSERT_EQ(load->def.bit_size, 32);
412 ASSERT_EQ(load->def.num_components, 3);
413 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
414 ASSERT_EQ(nir_intrinsic_range(load), 12);
415 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
416 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
417 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
418 }
419
420 /* Test for a bug in range handling */
421 TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
422 {
423 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
424 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);
425
426 nir_validate_shader(b->shader, NULL);
427 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
428
429 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
430
431 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
432
433 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
434 ASSERT_EQ(load->def.bit_size, 32);
435 ASSERT_EQ(load->def.num_components, 4);
436 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
437 ASSERT_EQ(nir_intrinsic_range(load), 16);
438 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
439 ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
440 ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
441 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
442 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
443 ASSERT_EQ(loads[0x1]->swizzle[2], 2);
444 ASSERT_EQ(loads[0x1]->swizzle[3], 3);
445 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
446 }
447
448 TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
449 {
450 create_load(nir_var_mem_ubo, 0, 0, 0x1);
451 create_load(nir_var_mem_ubo, 0, 0, 0x2);
452
453 nir_validate_shader(b->shader, NULL);
454 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
455
456 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
457
458 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
459
460 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
461 ASSERT_EQ(load->def.bit_size, 32);
462 ASSERT_EQ(load->def.num_components, 1);
463 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
464 ASSERT_EQ(nir_intrinsic_range(load), 4);
465 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
466 ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
467 ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
468 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
469 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
470 }
471
472 TEST_F(nir_load_store_vectorize_test, ubo_load_large)
473 {
474 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
475 create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
476
477 nir_validate_shader(b->shader, NULL);
478 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
479
480 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
481
482 nir_validate_shader(b->shader, NULL);
483 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
484 }
485
486 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
487 {
488 create_load(nir_var_mem_push_const, 0, 0, 0x1);
489 create_load(nir_var_mem_push_const, 0, 4, 0x2);
490
491 nir_validate_shader(b->shader, NULL);
492 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
493
494 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
495
496 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
497
498 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
499 ASSERT_EQ(load->def.bit_size, 32);
500 ASSERT_EQ(load->def.num_components, 2);
501 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
502 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
503 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
504 }
505
506 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
507 {
508 create_load(nir_var_mem_push_const, 0, 0, 0x1);
509 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);
510
511 nir_validate_shader(b->shader, NULL);
512 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
513
514 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
515
516 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
517
518 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
519 ASSERT_EQ(load->def.bit_size, 32);
520 ASSERT_EQ(load->def.num_components, 2);
521 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
522 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
523 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
524 }
525
526 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
527 {
528 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
529 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
530
531 nir_validate_shader(b->shader, NULL);
532 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
533
534 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
535
536 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
537
538 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
539 ASSERT_EQ(load->def.bit_size, 32);
540 ASSERT_EQ(load->def.num_components, 2);
541 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
542 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
543 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
544 }
545
546 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
547 {
548 nir_def *index_base = nir_load_local_invocation_index(b);
549 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
550 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);
551
552 nir_validate_shader(b->shader, NULL);
553 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
554
555 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
556
557 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
558
559 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
560 ASSERT_EQ(load->def.bit_size, 32);
561 ASSERT_EQ(load->def.num_components, 2);
562 ASSERT_EQ(load->src[1].ssa, index_base);
563 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
564 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
565 }
566
567 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
568 {
569 nir_def *index_base = nir_load_local_invocation_index(b);
570 nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
571 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
572 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
573
574 nir_validate_shader(b->shader, NULL);
575 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
576
577 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
578
579 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
580
581 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
582 ASSERT_EQ(load->def.bit_size, 32);
583 ASSERT_EQ(load->def.num_components, 2);
584 ASSERT_EQ(load->src[1].ssa, index_base_prev);
585 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
586 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
587 }
588
589 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
590 {
591 nir_def *inv = nir_load_local_invocation_index(b);
592 nir_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
593 nir_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
594 nir_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
595 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
596 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
597
598 nir_validate_shader(b->shader, NULL);
599 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
600
601 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
602
603 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
604
605 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
606 ASSERT_EQ(load->def.bit_size, 32);
607 ASSERT_EQ(load->def.num_components, 2);
608 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
609 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
610
611 /* nir_opt_algebraic rewrites the imul by -4 into ineg(ishl(x, 2)). */
612 ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
613 nir_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
614 ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
615 nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
616 ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
617 ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
618 }
619
620 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
621 {
622 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
623 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
624 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
625
626 nir_validate_shader(b->shader, NULL);
627 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
628
629 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
630
631 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
632
633 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
634 ASSERT_EQ(load->def.bit_size, 32);
635 ASSERT_EQ(load->def.num_components, 1);
636 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
637 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
638 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
639 }
640
641 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
642 {
643 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
644 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
645 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
646
647 nir_validate_shader(b->shader, NULL);
648 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
649
650 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
651
652 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
653 }
654
655 TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
656 {
657 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
658 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
659 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
660
661 nir_validate_shader(b->shader, NULL);
662 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
663
664 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
665
666 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
667 }
668
669 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
670 {
671 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
672 create_load(nir_var_mem_ssbo, 0, 0, 0x2);
673 create_store(nir_var_mem_ssbo, 0, 0, 0x3);
674
675 nir_validate_shader(b->shader, NULL);
676 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
677
678 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
679
680 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
681 }
682
683 /* If nir_opt_load_store_vectorize were implemented like many other load/store
684  * optimization passes (for example, nir_opt_combine_stores and
685  * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store is
686  * encountered, this case wouldn't be optimized.
687  * A similar test for derefs is shared_load_adjacent_store_identical. */
688 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
689 {
690 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
691 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
692 create_load(nir_var_mem_ssbo, 0, 4, 0x3);
693
694 nir_validate_shader(b->shader, NULL);
695 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
696 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
697
698 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
699
700 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
701 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
702
703 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
704 ASSERT_EQ(load->def.bit_size, 32);
705 ASSERT_EQ(load->def.num_components, 2);
706 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
707 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
708 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
709 }
710
711 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
712 {
713 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
714 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
715
716 nir_validate_shader(b->shader, NULL);
717 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
718
719 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
720
721 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
722
723 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
724 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
725 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
726 nir_def *val = store->src[0].ssa;
727 ASSERT_EQ(val->bit_size, 32);
728 ASSERT_EQ(val->num_components, 2);
729 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
730 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
731 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
732 }
733
734 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
735 {
736 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
737 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
738
739 nir_validate_shader(b->shader, NULL);
740 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
741
742 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
743
744 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
745
746 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
747 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
748 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
749 nir_def *val = store->src[0].ssa;
750 ASSERT_EQ(val->bit_size, 32);
751 ASSERT_EQ(val->num_components, 3);
752 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
753 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
754 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
755 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
756 }
757
758 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
759 {
760 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
761 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
762
763 nir_validate_shader(b->shader, NULL);
764 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
765
766 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
767
768 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
769
770 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
771 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
772 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
773 nir_def *val = store->src[0].ssa;
774 ASSERT_EQ(val->bit_size, 32);
775 ASSERT_EQ(val->num_components, 1);
776 ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
777 }
778
779 TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
780 {
781 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
782 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
783
784 nir_validate_shader(b->shader, NULL);
785 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
786
787 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
788
789 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
790 }
791
792 TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
793 {
794 create_load(nir_var_mem_ubo, 0, 0, 0x1);
795
796 nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
797 nir_var_mem_ssbo);
798
799 create_load(nir_var_mem_ubo, 0, 4, 0x2);
800
801 nir_validate_shader(b->shader, NULL);
802 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
803
804 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
805
806 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
807 }
808
809 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
810 {
811 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
812
813 nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
814 nir_var_mem_ssbo);
815
816 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
817
818 nir_validate_shader(b->shader, NULL);
819 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
820
821 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
822
823 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
824 }
825
826 /* A control barrier only synchronizes invocations within a workgroup; it
827  * doesn't require that loads/stores complete.
828  */
829 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
830 {
831 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
832 nir_barrier(b, SCOPE_WORKGROUP, SCOPE_NONE,
833 (nir_memory_semantics)0, (nir_variable_mode)0);
834 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
835
836 nir_validate_shader(b->shader, NULL);
837 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
838
839 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
840
841 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
842 }
843
844 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
845 {
846 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
847
848 nir_scoped_memory_barrier(b, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
849 nir_var_mem_shared);
850
851 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
852
853 nir_validate_shader(b->shader, NULL);
854 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
855
856 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
857
858 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
859 }
860
861 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
862 {
863 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
864 nir_discard(b);
865 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
866
867 nir_validate_shader(b->shader, NULL);
868 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
869
870 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
871
872 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
873 }
874
875 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
876 {
877 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
878 nir_demote(b);
879 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
880
881 nir_validate_shader(b->shader, NULL);
882 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
883
884 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
885
886 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
887 }
888
889 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
890 {
891 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
892 nir_discard(b);
893 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
894
895 nir_validate_shader(b->shader, NULL);
896 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
897
898 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
899
900 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
901 }
902
903 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
904 {
905 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
906 nir_demote(b);
907 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
908
909 nir_validate_shader(b->shader, NULL);
910 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
911
912 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
913
914 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
915 }
916
917 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
918 {
919 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
920 create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
921 create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
922
923 nir_validate_shader(b->shader, NULL);
924 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
925
926 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
927
928 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
929
930 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
931 ASSERT_EQ(load->def.bit_size, 8);
932 ASSERT_EQ(load->def.num_components, 4);
933 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
934 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
935 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
936
937 nir_def *val = loads[0x3]->src.ssa;
938 ASSERT_EQ(val->bit_size, 16);
939 ASSERT_EQ(val->num_components, 1);
940 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
941 nir_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
942 nir_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
943 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
944 high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
945 ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
946 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
947 ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->def, 2));
948 ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->def, 3));
949 }
950
951 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
952 {
953 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
954 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
955
956 nir_validate_shader(b->shader, NULL);
957 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
958
959 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
960
961 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
962
963 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
964 ASSERT_EQ(load->def.bit_size, 32);
965 ASSERT_EQ(load->def.num_components, 4);
966 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
967 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
968
969 nir_def *val = loads[0x2]->src.ssa;
970 ASSERT_EQ(val->bit_size, 64);
971 ASSERT_EQ(val->num_components, 1);
972 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
973 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
974 EXPECT_INSTR_SWIZZLES(pack, load, "zw");
975 }
976
977 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
978 {
979 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
980 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
981 create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
982
983 nir_validate_shader(b->shader, NULL);
984 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
985
986 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
987
988 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
989
990 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
991 ASSERT_EQ(load->def.bit_size, 64);
992 ASSERT_EQ(load->def.num_components, 3);
993 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
994 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");
995
996 nir_def *val = loads[0x2]->src.ssa;
997 ASSERT_EQ(val->bit_size, 64);
998 ASSERT_EQ(val->num_components, 1);
999 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
1000 nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
1001 EXPECT_INSTR_SWIZZLES(mov, load, "y");
1002
1003 val = loads[0x1]->src.ssa;
1004 ASSERT_EQ(val->bit_size, 32);
1005 ASSERT_EQ(val->num_components, 2);
1006 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
1007 nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
1008 EXPECT_INSTR_SWIZZLES(unpack, load, "x");
1009 }
1010
1011 TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
1012 {
1013 create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
1014 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1015
1016 nir_validate_shader(b->shader, NULL);
1017 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1018
1019 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1020
1021 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1022
1023 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1024 ASSERT_EQ(load->def.bit_size, 32);
1025 ASSERT_EQ(load->def.num_components, 3);
1026 ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
1027 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
1028
1029 nir_def *val = loads[0x2]->src.ssa;
1030 ASSERT_EQ(val->bit_size, 64);
1031 ASSERT_EQ(val->num_components, 1);
1032 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
1033 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
1034 EXPECT_INSTR_SWIZZLES(pack, load, "yz");
1035 }
1036
1037 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
1038 {
1039 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
1040 create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
1041 create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
1042
1043 nir_validate_shader(b->shader, NULL);
1044 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1045
1046 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1047
1048 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1049
1050 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1051 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1052 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1053 nir_def *val = store->src[0].ssa;
1054 ASSERT_EQ(val->bit_size, 8);
1055 ASSERT_EQ(val->num_components, 4);
1056 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1057 ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
1058 ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
1059 ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
1060 ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
1061 }
1062
1063 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
1064 {
1065 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1066 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1067
1068 nir_validate_shader(b->shader, NULL);
1069 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1070
1071 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1072
1073 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1074
1075 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1076 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1077 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1078 nir_def *val = store->src[0].ssa;
1079 ASSERT_EQ(val->bit_size, 32);
1080 ASSERT_EQ(val->num_components, 4);
1081 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1082 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1083 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
1084 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
1085 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
1086 }
1087
1088 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
1089 {
1090 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1091 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1092 create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1093
1094 nir_validate_shader(b->shader, NULL);
1095 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1096
1097 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1098
1099 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1100
1101 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1102 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1103 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1104 nir_def *val = store->src[0].ssa;
1105 ASSERT_EQ(val->bit_size, 64);
1106 ASSERT_EQ(val->num_components, 3);
1107 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1108 ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1109 ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1110 ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1111 }
1112
1113 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1114 {
1115 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1116 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1117
1118 nir_validate_shader(b->shader, NULL);
1119 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1120
1121 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1122
1123 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1124
1125 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1126 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1127 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1128 nir_def *val = store->src[0].ssa;
1129 ASSERT_EQ(val->bit_size, 32);
1130 ASSERT_EQ(val->num_components, 3);
1131 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1132 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1133 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1134 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1135 }
1136
1137 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1138 {
1139 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1140 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1141
1142 nir_validate_shader(b->shader, NULL);
1143 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1144
1145 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1146
1147 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1148 }
1149
1150 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1151 {
1152 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1153 create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1154
1155 nir_validate_shader(b->shader, NULL);
1156 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1157
1158 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1159
1160 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1161
1162 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1163 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1164 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1165 nir_def *val = store->src[0].ssa;
1166 ASSERT_EQ(val->bit_size, 32);
1167 ASSERT_EQ(val->num_components, 4);
1168 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1169 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1170 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1171 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1172 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1173 }
1174
1175 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1176 {
1177 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1178 nir_deref_instr *deref = nir_build_deref_var(b, var);
1179
1180 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1181 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1182
1183 nir_validate_shader(b->shader, NULL);
1184 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1185
1186 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1187
1188 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1189
1190 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1191 ASSERT_EQ(load->def.bit_size, 32);
1192 ASSERT_EQ(load->def.num_components, 2);
1193
1194 deref = nir_src_as_deref(load->src[0]);
1195 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1196
1197 deref = nir_deref_instr_parent(deref);
1198 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1199 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1200
1201 deref = nir_deref_instr_parent(deref);
1202 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1203 ASSERT_EQ(deref->var, var);
1204
1205 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1206 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1207 }
1208
1209 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1210 {
1211 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1212 nir_deref_instr *deref = nir_build_deref_var(b, var);
1213 nir_def_init(&deref->instr, &deref->def, 1, 64);
1214
1215 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1216 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1217
1218 nir_validate_shader(b->shader, NULL);
1219 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1220
1221 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1222
1223 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1224 }
1225
1226 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1227 {
1228 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1229 nir_deref_instr *deref = nir_build_deref_var(b, var);
1230 nir_def *index_base = nir_load_local_invocation_index(b);
1231
1232 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1233 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1234
1235 nir_validate_shader(b->shader, NULL);
1236 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1237
1238 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1239
1240 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1241
1242 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1243 ASSERT_EQ(load->def.bit_size, 32);
1244 ASSERT_EQ(load->def.num_components, 2);
1245
1246 deref = nir_src_as_deref(load->src[0]);
1247 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1248
1249 deref = nir_deref_instr_parent(deref);
1250 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1251 ASSERT_EQ(deref->arr.index.ssa, index_base);
1252
1253 deref = nir_deref_instr_parent(deref);
1254 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1255 ASSERT_EQ(deref->var, var);
1256
1257 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1258 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1259 }
1260
1261 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1262 {
1263 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1264 nir_deref_instr *deref = nir_build_deref_var(b, var);
1265 nir_def *index_base = nir_load_local_invocation_index(b);
1266 nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1267
1268 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1269 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1270
1271 nir_validate_shader(b->shader, NULL);
1272 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1273
1274 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1275
1276 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1277
1278 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1279 ASSERT_EQ(load->def.bit_size, 32);
1280 ASSERT_EQ(load->def.num_components, 2);
1281
1282 deref = nir_src_as_deref(load->src[0]);
1283 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1284
1285 deref = nir_deref_instr_parent(deref);
1286 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1287 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1288
1289 deref = nir_deref_instr_parent(deref);
1290 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1291 ASSERT_EQ(deref->var, var);
1292
1293 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1294 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1295 }
1296
1297 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1298 {
1299 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1300 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1301
1302 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1303 nir_deref_instr *deref = nir_build_deref_var(b, var);
1304
1305 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1306 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1307
1308 nir_validate_shader(b->shader, NULL);
1309 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1310
1311 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1312
1313 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1314
1315 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1316 ASSERT_EQ(load->def.bit_size, 32);
1317 ASSERT_EQ(load->def.num_components, 2);
1318
1319 deref = nir_src_as_deref(load->src[0]);
1320 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1321
1322 deref = nir_deref_instr_parent(deref);
1323 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1324 ASSERT_EQ(deref->strct.index, 0);
1325
1326 deref = nir_deref_instr_parent(deref);
1327 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1328 ASSERT_EQ(deref->var, var);
1329
1330 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1331 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1332 }
1333
1334 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1335 {
1336 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1337 nir_deref_instr *deref = nir_build_deref_var(b, var);
1338
1339 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1340 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1341 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1342
1343 nir_validate_shader(b->shader, NULL);
1344 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1345 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1346
1347 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1348
1349 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1350 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1351
1352 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1353 ASSERT_EQ(load->def.bit_size, 32);
1354 ASSERT_EQ(load->def.num_components, 1);
1355
1356 deref = nir_src_as_deref(load->src[0]);
1357 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1358 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1359
1360 deref = nir_deref_instr_parent(deref);
1361 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1362 ASSERT_EQ(deref->var, var);
1363
1364 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1365 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1366 }
1367
1368 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1369 {
1370 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1371 nir_deref_instr *deref = nir_build_deref_var(b, var);
1372
1373 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1374 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1375 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1376
1377 nir_validate_shader(b->shader, NULL);
1378 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1379
1380 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1381
1382 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1383 }
1384
1385 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1386 {
1387 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1388 nir_deref_instr *deref = nir_build_deref_var(b, var);
1389
1390 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1391 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1392 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1393
1394 nir_validate_shader(b->shader, NULL);
1395 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1396 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1397
1398 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1399
1400 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1401 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1402
1403 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1404 ASSERT_EQ(load->def.bit_size, 32);
1405 ASSERT_EQ(load->def.num_components, 2);
1406
1407 deref = nir_src_as_deref(load->src[0]);
1408 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1409
1410 deref = nir_deref_instr_parent(deref);
1411 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1412 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1413
1414 deref = nir_deref_instr_parent(deref);
1415 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1416 ASSERT_EQ(deref->var, var);
1417
1418 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1419 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1420 }
1421
1422 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1423 {
1424 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1425 nir_deref_instr *deref = nir_build_deref_var(b, var);
1426
1427 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1428 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1429
1430 nir_validate_shader(b->shader, NULL);
1431 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1432
1433 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1434
1435 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1436
1437 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1438 ASSERT_EQ(load->def.bit_size, 32);
1439 ASSERT_EQ(load->def.num_components, 2);
1440
1441 deref = nir_src_as_deref(load->src[0]);
1442 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1443
1444 deref = nir_deref_instr_parent(deref);
1445 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1446 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1447
1448 deref = nir_deref_instr_parent(deref);
1449 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1450 ASSERT_EQ(deref->var, var);
1451
1452 /* The loaded value is converted to Boolean by (loaded != 0). */
1453 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1454 ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_ine));
1455 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1456 ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->def, 1));
1457 }
1458
1459 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1460 {
1461 glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1462 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1463
1464 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1465 nir_deref_instr *deref = nir_build_deref_var(b, var);
1466
1467 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1468 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1469
1470 nir_validate_shader(b->shader, NULL);
1471 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1472
1473 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1474
1475 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1476
1477 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1478 ASSERT_EQ(load->def.bit_size, 32);
1479 ASSERT_EQ(load->def.num_components, 2);
1480
1481 deref = nir_src_as_deref(load->src[0]);
1482 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1483
1484 deref = nir_deref_instr_parent(deref);
1485 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1486 ASSERT_EQ(deref->strct.index, 0);
1487
1488 deref = nir_deref_instr_parent(deref);
1489 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1490 ASSERT_EQ(deref->var, var);
1491
1492 /* The loaded value is converted to Boolean by (loaded != 0). */
1493 ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1494 ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1495
1496 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1497 }
1498
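/* Two scalar constant stores to adjacent elements are expected to merge into a
 * single vec2 store with write mask 0x3. */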
1499 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1500 {
1501 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1502 nir_deref_instr *deref = nir_build_deref_var(b, var);
1503
1504 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1505 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1506
1507 nir_validate_shader(b->shader, NULL);
1508 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1509
1510 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1511
1512 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1513
1514 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1515 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1516 nir_def *val = store->src[1].ssa;
1517 ASSERT_EQ(val->bit_size, 32);
1518 ASSERT_EQ(val->num_components, 2);
1519 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1520 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1521 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1522
1523 deref = nir_src_as_deref(store->src[0]);
1524 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1525
1526 deref = nir_deref_instr_parent(deref);
1527 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1528 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1529
1530 deref = nir_deref_instr_parent(deref);
1531 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1532 ASSERT_EQ(deref->var, var);
1533 }
1534
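/* The following push-constant cases must stay separate: the loads either use
 * different bases, leave a gap between their ranges, or use unrelated indirect
 * offsets, so none of them may be merged. */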
1535 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1536 {
1537 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1538 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1539
1540 nir_validate_shader(b->shader, NULL);
1541 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1542
1543 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1544
1545 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1546 }
1547
1548 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1549 {
1550 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1551 create_load(nir_var_mem_push_const, 0, 8, 0x2);
1552
1553 nir_validate_shader(b->shader, NULL);
1554 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1555
1556 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1557
1558 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1559 }
1560
1561 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1562 {
1563 nir_def *index_base = nir_load_local_invocation_index(b);
1564 create_load(nir_var_mem_push_const, 0, 0, 0x1);
1565 create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1566
1567 nir_validate_shader(b->shader, NULL);
1568 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1569
1570 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1571
1572 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1573 }
1574
1575 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1576 {
1577 nir_def *index_base = nir_load_local_invocation_index(b);
1578 create_indirect_load(nir_var_mem_push_const, 0,
1579 nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1580 create_indirect_load(nir_var_mem_push_const, 0,
1581 nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
1582
1583 nir_validate_shader(b->shader, NULL);
1584 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1585
1586 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1587
1588 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1589 }
1590
1591 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1592 {
1593 nir_def *index_base = nir_load_local_invocation_index(b);
1594 //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1595 nir_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1596 nir_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
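   /* low = 16*i + 12 and high = 16*(i + 1) = 16*i + 16, so the two 32-bit loads
    * are adjacent and should merge into one vec2 load based at `low`. */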
1597 create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1598 create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1599
1600 nir_validate_shader(b->shader, NULL);
1601 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1602
1603 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1604
1605 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1606
1607 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1608 ASSERT_EQ(load->def.bit_size, 32);
1609 ASSERT_EQ(load->def.num_components, 2);
1610 ASSERT_EQ(load->src[0].ssa, low);
1611 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1612 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1613 }
1614
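/* The store uses an indirect offset that could equal 0, so it may alias the
 * two constant-offset loads; the loads must therefore stay separate. */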
1615 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1616 {
1617 nir_def *index_base = nir_load_local_invocation_index(b);
1618 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1619 create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1620 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1621
1622 nir_validate_shader(b->shader, NULL);
1623 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1624
1625 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1626
1627 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1628 }
1629
1630 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1631 {
1632 nir_def *load_base = nir_load_global_invocation_index(b, 32);
1633 nir_def *store_base = nir_load_local_invocation_index(b);
1634 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1635 create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1636 create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1637
1638 nir_validate_shader(b->shader, NULL);
1639 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1640
1641 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1642
1643 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1644 }
1645
1646 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1647 {
1648 /* TODO: try to combine these loads */
1649 nir_def *index_base = nir_load_local_invocation_index(b);
1650 nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1651 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1652 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1653 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1654
1655 nir_validate_shader(b->shader, NULL);
1656 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1657
1658 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1659
1660 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1661
1662 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1663 ASSERT_EQ(load->def.bit_size, 32);
1664 ASSERT_EQ(load->def.num_components, 1);
1665 ASSERT_EQ(load->src[1].ssa, offset);
1666 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1667 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1668 }
1669
1670 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1671 {
1672    /* These loads can't be combined: if index_base == 268435455, then
1673     * offset wraps around to 0, so the store at offset 0 might alias them.
1674     * They could be combined if no_unsigned_wrap were set (see DISABLED_ssbo_alias4). */
1675 nir_def *index_base = nir_load_local_invocation_index(b);
1676 nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1677 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1678 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1679 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1680
1681 nir_validate_shader(b->shader, NULL);
1682 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1683
1684 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1685
1686 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1687 }
1688
1689 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1690 {
1691    /* TODO: try to combine these loads; no_unsigned_wrap on the offset rules out
1692     * the wrap-around aliasing from ssbo_alias3 */
1692 nir_def *index_base = nir_load_local_invocation_index(b);
1693 nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1694 nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1695 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1696 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1697 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1698
1699 nir_validate_shader(b->shader, NULL);
1700 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1701
1702 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1703
1704 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1705
1706 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1707 ASSERT_EQ(load->def.bit_size, 32);
1708 ASSERT_EQ(load->def.num_components, 1);
1709 ASSERT_EQ(load->src[1].ssa, offset);
1710 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1711 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1712 }
1713
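/* ssbo_alias5/ssbo_alias6: the store targets a different SSBO binding. Without
 * ACCESS_RESTRICT the two bindings could still point at overlapping buffer
 * memory, so the loads stay separate; with ACCESS_RESTRICT (alias6) the second
 * load can reuse the first. */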
1714 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1715 {
1716 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1717 create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1718 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1719
1720 nir_validate_shader(b->shader, NULL);
1721 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1722
1723 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1724
1725 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1726 }
1727
1728 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1729 {
1730 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1731 create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1732 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1733
1734 nir_validate_shader(b->shader, NULL);
1735 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1736
1737 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1738
1739 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1740
1741 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1742 ASSERT_EQ(load->def.bit_size, 32);
1743 ASSERT_EQ(load->def.num_components, 1);
1744 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1745 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1746 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1747 }
1748
1749 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1750 {
1751 /* TODO: implement type-based alias analysis so that these loads can be
1752     * combined. This is made a bit more difficult than simply using
1753 * nir_compare_derefs() because the vectorizer creates loads/stores with
1754 * casted derefs. The solution would probably be to keep multiple derefs for
1755 * an entry (one for each load/store combined into it). */
1756 glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1757 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1758
1759 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1760 nir_deref_instr *deref = nir_build_deref_var(b, var);
1761
1762 nir_def *index0 = nir_load_local_invocation_index(b);
1763 nir_def *index1 = nir_load_global_invocation_index(b, 32);
1764 nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1765
1766 create_shared_load(load_deref, 0x1);
1767 create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1768 create_shared_load(load_deref, 0x3);
1769
1770 nir_validate_shader(b->shader, NULL);
1771 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1772
1773 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1774
1775 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1776
1777 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1778 ASSERT_EQ(load->def.bit_size, 32);
1779 ASSERT_EQ(load->def.num_components, 1);
1780 ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1781 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1782 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1783 }
1784
1785 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1786 {
1787 nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1788 nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1789 nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1790
1791 create_shared_load(load_deref, 0x1);
1792 create_shared_store(nir_build_deref_var(b, var1), 0x2);
1793 create_shared_load(load_deref, 0x3);
1794
1795 nir_validate_shader(b->shader, NULL);
1796 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1797
1798 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1799
1800 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1801
1802 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1803 ASSERT_EQ(load->def.bit_size, 32);
1804 ASSERT_EQ(load->def.num_components, 1);
1805 ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1806 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1807 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1808 }
1809
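/* The two constant 64-bit addresses are 0x100000004 bytes apart, far too
 * distant for the loads to be combined. */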
1810 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1811 {
1812 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1813 create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
1814
1815 nir_validate_shader(b->shader, NULL);
1816 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1817
1818 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1819
1820 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1821 }
1822
1823 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1824 {
1825 nir_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1826 nir_def *first = nir_imul_imm(b, index_base, 0x100000000);
1827 nir_def *second = nir_imul_imm(b, index_base, 0x200000000);
1828 create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1829 create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1830
1831 nir_validate_shader(b->shader, NULL);
1832 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1833
1834 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1835
1836 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1837 }
1838
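/* With robust access enabled for SSBOs, merging the load at 0xfffffffc with
 * the load at 0x0 would change which accesses are out of bounds, so the pass
 * must leave them alone. */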
1839 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1840 {
1841 create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1842 create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1843
1844 nir_validate_shader(b->shader, NULL);
1845 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1846
1847 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1848
1849 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1850 }
1851
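/* The stride1/stride8/stride12/stride16 tests check robust-access merging of
 * loads at offsets derived from the invocation index: only some of these
 * patterns can be merged without changing out-of-bounds behaviour, and the
 * expected intrinsic counts below encode which ones. */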
1852 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
1853 {
1854 nir_def *offset = nir_load_local_invocation_index(b);
1855 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1856 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1857
1858 nir_validate_shader(b->shader, NULL);
1859 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1860
1861 EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1862
1863 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1864 }
1865
1866 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
1867 {
1868 nir_def *offset = nir_load_local_invocation_index(b);
1869 offset = nir_imul_imm(b, offset, 8);
1870 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1871 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1872
1873 nir_validate_shader(b->shader, NULL);
1874 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1875
1876 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1877
1878 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1879 }
1880
1881 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
1882 {
1883 nir_def *offset = nir_load_local_invocation_index(b);
1884 offset = nir_imul_imm(b, offset, 12);
1885 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1886 nir_def *offset_4 = nir_iadd_imm(b, offset, 4);
1887 create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
1888 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1889
1890 nir_validate_shader(b->shader, NULL);
1891 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
1892
1893 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1894
1895 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1896
1897 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1898 ASSERT_EQ(load->def.bit_size, 32);
1899 ASSERT_EQ(load->def.num_components, 1);
1900 ASSERT_EQ(load->src[1].ssa, offset);
1901 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1902
1903 load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
1904 ASSERT_EQ(load->def.bit_size, 32);
1905 ASSERT_EQ(load->def.num_components, 2);
1906 ASSERT_EQ(load->src[1].ssa, offset_4);
1907 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
1908 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1909 }
1910
1911 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
1912 {
1913 nir_def *offset = nir_load_local_invocation_index(b);
1914 offset = nir_imul_imm(b, offset, 16);
1915 create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1916 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1917 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1918 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);
1919
1920 nir_validate_shader(b->shader, NULL);
1921 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);
1922
1923 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1924
1925 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1926 }
1927
1928 TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
1929 {
1930 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
1931 glsl_array_type(glsl_uint_type(), 4, 0), "var");
1932 nir_deref_instr *deref = nir_build_deref_var(b, var);
1933
1934 nir_def *index = nir_load_local_invocation_index(b);
1935 index = nir_imul_imm(b, index, 3);
1936 create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
1937 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
1938 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);
1939
1940 nir_validate_shader(b->shader, NULL);
1941 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1942
1943 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));
1944
1945 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1946 }
1947
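/* Both vec4 loads overlap (offsets 0 and 4), but each mov only reads its first
 * component, so the pass is expected to shrink them into a single vec2 load. */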
1948 TEST_F(nir_load_store_vectorize_test, ubo_overlapping_vec4_vec4_unused_components)
1949 {
1950 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
1951 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 4);
1952 movs[0x1]->def.num_components = 1;
1953 movs[0x2]->def.num_components = 1;
1954
1955 nir_validate_shader(b->shader, NULL);
1956 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
1957
1958 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1959 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
1960
1961 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
1962 ASSERT_EQ(load->def.bit_size, 32);
1963 ASSERT_EQ(load->def.num_components, 2);
1964 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
1965 ASSERT_EQ(nir_intrinsic_range(load), 8);
1966 ASSERT_EQ(nir_def_components_read(&load->def), 0x3);
1967 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1968 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1969 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1970 }
1971
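/* offset = 16 * local_invocation_index + 4, so the load's alignment should be
 * reported as align_mul=16, align_offset=4. */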
1972 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1973 {
1974 nir_def *offset = nir_load_local_invocation_index(b);
1975 offset = nir_imul_imm(b, offset, 16);
1976 offset = nir_iadd_imm(b, offset, 4);
1977 nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1978 0x1);
1979
1980 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1981 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1982 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1983 }
1984
1985 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1986 {
1987 nir_def *offset = nir_load_local_invocation_index(b);
1988 offset = nir_iadd_imm(b, offset, 1);
1989 offset = nir_imul_imm(b, offset, 16);
1990 offset = nir_iadd_imm(b, offset, 4);
1991 nir_intrinsic_instr *load =
1992 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1993
1994 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1995 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1996 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1997 }
1998
1999 /* Check offset % mul != 0 */
2000 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
2001 {
2002 nir_def *offset = nir_load_local_invocation_index(b);
2003 offset = nir_imul_imm(b, offset, 16);
2004 offset = nir_iadd_imm(b, offset, 20);
2005 nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
2006 0x1);
2007
2008 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2009 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2010 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
2011 }
2012
2013 /* Check that we don't upgrade to non-power-of-two alignments. */
2014 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
2015 {
2016 nir_def *offset = nir_load_local_invocation_index(b);
2017 offset = nir_imul_imm(b, offset, 24);
2018 offset = nir_iadd_imm(b, offset, 4);
2019 nir_intrinsic_instr *load =
2020 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2021
2022 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2023 EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
2024 EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
2025 }
2026
2027 /* Check that the alignment is the GCD of the offset terms: 64x + 16y + 8 gives align_mul=16, align_offset=8. */
2028 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
2029 {
2030 nir_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
2031 nir_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
2032 nir_def *offset = nir_iadd(b, x, y);
2033 offset = nir_iadd_imm(b, offset, 8);
2034 nir_intrinsic_instr *load =
2035 create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2036
2037 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2038 EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2039 EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
2040 }
2041
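/* A constant offset yields the maximum alignment multiplier, with the constant
 * itself reported as the offset within it. */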
2042 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
2043 {
2044 nir_intrinsic_instr *load =
2045 create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
2046
2047 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2048 EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
2049 EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
2050 }
2051
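/* The overfetch tests let the callback widen a merged load beyond the
 * components actually read (e.g. vec4 + vec2 fetched as a single vec8), as
 * long as only the original components end up being consumed. */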
2052 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec6_as_vec8)
2053 {
2054 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2055 create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 2);
2056
2057 nir_validate_shader(b->shader, NULL);
2058 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2059
2060 this->overfetch = true;
2061 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2062 this->overfetch = false;
2063
2064 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2065
2066 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2067 ASSERT_EQ(load->def.bit_size, 32);
2068 ASSERT_EQ(load->def.num_components, 8);
2069 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2070 ASSERT_EQ(nir_intrinsic_range(load), 32);
2071 ASSERT_EQ(nir_def_components_read(&load->def), 0x3f);
2072 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2073 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzw");
2074 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "ef");
2075 }
2076
2077 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec7_as_vec8)
2078 {
2079 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2080 create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2081
2082 nir_validate_shader(b->shader, NULL);
2083 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2084
2085 this->overfetch = true;
2086 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2087 this->overfetch = false;
2088
2089 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2090
2091 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2092 ASSERT_EQ(load->def.bit_size, 32);
2093 ASSERT_EQ(load->def.num_components, 8);
2094 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2095 ASSERT_EQ(nir_intrinsic_range(load), 32);
2096 ASSERT_EQ(nir_def_components_read(&load->def), 0x7f);
2097 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2098 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzw");
2099 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
2100 }
2101
2102 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec7_as_vec8_disallowed)
2103 {
2104 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2105 create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2106
2107 nir_validate_shader(b->shader, NULL);
2108 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2109
2110 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2111 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2112 }
2113
2114 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec11_as_vec16)
2115 {
2116 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 8);
2117 create_load(nir_var_mem_ubo, 0, 32, 0x2, 32, 3);
2118
2119 nir_validate_shader(b->shader, NULL);
2120 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2121
2122 this->overfetch = true;
2123 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2124 this->overfetch = false;
2125
2126 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2127
2128 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2129 ASSERT_EQ(load->def.bit_size, 32);
2130 ASSERT_EQ(load->def.num_components, 16);
2131 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2132 ASSERT_EQ(nir_intrinsic_range(load), 64);
2133 ASSERT_EQ(nir_def_components_read(&load->def), 0x7ff);
2134 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2135 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzwefgh");
2136 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "ijk");
2137 }
2138
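/* A scalar at offset 0 plus an 8-component load at offset 4 that only uses
 * seven distinct components: with max_components raised to 8 they are expected
 * to merge into a single vec8 load. */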
2139 TEST_F(nir_load_store_vectorize_test, ubo_vec1_vec7as8)
2140 {
2141 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 1);
2142 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 8);
2143 movs[0x2]->src[0].swizzle[7] = 6; /* use only components 0..6 */
2144
2145 nir_validate_shader(b->shader, NULL);
2146 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2147
2148 this->max_components = 8;
2149 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2150 this->max_components = 4;
2151
2152 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2153
2154 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2155 ASSERT_EQ(load->def.bit_size, 32);
2156 ASSERT_EQ(load->def.num_components, 8);
2157 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2158 ASSERT_EQ(nir_intrinsic_range(load), 32);
2159 ASSERT_EQ(nir_def_components_read(&load->def), 0xff);
2160 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2161 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
2162 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yzwefghh");
2163 }
2164
2165 TEST_F(nir_load_store_vectorize_test, ubo_vec7as8_vec1)
2166 {
2167 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 8);
2168 movs[0x1]->src[0].swizzle[0] = 1; /* use only components 1..7 */
2169 create_load(nir_var_mem_ubo, 0, 32, 0x2, 32, 1);
2170
2171 nir_validate_shader(b->shader, NULL);
2172 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2173
2174 this->max_components = 8;
2175 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2176 this->max_components = 4;
2177
2178 /* TODO: This is not merged by the pass, but we could implement it. */
2179 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2180 }
2181
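/* The hole tests cover loads separated by unused bytes: merging across the gap
 * is only expected when the callback accepts the hole via max_hole_size. */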
2182 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole1_vec1_disallowed)
2183 {
2184 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2185 create_load(nir_var_mem_ubo, 0, 12, 0x2, 32, 1);
2186
2187 nir_validate_shader(b->shader, NULL);
2188 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2189
2190 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2191 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2192 }
2193
2194 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole1_vec1)
2195 {
2196 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2197 create_load(nir_var_mem_ubo, 0, 12, 0x2, 32, 1);
2198
2199 nir_validate_shader(b->shader, NULL);
2200 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2201
2202 this->max_hole_size = 4;
2203 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2204 this->max_hole_size = 0;
2205
2206 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2207
2208 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2209 ASSERT_EQ(load->def.bit_size, 32);
2210 ASSERT_EQ(load->def.num_components, 4);
2211 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2212 ASSERT_EQ(nir_intrinsic_range(load), 16);
2213 ASSERT_EQ(nir_def_components_read(&load->def), 1 | 2 | 8);
2214 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2215 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
2216 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "w");
2217 }
2218
2219 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole2_vec4_disallowed)
2220 {
2221 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2222 create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 1);
2223
2224 nir_validate_shader(b->shader, NULL);
2225 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2226
2227 /* The pass only allows 4-byte holes. */
2228 this->max_hole_size = 8;
2229 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2230 this->max_hole_size = 0;
2231
2232 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2233 }
2234
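/* vec3 at offset 0 and vec3 at offset 16 leave a 4-byte hole at offset 12;
 * with overfetch and max_hole_size=4 they are expected to become one vec8 load
 * with components 3 and 7 unused (read mask 0x77). */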
2235 TEST_F(nir_load_store_vectorize_test, ubo_vec3_hole1_vec3)
2236 {
2237 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 3);
2238 create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2239
2240 nir_validate_shader(b->shader, NULL);
2241 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2242
2243 this->overfetch = true;
2244 this->max_hole_size = 4;
2245 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2246 this->max_hole_size = 0;
2247 this->overfetch = false;
2248
2249 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2250
2251 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2252 ASSERT_EQ(load->def.bit_size, 32);
2253 ASSERT_EQ(load->def.num_components, 8);
2254 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2255 ASSERT_EQ(nir_intrinsic_range(load), 32);
2256 ASSERT_EQ(nir_def_components_read(&load->def), 0x77);
2257 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2258 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz");
2259 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
2260 }
2261