/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <gtest/gtest.h>

#include <map>
#include <string>

#include "nir.h"
#include "nir_builder.h"

/* This is a macro so you get good line numbers */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)    \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->dest.ssa);       \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);
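/* Typical use: EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy") checks that the
 * mov recorded under id 0x1 reads components x and y of "load". */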

namespace {

class nir_load_store_vectorize_test : public ::testing::Test {
protected:
   nir_load_store_vectorize_test();
   ~nir_load_store_vectorize_test();

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_ssa_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   void *mem_ctx;

   nir_builder *b;
   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_ssa_def*> res_map;
};
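/* Rough overview of the harness: the create_* helpers emit a load or store
 * intrinsic tagged with a caller-chosen id.  Loads are read back through a
 * nir_mov, and movs[id]/loads[id] remember that mov so the tests can check
 * which components of the vectorized load each original load ends up reading. */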

nir_load_store_vectorize_test::nir_load_store_vectorize_test()
{
   glsl_type_singleton_init_or_ref();

   mem_ctx = ralloc_context(NULL);
   static const nir_shader_compiler_options options = { };
   b = rzalloc(mem_ctx, nir_builder);
   nir_builder_init_simple_shader(b, mem_ctx, MESA_SHADER_COMPUTE, &options);
}

nir_load_store_vectorize_test::~nir_load_store_vectorize_test()
{
   if (HasFailure()) {
      printf("\nShader from the failed test:\n\n");
      nir_print_shader(b->shader, stdout);
   }

   ralloc_free(mem_ctx);

   glsl_type_singleton_decref();
}

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);
   bool progress = nir_opt_load_store_vectorize(b->shader, modes, mem_vectorize_callback, robust_modes);
   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}
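/* The copy-prop/algebraic/constant-folding cleanup above is what lets the
 * tests match simplified patterns (e.g. the ineg/ishl pair checked in
 * ssbo_load_adjacent_indirect_neg_stride) rather than raw vectorizer output. */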

nir_ssa_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_ssa_dest_init(&res->instr, &res->dest, 1, 32, NULL);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->dest.ssa;
   return &res->dest.ssa;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_align(load, byte_size, 0);
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_ssa_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_ssa_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
   nir_ssa_dest_init(&load->instr, &load->dest, components, bit_size, NULL);
   load->num_components = components;
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_ssa_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_deref);
   nir_ssa_dest_init(&store->instr, &store->dest, components, bit_size, NULL);
   store->num_components = components;
   store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   store->src[1] = nir_src_for_ssa(value);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   return bit_size / 8;
}
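/* Returning a nonzero value for every candidate tells the pass to combine any
 * pair of accesses, which is what these tests rely on.  A driver callback
 * would usually also check alignment and cap the vectorized size; a purely
 * illustrative sketch (not used here, the name is made up):
 *
 *    static bool
 *    example_vectorize_cb(unsigned align_mul, unsigned align_offset,
 *                         unsigned bit_size, unsigned num_components,
 *                         nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 *    {
 *       if (bit_size * num_components > 128)
 *          return false;
 *       return align_mul % (bit_size / 8) == 0;
 *    }
 */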

void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
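/* Example: a uvec3 has comp_size 4 and length 3, so it reports size 12 and
 * align 4; booleans get comp_size 4, i.e. they are laid out as 32-bit values. */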
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}
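/* After the pass, the two scalar UBO loads above are expected to collapse into
 * something roughly like (illustrative, not exact nir_print output):
 *
 *    vec2 32 ssa_N = intrinsic load_ubo (ssa_res, 0) (range_base=0, range=8, ...)
 *
 * with the original users reading .x and .y of that result. */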

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_ssa_def *index_base = nir_load_local_invocation_index(b);
   nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_ssa_def *inv = nir_load_local_invocation_index(b);
   nir_ssa_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_ssa_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_ssa_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_ssa_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* if nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
 * encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->dest.ssa.bit_size, 32);
   ASSERT_EQ(load->dest.ssa.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_ssa_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* nir_intrinsic_control_barrier only syncs invocations in a workgroup; it
 * doesn't require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_control_barrier)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, NIR_SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard)->instr);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_demote)->instr);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_builder_instr_insert(b, &nir_intrinsic_instr_create(b->shader, nir_intrinsic_demote)->instr);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test,ssbo_load_adjacent_8_8_16)927 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
928 {
929    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
930    create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
931    create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
932 
933    nir_validate_shader(b->shader, NULL);
934    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
935 
936    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
937 
938    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
939 
940    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
941    ASSERT_EQ(load->dest.ssa.bit_size, 8);
942    ASSERT_EQ(load->dest.ssa.num_components, 4);
943    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
944    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
945    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
946 
947    nir_ssa_def *val = loads[0x3]->src.ssa;
948    ASSERT_EQ(val->bit_size, 16);
949    ASSERT_EQ(val->num_components, 1);
950    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
951    nir_ssa_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
952    nir_ssa_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
953    ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
954    high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
955    ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
956    ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
957    ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->dest.ssa, 2));
958    ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->dest.ssa, 3));
959 }
960 
TEST_F(nir_load_store_vectorize_test,ssbo_load_adjacent_32_32_64)961 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
962 {
963    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
964    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
965 
966    nir_validate_shader(b->shader, NULL);
967    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
968 
969    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
970 
971    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
972 
973    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
974    ASSERT_EQ(load->dest.ssa.bit_size, 32);
975    ASSERT_EQ(load->dest.ssa.num_components, 4);
976    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
977    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
978 
979    nir_ssa_def *val = loads[0x2]->src.ssa;
980    ASSERT_EQ(val->bit_size, 64);
981    ASSERT_EQ(val->num_components, 1);
982    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
983    nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
984    EXPECT_INSTR_SWIZZLES(pack, load, "zw");
985 }
986 
TEST_F(nir_load_store_vectorize_test,ssbo_load_adjacent_32_32_64_64)987 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
988 {
989    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
990    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
991    create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
992 
993    nir_validate_shader(b->shader, NULL);
994    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
995 
996    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
997 
998    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
999 
1000    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1001    ASSERT_EQ(load->dest.ssa.bit_size, 64);
1002    ASSERT_EQ(load->dest.ssa.num_components, 3);
1003    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1004    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");
1005 
1006    nir_ssa_def *val = loads[0x2]->src.ssa;
1007    ASSERT_EQ(val->bit_size, 64);
1008    ASSERT_EQ(val->num_components, 1);
1009    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
1010    nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
1011    EXPECT_INSTR_SWIZZLES(mov, load, "y");
1012 
1013    val = loads[0x1]->src.ssa;
1014    ASSERT_EQ(val->bit_size, 32);
1015    ASSERT_EQ(val->num_components, 2);
1016    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
1017    nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
1018    EXPECT_INSTR_SWIZZLES(unpack, load, "x");
1019 }
1020 
TEST_F(nir_load_store_vectorize_test,ssbo_load_intersecting_32_32_64)1021 TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
1022 {
1023    create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
1024    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1025 
1026    nir_validate_shader(b->shader, NULL);
1027    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1028 
1029    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1030 
1031    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1032 
1033    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1034    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1035    ASSERT_EQ(load->dest.ssa.num_components, 3);
1036    ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
1037    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
1038 
1039    nir_ssa_def *val = loads[0x2]->src.ssa;
1040    ASSERT_EQ(val->bit_size, 64);
1041    ASSERT_EQ(val->num_components, 1);
1042    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
1043    nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
1044    EXPECT_INSTR_SWIZZLES(pack, load, "yz");
1045 }
1046 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_8_8_16)1047 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
1048 {
1049    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
1050    create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
1051    create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
1052 
1053    nir_validate_shader(b->shader, NULL);
1054    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1055 
1056    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1057 
1058    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1059 
1060    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1061    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1062    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1063    nir_ssa_def *val = store->src[0].ssa;
1064    ASSERT_EQ(val->bit_size, 8);
1065    ASSERT_EQ(val->num_components, 4);
1066    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1067    ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
1068    ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
1069    ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
1070    ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
1071 }
1072 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_32_64)1073 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
1074 {
1075    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1076    create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1077 
1078    nir_validate_shader(b->shader, NULL);
1079    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1080 
1081    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1082 
1083    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1084 
1085    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1086    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1087    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1088    nir_ssa_def *val = store->src[0].ssa;
1089    ASSERT_EQ(val->bit_size, 32);
1090    ASSERT_EQ(val->num_components, 4);
1091    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1092    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1093    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
1094    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
1095    ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
1096 }
1097 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_32_64_64)1098 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
1099 {
1100    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1101    create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1102    create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1103 
1104    nir_validate_shader(b->shader, NULL);
1105    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1106 
1107    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1108 
1109    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1110 
1111    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1112    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1113    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1114    nir_ssa_def *val = store->src[0].ssa;
1115    ASSERT_EQ(val->bit_size, 64);
1116    ASSERT_EQ(val->num_components, 3);
1117    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1118    ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1119    ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1120    ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1121 }
1122 
TEST_F(nir_load_store_vectorize_test,ssbo_store_intersecting_32_32_64)1123 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1124 {
1125    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1126    create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1127 
1128    nir_validate_shader(b->shader, NULL);
1129    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1130 
1131    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1132 
1133    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1134 
1135    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1136    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1137    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1138    nir_ssa_def *val = store->src[0].ssa;
1139    ASSERT_EQ(val->bit_size, 32);
1140    ASSERT_EQ(val->num_components, 3);
1141    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1142    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1143    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1144    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1145 }
1146 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_64)1147 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1148 {
1149    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1150    create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1151 
1152    nir_validate_shader(b->shader, NULL);
1153    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1154 
1155    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1156 
1157    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1158 }
1159 
TEST_F(nir_load_store_vectorize_test,ssbo_store_identical_wrmask)1160 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1161 {
1162    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1163    create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1164 
1165    nir_validate_shader(b->shader, NULL);
1166    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1167 
1168    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1169 
1170    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1171 
1172    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1173    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1174    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1175    nir_ssa_def *val = store->src[0].ssa;
1176    ASSERT_EQ(val->bit_size, 32);
1177    ASSERT_EQ(val->num_components, 4);
1178    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1179    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1180    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1181    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1182    ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1183 }
1184 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent)1185 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1186 {
1187    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1188    nir_deref_instr *deref = nir_build_deref_var(b, var);
1189 
1190    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1191    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1192 
1193    nir_validate_shader(b->shader, NULL);
1194    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1195 
1196    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1197 
1198    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1199 
1200    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1201    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1202    ASSERT_EQ(load->dest.ssa.num_components, 2);
1203 
1204    deref = nir_src_as_deref(load->src[0]);
1205    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1206 
1207    deref = nir_deref_instr_parent(deref);
1208    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1209    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1210 
1211    deref = nir_deref_instr_parent(deref);
1212    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1213    ASSERT_EQ(deref->var, var);
1214 
1215    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1216    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1217 }
1218 
TEST_F(nir_load_store_vectorize_test,shared_load_distant_64bit)1219 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1220 {
1221    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1222    nir_deref_instr *deref = nir_build_deref_var(b, var);
1223    nir_ssa_dest_init(&deref->instr, &deref->dest, 1, 64, NULL);
1224 
1225    create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1226    create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1227 
1228    nir_validate_shader(b->shader, NULL);
1229    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1230 
1231    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1232 
1233    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1234 }
1235 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect)1236 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1237 {
1238    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1239    nir_deref_instr *deref = nir_build_deref_var(b, var);
1240    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1241 
1242    create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1243    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1244 
1245    nir_validate_shader(b->shader, NULL);
1246    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1247 
1248    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1249 
1250    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1251 
1252    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1253    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1254    ASSERT_EQ(load->dest.ssa.num_components, 2);
1255 
1256    deref = nir_src_as_deref(load->src[0]);
1257    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1258 
1259    deref = nir_deref_instr_parent(deref);
1260    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1261    ASSERT_EQ(deref->arr.index.ssa, index_base);
1262 
1263    deref = nir_deref_instr_parent(deref);
1264    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1265    ASSERT_EQ(deref->var, var);
1266 
1267    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1268    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1269 }
1270 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect_sub)1271 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1272 {
1273    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1274    nir_deref_instr *deref = nir_build_deref_var(b, var);
1275    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1276    nir_ssa_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1277 
1278    create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1279    create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1280 
1281    nir_validate_shader(b->shader, NULL);
1282    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1283 
1284    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1285 
1286    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1287 
1288    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1289    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1290    ASSERT_EQ(load->dest.ssa.num_components, 2);
1291 
1292    deref = nir_src_as_deref(load->src[0]);
1293    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1294 
1295    deref = nir_deref_instr_parent(deref);
1296    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1297    ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1298 
1299    deref = nir_deref_instr_parent(deref);
1300    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1301    ASSERT_EQ(deref->var, var);
1302 
1303    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1304    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1305 }
1306 
1307 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1308 {
1309    glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1310                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1311 
1312    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1313    nir_deref_instr *deref = nir_build_deref_var(b, var);
1314 
1315    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1316    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1317 
1318    nir_validate_shader(b->shader, NULL);
1319    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1320 
1321    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1322 
1323    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1324 
1325    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1326    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1327    ASSERT_EQ(load->dest.ssa.num_components, 2);
1328 
1329    deref = nir_src_as_deref(load->src[0]);
1330    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1331 
1332    deref = nir_deref_instr_parent(deref);
1333    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1334    ASSERT_EQ(deref->strct.index, 0);
1335 
1336    deref = nir_deref_instr_parent(deref);
1337    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1338    ASSERT_EQ(deref->var, var);
1339 
1340    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1341    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1342 }
1343 
1344 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1345 {
1346    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1347    nir_deref_instr *deref = nir_build_deref_var(b, var);
1348 
1349    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1350    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1351    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1352 
1353    nir_validate_shader(b->shader, NULL);
1354    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1355    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1356 
1357    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1358 
1359    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1360    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1361 
1362    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1363    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1364    ASSERT_EQ(load->dest.ssa.num_components, 1);
1365 
1366    deref = nir_src_as_deref(load->src[0]);
1367    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1368    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1369 
1370    deref = nir_deref_instr_parent(deref);
1371    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1372    ASSERT_EQ(deref->var, var);
1373 
1374    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1375    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1376 }
1377 
1378 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1379 {
1380    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1381    nir_deref_instr *deref = nir_build_deref_var(b, var);
1382 
1383    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1384    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1385    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1386 
1387    nir_validate_shader(b->shader, NULL);
1388    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1389 
1390    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1391 
1392    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1393 }
1394 
1395 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1396 {
1397    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1398    nir_deref_instr *deref = nir_build_deref_var(b, var);
1399 
1400    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1401    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1402    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1403 
1404    nir_validate_shader(b->shader, NULL);
1405    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1406    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1407 
1408    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1409 
1410    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1411    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1412 
1413    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1414    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1415    ASSERT_EQ(load->dest.ssa.num_components, 2);
1416 
1417    deref = nir_src_as_deref(load->src[0]);
1418    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1419 
1420    deref = nir_deref_instr_parent(deref);
1421    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1422    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1423 
1424    deref = nir_deref_instr_parent(deref);
1425    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1426    ASSERT_EQ(deref->var, var);
1427 
1428    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1429    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1430 }
1431 
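/* The loads are created with bit_size == 1 (booleans); the vectorizer widens
 * them to a single 32-bit load and re-materializes the booleans with i2b1,
 * which the ALU checks below verify. */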
1432 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1433 {
1434    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1435    nir_deref_instr *deref = nir_build_deref_var(b, var);
1436 
1437    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1438    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1439 
1440    nir_validate_shader(b->shader, NULL);
1441    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1442 
1443    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1444 
1445    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1446 
1447    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1448    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1449    ASSERT_EQ(load->dest.ssa.num_components, 2);
1450 
1451    deref = nir_src_as_deref(load->src[0]);
1452    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1453 
1454    deref = nir_deref_instr_parent(deref);
1455    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1456    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1457 
1458    deref = nir_deref_instr_parent(deref);
1459    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1460    ASSERT_EQ(deref->var, var);
1461 
1462    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1463    ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_i2b1));
1464    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1465    ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->dest.ssa, 1));
1466 }
1467 
1468 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1469 {
1470    glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1471                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1472 
1473    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1474    nir_deref_instr *deref = nir_build_deref_var(b, var);
1475 
1476    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1477    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1478 
1479    nir_validate_shader(b->shader, NULL);
1480    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1481 
1482    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1483 
1484    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1485 
1486    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1487    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1488    ASSERT_EQ(load->dest.ssa.num_components, 2);
1489 
1490    deref = nir_src_as_deref(load->src[0]);
1491    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1492 
1493    deref = nir_deref_instr_parent(deref);
1494    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1495    ASSERT_EQ(deref->strct.index, 0);
1496 
1497    deref = nir_deref_instr_parent(deref);
1498    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1499    ASSERT_EQ(deref->var, var);
1500 
1501    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_i2b1));
1502    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->dest.ssa, 0));
1503 
1504    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1505 }
1506 
1507 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1508 {
1509    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1510    nir_deref_instr *deref = nir_build_deref_var(b, var);
1511 
1512    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1513    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1514 
1515    nir_validate_shader(b->shader, NULL);
1516    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1517 
1518    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1519 
1520    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1521 
1522    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1523    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1524    nir_ssa_def *val = store->src[1].ssa;
1525    ASSERT_EQ(val->bit_size, 32);
1526    ASSERT_EQ(val->num_components, 2);
1527    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1528    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1529    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1530 
1531    deref = nir_src_as_deref(store->src[0]);
1532    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1533 
1534    deref = nir_deref_instr_parent(deref);
1535    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1536    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1537 
1538    deref = nir_deref_instr_parent(deref);
1539    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1540    ASSERT_EQ(deref->var, var);
1541 }
1542 
1543 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1544 {
1545    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1546    nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1547 
1548    nir_validate_shader(b->shader, NULL);
1549    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1550 
1551    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1552 
1553    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1554 }
1555 
1556 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1557 {
1558    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1559    create_load(nir_var_mem_push_const, 0, 8, 0x2);
1560 
1561    nir_validate_shader(b->shader, NULL);
1562    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1563 
1564    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1565 
1566    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1567 }
1568 
1569 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1570 {
1571    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1572    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1573    create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1574 
1575    nir_validate_shader(b->shader, NULL);
1576    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1577 
1578    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1579 
1580    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1581 }
1582 
1583 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1584 {
1585    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1586    create_indirect_load(nir_var_mem_push_const, 0,
1587       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1588    create_indirect_load(nir_var_mem_push_const, 0,
1589       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
1590 
1591    nir_validate_shader(b->shader, NULL);
1592    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1593 
1594    EXPECT_FALSE(run_vectorizer(nir_var_mem_push_const));
1595 
1596    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1597 }
1598 
1599 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1600 {
1601    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1602    //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1603    nir_ssa_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1604    nir_ssa_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
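   /* low == index*16 + 12 and high == (index+1)*16 == low + 4, so the two
    * loads are adjacent even though the offset expressions look different. */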
1605    create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1606    create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1607 
1608    nir_validate_shader(b->shader, NULL);
1609    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1610 
1611    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1612 
1613    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1614 
1615    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1616    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1617    ASSERT_EQ(load->dest.ssa.num_components, 2);
1618    ASSERT_EQ(load->src[0].ssa, low);
1619    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1620    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1621 }
1622 
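/* The store uses an arbitrary indirect offset, so it may alias offset 0 and
 * the two loads of offset 0 must not be merged across it. */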
1623 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1624 {
1625    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1626    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1627    create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1628    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1629 
1630    nir_validate_shader(b->shader, NULL);
1631    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1632 
1633    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1634 
1635    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1636 }
1637 
1638 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1639 {
1640    nir_ssa_def *load_base = nir_load_global_invocation_index(b, 32);
1641    nir_ssa_def *store_base = nir_load_local_invocation_index(b);
1642    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1643    create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1644    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1645 
1646    nir_validate_shader(b->shader, NULL);
1647    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1648 
1649    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1650 
1651    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1652 }
1653 
1654 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1655 {
1656    /* TODO: try to combine these loads */
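   /* offset is always congruent to 4 modulo 16 (even if the arithmetic wraps),
    * so it can never overlap the 4-byte store at offset 0 and the two loads
    * should be combinable. */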
1657    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1658    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1659    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1660    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1661    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1662 
1663    nir_validate_shader(b->shader, NULL);
1664    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1665 
1666    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1667 
1668    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1669 
1670    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1671    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1672    ASSERT_EQ(load->dest.ssa.num_components, 1);
1673    ASSERT_EQ(load->src[1].ssa, offset);
1674    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1675    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1676 }
1677 
1678 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1679 {
1680    /* These loads could be combined if nir_alu_instr::no_unsigned_wrap were set.
1681     * As it is, they can't be combined: if index_base == 268435455, the addition
1682     * wraps around and offset == 0, which aliases the store at offset 0. */
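   /* Concretely: 268435455 * 16 == 0xfffffff0, and adding 16 overflows the
    * 32-bit offset to 0x100000000, which truncates to 0. */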
1683    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1684    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1685    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1686    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1687    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1688 
1689    nir_validate_shader(b->shader, NULL);
1690    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1691 
1692    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1693 
1694    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1695 }
1696 
1697 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1698 {
1699    /* TODO: try to combine these loads */
1700    nir_ssa_def *index_base = nir_load_local_invocation_index(b);
1701    nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1702    nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1703    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1704    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1705    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1706 
1707    nir_validate_shader(b->shader, NULL);
1708    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1709 
1710    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1711 
1712    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1713 
1714    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1715    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1716    ASSERT_EQ(load->dest.ssa.num_components, 1);
1717    ASSERT_EQ(load->src[1].ssa, offset);
1718    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1719    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1720 }
1721 
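/* Different SSBO bindings may still refer to the same buffer, so without
 * ACCESS_RESTRICT the loads cannot be merged across the store. */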
1722 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1723 {
1724    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1725    create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1726    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1727 
1728    nir_validate_shader(b->shader, NULL);
1729    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1730 
1731    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1732 
1733    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1734 }
1735 
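/* Same as ssbo_alias5, but with ACCESS_RESTRICT the bindings are assumed not
 * to alias, so the two identical loads collapse into one. */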
1736 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1737 {
1738    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1739    create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1740    create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1741 
1742    nir_validate_shader(b->shader, NULL);
1743    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1744 
1745    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1746 
1747    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1748 
1749    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1750    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1751    ASSERT_EQ(load->dest.ssa.num_components, 1);
1752    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1753    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1754    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1755 }
1756 
1757 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1758 {
1759    /* TODO: implement type-based alias analysis so that these loads can be
1760     * combined. this is made a bit more difficult than simply using
1761     * nir_compare_derefs() because the vectorizer creates loads/stores with
1762     * casted derefs. The solution would probably be to keep multiple derefs for
1763     * an entry (one for each load/store combined into it). */
1764    glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1765                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1766 
1767    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1768    nir_deref_instr *deref = nir_build_deref_var(b, var);
1769 
1770    nir_ssa_def *index0 = nir_load_local_invocation_index(b);
1771    nir_ssa_def *index1 = nir_load_global_invocation_index(b, 32);
1772    nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1773 
1774    create_shared_load(load_deref, 0x1);
1775    create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1776    create_shared_load(load_deref, 0x3);
1777 
1778    nir_validate_shader(b->shader, NULL);
1779    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1780 
1781    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1782 
1783    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1784 
1785    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1786    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1787    ASSERT_EQ(load->dest.ssa.num_components, 1);
1788    ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1789    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1790    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1791 }
1792 
1793 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1794 {
1795    nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1796    nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1797    nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1798 
1799    create_shared_load(load_deref, 0x1);
1800    create_shared_store(nir_build_deref_var(b, var1), 0x2);
1801    create_shared_load(load_deref, 0x3);
1802 
1803    nir_validate_shader(b->shader, NULL);
1804    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1805 
1806    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1807 
1808    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1809 
1810    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1811    ASSERT_EQ(load->dest.ssa.bit_size, 32);
1812    ASSERT_EQ(load->dest.ssa.num_components, 1);
1813    ASSERT_EQ(load->src[0].ssa, &load_deref->dest.ssa);
1814    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1815    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1816 }
1817 
1818 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1819 {
1820    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1821    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
1822 
1823    nir_validate_shader(b->shader, NULL);
1824    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1825 
1826    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1827 
1828    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1829 }
1830 
1831 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1832 {
1833    nir_ssa_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1834    nir_ssa_def *first = nir_imul_imm(b, index_base, 0x100000000);
1835    nir_ssa_def *second = nir_imul_imm(b, index_base, 0x200000000);
1836    create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1837    create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1838 
1839    nir_validate_shader(b->shader, NULL);
1840    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1841 
1842    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1843 
1844    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1845 }
1846 
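/* With robust access enabled for SSBOs, merging the loads at 0xfffffffc and
 * 0x0 would produce an access whose offset computation overflows, so the two
 * loads must stay separate. */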
1847 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1848 {
1849    create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1850    create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1851 
1852    nir_validate_shader(b->shader, NULL);
1853    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1854 
1855    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1856 
1857    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1858 }
1859 
1860 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1861 {
1862    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1863    offset = nir_imul_imm(b, offset, 16);
1864    offset = nir_iadd_imm(b, offset, 4);
1865    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1866                                                     0x1);
1867 
1868    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1869    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1870    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1871 }
1872 
1873 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1874 {
1875    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1876    offset = nir_iadd_imm(b, offset, 1);
1877    offset = nir_imul_imm(b, offset, 16);
1878    offset = nir_iadd_imm(b, offset, 4);
1879    nir_intrinsic_instr *load =
1880       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1881 
1882    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1883    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1884    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1885 }
1886 
1887 /* Check offset % mul != 0 */
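/* Here offset == idx*16 + 20, so align_mul == 16 and align_offset == 20 % 16 == 4. */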
1888 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
1889 {
1890    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1891    offset = nir_imul_imm(b, offset, 16);
1892    offset = nir_iadd_imm(b, offset, 20);
1893    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1894                                                     0x1);
1895 
1896    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1897    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1898    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1899 }
1900 
1901 /* Check that we don't upgrade to non-power-of-two alignments. */
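/* A stride of 24 only guarantees 8-byte alignment (its largest power-of-two
 * divisor), so the expected result is align_mul == 8 with align_offset == 4. */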
1902 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
1903 {
1904    nir_ssa_def *offset = nir_load_local_invocation_index(b);
1905    offset = nir_imul_imm(b, offset, 24);
1906    offset = nir_iadd_imm(b, offset, 4);
1907    nir_intrinsic_instr *load =
1908       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1909 
1910    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1911    EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
1912    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1913 }
1914 
1915 /* Check the combined alignment when two power-of-two strides contribute to the offset. */
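/* The offset is idx*64 + id*16 + 8; the guaranteed alignment is limited by the
 * smaller stride, so align_mul == 16 and align_offset == 8. */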
1916 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
1917 {
1918    nir_ssa_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
1919    nir_ssa_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
1920    nir_ssa_def *offset = nir_iadd(b, x, y);
1921    offset = nir_iadd_imm(b, offset, 8);
1922    nir_intrinsic_instr *load =
1923       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1924 
1925    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1926    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1927    EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
1928 }
1929 
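/* A fully constant offset gives the strongest possible guarantee: align_mul is
 * NIR_ALIGN_MUL_MAX and the constant itself becomes align_offset. */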
1930 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
1931 {
1932    nir_intrinsic_instr *load =
1933       create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
1934 
1935    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1936    EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
1937    EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
1938 }
1939