/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir_test.h"

/* This is a macro so you get good line numbers */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)    \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->def);       \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);
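
/* Illustrative usage only, mirroring the checks in the tests below: given the
 * id passed to create_load()/create_indirect_load(), movs[id] is the mov that
 * reads that load's result, so
 *
 *    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
 *
 * asserts that the first load was rewritten to read component x of the
 * vectorized `load`.
 */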

namespace {

class nir_load_store_vectorize_test : public nir_test {
protected:
   nir_load_store_vectorize_test()
      : nir_test::nir_test("nir_load_store_vectorize_test")
   {
   }

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components, int64_t hole_size,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                      void *data);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_def*> res_map;
   unsigned max_components = 4;
   bool overfetch = false;
   int64_t max_hole_size = 0;
};

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzwefghijklmnop"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}
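
/* Example (illustrative): if a mov's first source reads components 1 and 2 of
 * a larger vector, swizzle(mov, 0) returns "yz"; components 4..15 map to
 * "efgh..." so swizzles of wide vectors stay printable.
 */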

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);

   nir_load_store_vectorize_options opts = { };
   opts.callback = mem_vectorize_callback;
   opts.modes = modes;
   opts.robust_modes = robust_modes;
   opts.cb_data = this;
   bool progress = nir_opt_load_store_vectorize(b->shader, &opts);

   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}
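
/* Sketch of the pattern the tests below follow (illustrative): build two
 * loads, run the vectorizer for the mode under test, then count the remaining
 * intrinsics and check the swizzles, e.g.
 *
 *    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
 *    create_load(nir_var_mem_ssbo, 0, 4, 0x2);
 *    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
 *    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
 */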

nir_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_def_init(&res->instr, &res->def, 1, 32);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->def;
   return &res->def;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&load->instr, &load->def, components, bit_size);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   nir_intrinsic_set_align(load, byte_size, 0);
   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}
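
/* Each created load is immediately followed by a mov of its whole result;
 * movs[id] and loads[id] record that mov and its ALU source so the tests can
 * later check, via EXPECT_INSTR_SWIZZLES, which components of the vectorized
 * load each original load ends up reading.
 */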

void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&store->instr, &store->def, components, bit_size);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}
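
/* The stored constants encode the store id and component index: component i
 * of store `id` is (id << 4) | i. For example, a two-component store with
 * id 0x2 writes {0x20, 0x21}, which is what the ssbo_store_* tests look for
 * in the merged load_const value.
 */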

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_def *load = nir_load_deref(b, deref);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components, int64_t hole_size,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   nir_load_store_vectorize_test *test = (nir_load_store_vectorize_test *)data;

   if (hole_size > test->max_hole_size ||
       (!test->overfetch && !nir_num_components_valid(num_components)))
      return false;

   /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
   uint32_t align = align_mul;
   if (align_offset)
      align = 1 << (ffs(align_offset) - 1);

   /* Require scalar alignment and no more than max_components (4 by default) components. */
   return align % (bit_size / 8) == 0 &&
          (test->overfetch || num_components <= test->max_components);
}
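
/* Worked example (illustrative values, not taken from a specific test): with
 * align_mul=4, align_offset=0, bit_size=32, num_components=2 and hole_size=0,
 * align is 4, 4 % (32 / 8) == 0 and 2 <= max_components, so the merge is
 * accepted; a 64-bit access with the same 4-byte alignment would fail the
 * align % (bit_size / 8) check and be rejected.
 */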

void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_def *inv = nir_load_local_invocation_index(b);
   nir_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic rewrites the imul by -4 into ineg(ishl(x, 2)) */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* If nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load once an aliasing store
 * is encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* A control barrier only syncs invocations within a workgroup; it doesn't
 * require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_barrier(b, SCOPE_WORKGROUP, SCOPE_NONE,
                      (nir_memory_semantics)0, (nir_variable_mode)0);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 8);
   ASSERT_EQ(load->def.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   nir_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->def, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->def, 3));
}

TEST_F(nir_load_store_vectorize_test,ssbo_load_adjacent_32_32_64)951 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
952 {
953    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
954    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
955 
956    nir_validate_shader(b->shader, NULL);
957    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
958 
959    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
960 
961    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
962 
963    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
964    ASSERT_EQ(load->def.bit_size, 32);
965    ASSERT_EQ(load->def.num_components, 4);
966    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
967    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
968 
969    nir_def *val = loads[0x2]->src.ssa;
970    ASSERT_EQ(val->bit_size, 64);
971    ASSERT_EQ(val->num_components, 1);
972    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
973    nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
974    EXPECT_INSTR_SWIZZLES(pack, load, "zw");
975 }
976 
TEST_F(nir_load_store_vectorize_test,ssbo_load_adjacent_32_32_64_64)977 TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
978 {
979    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
980    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
981    create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
982 
983    nir_validate_shader(b->shader, NULL);
984    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
985 
986    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
987 
988    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
989 
990    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
991    ASSERT_EQ(load->def.bit_size, 64);
992    ASSERT_EQ(load->def.num_components, 3);
993    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
994    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");
995 
996    nir_def *val = loads[0x2]->src.ssa;
997    ASSERT_EQ(val->bit_size, 64);
998    ASSERT_EQ(val->num_components, 1);
999    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
1000    nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
1001    EXPECT_INSTR_SWIZZLES(mov, load, "y");
1002 
1003    val = loads[0x1]->src.ssa;
1004    ASSERT_EQ(val->bit_size, 32);
1005    ASSERT_EQ(val->num_components, 2);
1006    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
1007    nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
1008    EXPECT_INSTR_SWIZZLES(unpack, load, "x");
1009 }
1010 
TEST_F(nir_load_store_vectorize_test,ssbo_load_intersecting_32_32_64)1011 TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
1012 {
1013    create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
1014    create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1015 
1016    nir_validate_shader(b->shader, NULL);
1017    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1018 
1019    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1020 
1021    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1022 
1023    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1024    ASSERT_EQ(load->def.bit_size, 32);
1025    ASSERT_EQ(load->def.num_components, 3);
1026    ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
1027    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
1028 
1029    nir_def *val = loads[0x2]->src.ssa;
1030    ASSERT_EQ(val->bit_size, 64);
1031    ASSERT_EQ(val->num_components, 1);
1032    ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
1033    nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
1034    EXPECT_INSTR_SWIZZLES(pack, load, "yz");
1035 }
1036 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_8_8_16)1037 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
1038 {
1039    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
1040    create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
1041    create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
1042 
1043    nir_validate_shader(b->shader, NULL);
1044    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1045 
1046    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1047 
1048    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1049 
1050    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1051    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1052    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1053    nir_def *val = store->src[0].ssa;
1054    ASSERT_EQ(val->bit_size, 8);
1055    ASSERT_EQ(val->num_components, 4);
1056    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1057    ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
1058    ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
1059    ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
1060    ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
1061 }
1062 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_32_64)1063 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
1064 {
1065    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1066    create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1067 
1068    nir_validate_shader(b->shader, NULL);
1069    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1070 
1071    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1072 
1073    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1074 
1075    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1076    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1077    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1078    nir_def *val = store->src[0].ssa;
1079    ASSERT_EQ(val->bit_size, 32);
1080    ASSERT_EQ(val->num_components, 4);
1081    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1082    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1083    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
1084    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
1085    ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
1086 }
1087 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_32_64_64)1088 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
1089 {
1090    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1091    create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1092    create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1093 
1094    nir_validate_shader(b->shader, NULL);
1095    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1096 
1097    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1098 
1099    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1100 
1101    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1102    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1103    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1104    nir_def *val = store->src[0].ssa;
1105    ASSERT_EQ(val->bit_size, 64);
1106    ASSERT_EQ(val->num_components, 3);
1107    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1108    ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1109    ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1110    ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1111 }
1112 
TEST_F(nir_load_store_vectorize_test,ssbo_store_intersecting_32_32_64)1113 TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1114 {
1115    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1116    create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1117 
1118    nir_validate_shader(b->shader, NULL);
1119    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1120 
1121    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1122 
1123    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1124 
1125    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1126    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1127    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1128    nir_def *val = store->src[0].ssa;
1129    ASSERT_EQ(val->bit_size, 32);
1130    ASSERT_EQ(val->num_components, 3);
1131    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1132    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1133    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1134    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1135 }
1136 
TEST_F(nir_load_store_vectorize_test,ssbo_store_adjacent_32_64)1137 TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1138 {
1139    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1140    create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1141 
1142    nir_validate_shader(b->shader, NULL);
1143    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1144 
1145    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1146 
1147    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1148 }
1149 
TEST_F(nir_load_store_vectorize_test,ssbo_store_identical_wrmask)1150 TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1151 {
1152    create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1153    create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1154 
1155    nir_validate_shader(b->shader, NULL);
1156    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1157 
1158    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1159 
1160    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1161 
1162    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1163    ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1164    ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1165    nir_def *val = store->src[0].ssa;
1166    ASSERT_EQ(val->bit_size, 32);
1167    ASSERT_EQ(val->num_components, 4);
1168    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1169    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1170    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1171    ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1172    ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1173 }
1174 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent)1175 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1176 {
1177    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1178    nir_deref_instr *deref = nir_build_deref_var(b, var);
1179 
1180    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1181    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1182 
1183    nir_validate_shader(b->shader, NULL);
1184    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1185 
1186    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1187 
1188    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1189 
1190    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1191    ASSERT_EQ(load->def.bit_size, 32);
1192    ASSERT_EQ(load->def.num_components, 2);
1193 
1194    deref = nir_src_as_deref(load->src[0]);
1195    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1196 
1197    deref = nir_deref_instr_parent(deref);
1198    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1199    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1200 
1201    deref = nir_deref_instr_parent(deref);
1202    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1203    ASSERT_EQ(deref->var, var);
1204 
1205    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1206    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1207 }
1208 
TEST_F(nir_load_store_vectorize_test,shared_load_distant_64bit)1209 TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1210 {
1211    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1212    nir_deref_instr *deref = nir_build_deref_var(b, var);
1213    nir_def_init(&deref->instr, &deref->def, 1, 64);
1214 
1215    create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1216    create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1217 
1218    nir_validate_shader(b->shader, NULL);
1219    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1220 
1221    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1222 
1223    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1224 }
1225 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect)1226 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1227 {
1228    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1229    nir_deref_instr *deref = nir_build_deref_var(b, var);
1230    nir_def *index_base = nir_load_local_invocation_index(b);
1231 
1232    create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1233    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1234 
1235    nir_validate_shader(b->shader, NULL);
1236    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1237 
1238    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1239 
1240    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1241 
1242    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1243    ASSERT_EQ(load->def.bit_size, 32);
1244    ASSERT_EQ(load->def.num_components, 2);
1245 
1246    deref = nir_src_as_deref(load->src[0]);
1247    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1248 
1249    deref = nir_deref_instr_parent(deref);
1250    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1251    ASSERT_EQ(deref->arr.index.ssa, index_base);
1252 
1253    deref = nir_deref_instr_parent(deref);
1254    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1255    ASSERT_EQ(deref->var, var);
1256 
1257    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1258    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1259 }
1260 
TEST_F(nir_load_store_vectorize_test,shared_load_adjacent_indirect_sub)1261 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1262 {
1263    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1264    nir_deref_instr *deref = nir_build_deref_var(b, var);
1265    nir_def *index_base = nir_load_local_invocation_index(b);
1266    nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1267 
1268    create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1269    create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1270 
1271    nir_validate_shader(b->shader, NULL);
1272    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1273 
1274    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1275 
1276    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1277 
1278    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1279    ASSERT_EQ(load->def.bit_size, 32);
1280    ASSERT_EQ(load->def.num_components, 2);
1281 
1282    deref = nir_src_as_deref(load->src[0]);
1283    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1284 
1285    deref = nir_deref_instr_parent(deref);
1286    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1287    ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1288 
1289    deref = nir_deref_instr_parent(deref);
1290    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1291    ASSERT_EQ(deref->var, var);
1292 
1293    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1294    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1295 }
1296 
TEST_F(nir_load_store_vectorize_test,shared_load_struct)1297 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1298 {
1299    glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1300                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1301 
1302    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1303    nir_deref_instr *deref = nir_build_deref_var(b, var);
1304 
1305    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1306    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1307 
1308    nir_validate_shader(b->shader, NULL);
1309    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1310 
1311    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1312 
1313    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1314 
1315    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1316    ASSERT_EQ(load->def.bit_size, 32);
1317    ASSERT_EQ(load->def.num_components, 2);
1318 
1319    deref = nir_src_as_deref(load->src[0]);
1320    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1321 
1322    deref = nir_deref_instr_parent(deref);
1323    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1324    ASSERT_EQ(deref->strct.index, 0);
1325 
1326    deref = nir_deref_instr_parent(deref);
1327    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1328    ASSERT_EQ(deref->var, var);
1329 
1330    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1331    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1332 }
1333 
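/* The store targets var[1] and therefore can't alias the two identical
 * loads of var[0], which should be combined into a single scalar load. */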
1334 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1335 {
1336    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1337    nir_deref_instr *deref = nir_build_deref_var(b, var);
1338 
1339    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1340    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1341    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1342 
1343    nir_validate_shader(b->shader, NULL);
1344    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1345    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1346 
1347    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1348 
1349    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1350    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1351 
1352    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1353    ASSERT_EQ(load->def.bit_size, 32);
1354    ASSERT_EQ(load->def.num_components, 1);
1355 
1356    deref = nir_src_as_deref(load->src[0]);
1357    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1358    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1359 
1360    deref = nir_deref_instr_parent(deref);
1361    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1362    ASSERT_EQ(deref->var, var);
1363 
1364    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1365    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1366 }
1367 
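/* The store writes the same element the two loads read, so combining the
 * loads across it would be invalid and the pass must leave them alone. */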
1368 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1369 {
1370    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1371    nir_deref_instr *deref = nir_build_deref_var(b, var);
1372 
1373    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1374    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1375    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1376 
1377    nir_validate_shader(b->shader, NULL);
1378    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1379 
1380    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1381 
1382    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1383 }
1384 
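/* The store only writes var[0]; the later load of var[1] is unaffected by
 * it and can be folded into the first load, giving one vec2 load. */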
1385 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1386 {
1387    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1388    nir_deref_instr *deref = nir_build_deref_var(b, var);
1389 
1390    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1391    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1392    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1393 
1394    nir_validate_shader(b->shader, NULL);
1395    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1396    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1397 
1398    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1399 
1400    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1401    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1402 
1403    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1404    ASSERT_EQ(load->def.bit_size, 32);
1405    ASSERT_EQ(load->def.num_components, 2);
1406 
1407    deref = nir_src_as_deref(load->src[0]);
1408    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1409 
1410    deref = nir_deref_instr_parent(deref);
1411    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1412    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1413 
1414    deref = nir_deref_instr_parent(deref);
1415    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1416    ASSERT_EQ(deref->var, var);
1417 
1418    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1419    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1420 }
1421 
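/* Booleans in shared memory are loaded as 32-bit values; the adjacent loads
 * should merge into a vec2 load whose components are then compared against
 * zero to recover the bools. */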
1422 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1423 {
1424    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1425    nir_deref_instr *deref = nir_build_deref_var(b, var);
1426 
1427    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1428    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1429 
1430    nir_validate_shader(b->shader, NULL);
1431    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1432 
1433    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1434 
1435    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1436 
1437    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1438    ASSERT_EQ(load->def.bit_size, 32);
1439    ASSERT_EQ(load->def.num_components, 2);
1440 
1441    deref = nir_src_as_deref(load->src[0]);
1442    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1443 
1444    deref = nir_deref_instr_parent(deref);
1445    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1446    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1447 
1448    deref = nir_deref_instr_parent(deref);
1449    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1450    ASSERT_EQ(deref->var, var);
1451 
1452    /* The loaded value is converted to Boolean by (loaded != 0). */
1453    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1454    ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_ine));
1455    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1456    ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->def, 1));
1457 }
1458 
1459 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1460 {
1461    glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1462                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1463 
1464    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1465    nir_deref_instr *deref = nir_build_deref_var(b, var);
1466 
1467    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1468    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1469 
1470    nir_validate_shader(b->shader, NULL);
1471    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1472 
1473    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1474 
1475    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1476 
1477    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1478    ASSERT_EQ(load->def.bit_size, 32);
1479    ASSERT_EQ(load->def.num_components, 2);
1480 
1481    deref = nir_src_as_deref(load->src[0]);
1482    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1483 
1484    deref = nir_deref_instr_parent(deref);
1485    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1486    ASSERT_EQ(deref->strct.index, 0);
1487 
1488    deref = nir_deref_instr_parent(deref);
1489    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1490    ASSERT_EQ(deref->var, var);
1491 
1492    /* The loaded value is converted to Boolean by (loaded != 0). */
1493    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1494    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1495 
1496    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1497 }
1498 
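/* Two constant stores to var[0] and var[1] should be fused into one vec2
 * store with write mask 0x3 and a combined constant value. */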
1499 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1500 {
1501    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1502    nir_deref_instr *deref = nir_build_deref_var(b, var);
1503 
1504    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1505    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1506 
1507    nir_validate_shader(b->shader, NULL);
1508    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1509 
1510    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1511 
1512    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1513 
1514    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1515    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1516    nir_def *val = store->src[1].ssa;
1517    ASSERT_EQ(val->bit_size, 32);
1518    ASSERT_EQ(val->num_components, 2);
1519    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1520    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1521    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1522 
1523    deref = nir_src_as_deref(store->src[0]);
1524    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1525 
1526    deref = nir_deref_instr_parent(deref);
1527    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1528    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1529 
1530    deref = nir_deref_instr_parent(deref);
1531    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1532    ASSERT_EQ(deref->var, var);
1533 }
1534 
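/* With base 4 added to its offset, the second load covers bytes 8..12, so
 * there is a 4-byte gap to the first load and nothing is vectorized. */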
1535 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1536 {
1537    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1538    nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1539 
1540    nir_validate_shader(b->shader, NULL);
1541    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1542 
1543    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1544 
1545    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1546 }
1547 
1548 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1549 {
1550    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1551    create_load(nir_var_mem_push_const, 0, 8, 0x2);
1552 
1553    nir_validate_shader(b->shader, NULL);
1554    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1555 
1556    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1557 
1558    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1559 }
1560 
1561 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1562 {
1563    nir_def *index_base = nir_load_local_invocation_index(b);
1564    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1565    create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1566 
1567    nir_validate_shader(b->shader, NULL);
1568    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1569 
1570    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1571 
1572    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1573 }
1574 
1575 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1576 {
1577    nir_def *index_base = nir_load_local_invocation_index(b);
1578    create_indirect_load(nir_var_mem_push_const, 0,
1579       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1580    create_indirect_load(nir_var_mem_push_const, 0,
1581       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
1582 
1583    nir_validate_shader(b->shader, NULL);
1584    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1585 
1586    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1587 
1588    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1589 }
1590 
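/* pc[i].w (offset i*16 + 12) and pc[i + 1].x (offset (i + 1)*16) are
 * provably adjacent, so the two loads should merge into one vec2 load at
 * the lower offset. */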
1591 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1592 {
1593    nir_def *index_base = nir_load_local_invocation_index(b);
1594    //vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1595    nir_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1596    nir_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
1597    create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1598    create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1599 
1600    nir_validate_shader(b->shader, NULL);
1601    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1602 
1603    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1604 
1605    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1606 
1607    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1608    ASSERT_EQ(load->def.bit_size, 32);
1609    ASSERT_EQ(load->def.num_components, 2);
1610    ASSERT_EQ(load->src[0].ssa, low);
1611    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1612    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1613 }
1614 
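/* The store uses a dynamic offset that may alias offset 0, so the two loads
 * of offset 0 must not be merged across it. */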
1615 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1616 {
1617    nir_def *index_base = nir_load_local_invocation_index(b);
1618    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1619    create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1620    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1621 
1622    nir_validate_shader(b->shader, NULL);
1623    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1624 
1625    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1626 
1627    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1628 }
1629 
1630 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1631 {
1632    nir_def *load_base = nir_load_global_invocation_index(b, 32);
1633    nir_def *store_base = nir_load_local_invocation_index(b);
1634    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1635    create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1636    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1637 
1638    nir_validate_shader(b->shader, NULL);
1639    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1640 
1641    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1642 
1643    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1644 }
1645 
1646 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1647 {
1648    /* TODO: try to combine these loads */
1649    nir_def *index_base = nir_load_local_invocation_index(b);
1650    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1651    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1652    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1653    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1654 
1655    nir_validate_shader(b->shader, NULL);
1656    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1657 
1658    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1659 
1660    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1661 
1662    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1663    ASSERT_EQ(load->def.bit_size, 32);
1664    ASSERT_EQ(load->def.num_components, 1);
1665    ASSERT_EQ(load->src[1].ssa, offset);
1666    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1667    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1668 }
1669 
1670 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1671 {
1672    /* These loads can't be combined because if index_base == 268435455, the
1673     * addition wraps around and offset == 0, which would alias the store.
1674     * They could be combined if nir_alu_instr::no_unsigned_wrap were set. */
1675    nir_def *index_base = nir_load_local_invocation_index(b);
1676    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1677    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1678    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1679    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1680 
1681    nir_validate_shader(b->shader, NULL);
1682    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1683 
1684    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1685 
1686    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1687 }
1688 
1689 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1690 {
1691    /* TODO: try to combine these loads */
1692    nir_def *index_base = nir_load_local_invocation_index(b);
1693    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1694    nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1695    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1696    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1697    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1698 
1699    nir_validate_shader(b->shader, NULL);
1700    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1701 
1702    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1703 
1704    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1705 
1706    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1707    ASSERT_EQ(load->def.bit_size, 32);
1708    ASSERT_EQ(load->def.num_components, 1);
1709    ASSERT_EQ(load->src[1].ssa, offset);
1710    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1711    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1712 }
1713 
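/* Bindings 0 and 1 may refer to the same buffer, so without any restrict
 * qualifier the store to binding 1 blocks combining the loads. */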
1714 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1715 {
1716    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1717    create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1718    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1719 
1720    nir_validate_shader(b->shader, NULL);
1721    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1722 
1723    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1724 
1725    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1726 }
1727 
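/* With ACCESS_RESTRICT the store to binding 1 can't alias binding 0, so the
 * two identical loads are combined into one. */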
1728 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1729 {
1730    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1731    create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1732    create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1733 
1734    nir_validate_shader(b->shader, NULL);
1735    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1736 
1737    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1738 
1739    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1740 
1741    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1742    ASSERT_EQ(load->def.bit_size, 32);
1743    ASSERT_EQ(load->def.num_components, 1);
1744    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1745    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1746    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1747 }
1748 
1749 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1750 {
1751    /* TODO: implement type-based alias analysis so that these loads can be
1752     * combined. this is made a bit more difficult than simply using
1753     * nir_compare_derefs() because the vectorizer creates loads/stores with
1754     * casted derefs. The solution would probably be to keep multiple derefs for
1755     * an entry (one for each load/store combined into it). */
1756    glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1757                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1758 
1759    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1760    nir_deref_instr *deref = nir_build_deref_var(b, var);
1761 
1762    nir_def *index0 = nir_load_local_invocation_index(b);
1763    nir_def *index1 = nir_load_global_invocation_index(b, 32);
1764    nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1765 
1766    create_shared_load(load_deref, 0x1);
1767    create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1768    create_shared_load(load_deref, 0x3);
1769 
1770    nir_validate_shader(b->shader, NULL);
1771    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1772 
1773    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1774 
1775    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1776 
1777    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1778    ASSERT_EQ(load->def.bit_size, 32);
1779    ASSERT_EQ(load->def.num_components, 1);
1780    ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1781    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1782    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1783 }
1784 
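/* var0 and var1 are distinct shared variables, so the store to var1 can't
 * alias the loads of var0 and those loads are combined. */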
1785 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1786 {
1787    nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1788    nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1789    nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1790 
1791    create_shared_load(load_deref, 0x1);
1792    create_shared_store(nir_build_deref_var(b, var1), 0x2);
1793    create_shared_load(load_deref, 0x3);
1794 
1795    nir_validate_shader(b->shader, NULL);
1796    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1797 
1798    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1799 
1800    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1801 
1802    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1803    ASSERT_EQ(load->def.bit_size, 32);
1804    ASSERT_EQ(load->def.num_components, 1);
1805    ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1806    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1807    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1808 }
1809 
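/* The constant 64-bit offsets differ by 0x100000004 bytes; they would only
 * look adjacent if the distance were truncated to 32 bits, so the loads
 * must stay separate. */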
1810 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1811 {
1812    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1813    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
1814 
1815    nir_validate_shader(b->shader, NULL);
1816    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1817 
1818    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1819 
1820    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1821 }
1822 
1823 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1824 {
1825    nir_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1826    nir_def *first = nir_imul_imm(b, index_base, 0x100000000);
1827    nir_def *second = nir_imul_imm(b, index_base, 0x200000000);
1828    create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1829    create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1830 
1831    nir_validate_shader(b->shader, NULL);
1832    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1833 
1834    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1835 
1836    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1837 }
1838 
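/* 0xfffffffc + 4 wraps to 0, so with robust access enabled for SSBOs the
 * two loads must not be merged into one access straddling the wrap. */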
1839 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1840 {
1841    create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1842    create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1843 
1844    nir_validate_shader(b->shader, NULL);
1845    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1846 
1847    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1848 
1849    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1850 }
1851 
1852 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
1853 {
1854    nir_def *offset = nir_load_local_invocation_index(b);
1855    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1856    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1857 
1858    nir_validate_shader(b->shader, NULL);
1859    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1860 
1861    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1862 
1863    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1864 }
1865 
1866 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
1867 {
1868    nir_def *offset = nir_load_local_invocation_index(b);
1869    offset = nir_imul_imm(b, offset, 8);
1870    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1871    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1872 
1873    nir_validate_shader(b->shader, NULL);
1874    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1875 
1876    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1877 
1878    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1879 }
1880 
1881 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
1882 {
1883    nir_def *offset = nir_load_local_invocation_index(b);
1884    offset = nir_imul_imm(b, offset, 12);
1885    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1886    nir_def *offset_4 = nir_iadd_imm(b, offset, 4);
1887    create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
1888    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1889 
1890    nir_validate_shader(b->shader, NULL);
1891    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
1892 
1893    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1894 
1895    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1896 
1897    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1898    ASSERT_EQ(load->def.bit_size, 32);
1899    ASSERT_EQ(load->def.num_components, 1);
1900    ASSERT_EQ(load->src[1].ssa, offset);
1901    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1902 
1903    load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
1904    ASSERT_EQ(load->def.bit_size, 32);
1905    ASSERT_EQ(load->def.num_components, 2);
1906    ASSERT_EQ(load->src[1].ssa, offset_4);
1907    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
1908    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1909 }
1910 
1911 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
1912 {
1913    nir_def *offset = nir_load_local_invocation_index(b);
1914    offset = nir_imul_imm(b, offset, 16);
1915    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1916    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1917    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1918    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);
1919 
1920    nir_validate_shader(b->shader, NULL);
1921    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);
1922 
1923    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1924 
1925    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1926 }
1927 
1928 TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
1929 {
1930    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
1931                                            glsl_array_type(glsl_uint_type(), 4, 0), "var");
1932    nir_deref_instr *deref = nir_build_deref_var(b, var);
1933 
1934    nir_def *index = nir_load_local_invocation_index(b);
1935    index = nir_imul_imm(b, index, 3);
1936    create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
1937    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
1938    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);
1939 
1940    nir_validate_shader(b->shader, NULL);
1941    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1942 
1943    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));
1944 
1945    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1946 }
1947 
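/* Both vec4 loads are trimmed to a single used component, so the
 * overlapping loads should collapse into one vec2 load covering bytes 0..8. */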
1948 TEST_F(nir_load_store_vectorize_test, ubo_overlapping_vec4_vec4_unused_components)
1949 {
1950    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
1951    create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 4);
1952    movs[0x1]->def.num_components = 1;
1953    movs[0x2]->def.num_components = 1;
1954 
1955    nir_validate_shader(b->shader, NULL);
1956    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
1957 
1958    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1959    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
1960 
1961    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
1962    ASSERT_EQ(load->def.bit_size, 32);
1963    ASSERT_EQ(load->def.num_components, 2);
1964    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
1965    ASSERT_EQ(nir_intrinsic_range(load), 8);
1966    ASSERT_EQ(nir_def_components_read(&load->def), 0x3);
1967    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1968    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1969    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1970 }
1971 
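/* offset = i*16 + 4, so the best provable alignment is align_mul = 16 with
 * align_offset = 4. */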
1972 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1973 {
1974    nir_def *offset = nir_load_local_invocation_index(b);
1975    offset = nir_imul_imm(b, offset, 16);
1976    offset = nir_iadd_imm(b, offset, 4);
1977    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1978                                                     0x1);
1979 
1980    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1981    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1982    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1983 }
1984 
1985 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1986 {
1987    nir_def *offset = nir_load_local_invocation_index(b);
1988    offset = nir_iadd_imm(b, offset, 1);
1989    offset = nir_imul_imm(b, offset, 16);
1990    offset = nir_iadd_imm(b, offset, 4);
1991    nir_intrinsic_instr *load =
1992       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1993 
1994    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1995    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1996    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1997 }
1998 
1999 /* Check offset % mul != 0 */
2000 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
2001 {
2002    nir_def *offset = nir_load_local_invocation_index(b);
2003    offset = nir_imul_imm(b, offset, 16);
2004    offset = nir_iadd_imm(b, offset, 20);
2005    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
2006                                                     0x1);
2007 
2008    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2009    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2010    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
2011 }
2012 
2013 /* Check that we don't upgrade to non-power-of-two alignments. */
2014 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
2015 {
2016    nir_def *offset = nir_load_local_invocation_index(b);
2017    offset = nir_imul_imm(b, offset, 24);
2018    offset = nir_iadd_imm(b, offset, 4);
2019    nir_intrinsic_instr *load =
2020       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2021 
2022    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2023    EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
2024    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
2025 }
2026 
2027 /* Check that we don't upgrade to non-power-of-two alignments. */
2028 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
2029 {
2030    nir_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
2031    nir_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
2032    nir_def *offset = nir_iadd(b, x, y);
2033    offset = nir_iadd_imm(b, offset, 8);
2034    nir_intrinsic_instr *load =
2035       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2036 
2037    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2038    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2039    EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
2040 }
2041 
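/* A constant offset yields the maximum alignment the pass can express:
 * align_mul = NIR_ALIGN_MUL_MAX with the offset itself as align_offset. */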
2042 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
2043 {
2044    nir_intrinsic_instr *load =
2045       create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
2046 
2047    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2048    EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
2049    EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
2050 }
2051 
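/* With overfetch allowed, the vec4 at offset 0 and the vec2 at offset 16
 * merge into one vec8 load whose two trailing components are unused. */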
2052 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec6_as_vec8)
2053 {
2054    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2055    create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 2);
2056 
2057    nir_validate_shader(b->shader, NULL);
2058    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2059 
2060    this->overfetch = true;
2061    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2062    this->overfetch = false;
2063 
2064    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2065 
2066    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2067    ASSERT_EQ(load->def.bit_size, 32);
2068    ASSERT_EQ(load->def.num_components, 8);
2069    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2070    ASSERT_EQ(nir_intrinsic_range(load), 32);
2071    ASSERT_EQ(nir_def_components_read(&load->def), 0x3f);
2072    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2073    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzw");
2074    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "ef");
2075 }
2076 
2077 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec7_as_vec8)
2078 {
2079    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2080    create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2081 
2082    nir_validate_shader(b->shader, NULL);
2083    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2084 
2085    this->overfetch = true;
2086    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2087    this->overfetch = false;
2088 
2089    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2090 
2091    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2092    ASSERT_EQ(load->def.bit_size, 32);
2093    ASSERT_EQ(load->def.num_components, 8);
2094    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2095    ASSERT_EQ(nir_intrinsic_range(load), 32);
2096    ASSERT_EQ(nir_def_components_read(&load->def), 0x7f);
2097    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2098    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzw");
2099    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
2100 }
2101 
2102 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec7_as_vec8_disallowed)
2103 {
2104    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
2105    create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2106 
2107    nir_validate_shader(b->shader, NULL);
2108    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2109 
2110    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2111    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2112 }
2113 
2114 TEST_F(nir_load_store_vectorize_test, ubo_overfetch_vec11_as_vec16)
2115 {
2116    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 8);
2117    create_load(nir_var_mem_ubo, 0, 32, 0x2, 32, 3);
2118 
2119    nir_validate_shader(b->shader, NULL);
2120    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2121 
2122    this->overfetch = true;
2123    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2124    this->overfetch = false;
2125 
2126    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2127 
2128    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2129    ASSERT_EQ(load->def.bit_size, 32);
2130    ASSERT_EQ(load->def.num_components, 16);
2131    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2132    ASSERT_EQ(nir_intrinsic_range(load), 64);
2133    ASSERT_EQ(nir_def_components_read(&load->def), 0x7ff);
2134    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2135    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyzwefgh");
2136    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "ijk");
2137 }
2138 
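/* The second load only uses seven distinct components (its last swizzle
 * entry repeats component 6), so with max_components = 8 both loads fit
 * into a single vec8 load. */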
2139 TEST_F(nir_load_store_vectorize_test, ubo_vec1_vec7as8)
2140 {
2141    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 1);
2142    create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 8);
2143    movs[0x2]->src[0].swizzle[7] = 6; /* use only components 0..6 */
2144 
2145    nir_validate_shader(b->shader, NULL);
2146    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2147 
2148    this->max_components = 8;
2149    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2150    this->max_components = 4;
2151 
2152    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2153 
2154    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2155    ASSERT_EQ(load->def.bit_size, 32);
2156    ASSERT_EQ(load->def.num_components, 8);
2157    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2158    ASSERT_EQ(nir_intrinsic_range(load), 32);
2159    ASSERT_EQ(nir_def_components_read(&load->def), 0xff);
2160    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2161    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
2162    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yzwefghh");
2163 }
2164 
2165 TEST_F(nir_load_store_vectorize_test, ubo_vec7as8_vec1)
2166 {
2167    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 8);
2168    movs[0x1]->src[0].swizzle[0] = 1; /* use only components 1..7 */
2169    create_load(nir_var_mem_ubo, 0, 32, 0x2, 32, 1);
2170 
2171    nir_validate_shader(b->shader, NULL);
2172    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2173 
2174    this->max_components = 8;
2175    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2176    this->max_components = 4;
2177 
2178    /* TODO: This is not merged by the pass, but we could implement it. */
2179    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2180 }
2181 
2182 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole1_vec1_disallowed)
2183 {
2184    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2185    create_load(nir_var_mem_ubo, 0, 12, 0x2, 32, 1);
2186 
2187    nir_validate_shader(b->shader, NULL);
2188    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2189 
2190    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2191    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2192 }
2193 
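/* Same loads as the previous test, but with max_hole_size = 4 the single
 * unused component between them may be fetched, so one vec4 load is emitted
 * with component z unused. */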
2194 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole1_vec1)
2195 {
2196    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2197    create_load(nir_var_mem_ubo, 0, 12, 0x2, 32, 1);
2198 
2199    nir_validate_shader(b->shader, NULL);
2200    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2201 
2202    this->max_hole_size = 4;
2203    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2204    this->max_hole_size = 0;
2205 
2206    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2207 
2208    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2209    ASSERT_EQ(load->def.bit_size, 32);
2210    ASSERT_EQ(load->def.num_components, 4);
2211    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2212    ASSERT_EQ(nir_intrinsic_range(load), 16);
2213    ASSERT_EQ(nir_def_components_read(&load->def), 1 | 2 | 8);
2214    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2215    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
2216    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "w");
2217 }
2218 
2219 TEST_F(nir_load_store_vectorize_test, ubo_vec2_hole2_vec4_disallowed)
2220 {
2221    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
2222    create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 1);
2223 
2224    nir_validate_shader(b->shader, NULL);
2225    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2226 
2227    /* The pass only allows 4-byte holes. */
2228    this->max_hole_size = 8;
2229    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2230    this->max_hole_size = 0;
2231 
2232    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2233 }
2234 
2235 TEST_F(nir_load_store_vectorize_test, ubo_vec3_hole1_vec3)
2236 {
2237    create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 3);
2238    create_load(nir_var_mem_ubo, 0, 16, 0x2, 32, 3);
2239 
2240    nir_validate_shader(b->shader, NULL);
2241    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
2242 
2243    this->overfetch = true;
2244    this->max_hole_size = 4;
2245    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2246    this->max_hole_size = 0;
2247    this->overfetch = false;
2248 
2249    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
2250 
2251    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
2252    ASSERT_EQ(load->def.bit_size, 32);
2253    ASSERT_EQ(load->def.num_components, 8);
2254    ASSERT_EQ(nir_intrinsic_range_base(load), 0);
2255    ASSERT_EQ(nir_intrinsic_range(load), 32);
2256    ASSERT_EQ(nir_def_components_read(&load->def), 0x77);
2257    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
2258    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz");
2259    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
2260 }
2261