• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "nir.h"
26 #include "nir_vla.h"
27 #include "nir_builder.h"
28 #include "util/u_dynarray.h"
29 
30 #define HASH(hash, data) XXH32(&data, sizeof(data), hash)
31 
32 static uint32_t
hash_src(uint32_t hash,const nir_src * src)33 hash_src(uint32_t hash, const nir_src *src)
34 {
35    assert(src->is_ssa);
36    void *hash_data = nir_src_is_const(*src) ? NULL : src->ssa;
37 
38    return HASH(hash, hash_data);
39 }
40 
41 static uint32_t
hash_alu_src(uint32_t hash,const nir_alu_src * src)42 hash_alu_src(uint32_t hash, const nir_alu_src *src)
43 {
44    assert(!src->abs && !src->negate);
45 
46    /* intentionally don't hash swizzle */
47 
48    return hash_src(hash, &src->src);
49 }
50 
51 static uint32_t
hash_alu(uint32_t hash,const nir_alu_instr * instr)52 hash_alu(uint32_t hash, const nir_alu_instr *instr)
53 {
54    hash = HASH(hash, instr->op);
55 
56    hash = HASH(hash, instr->dest.dest.ssa.bit_size);
57 
58    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
59       hash = hash_alu_src(hash, &instr->src[i]);
60 
61    return hash;
62 }
63 
64 static uint32_t
hash_instr(const nir_instr * instr)65 hash_instr(const nir_instr *instr)
66 {
67    uint32_t hash = 0;
68 
69    switch (instr->type) {
70    case nir_instr_type_alu:
71       return hash_alu(hash, nir_instr_as_alu(instr));
72    default:
73       unreachable("bad instruction type");
74    }
75 }
76 
77 static bool
srcs_equal(const nir_src * src1,const nir_src * src2)78 srcs_equal(const nir_src *src1, const nir_src *src2)
79 {
80    assert(src1->is_ssa);
81    assert(src2->is_ssa);
82 
83    return src1->ssa == src2->ssa ||
84       nir_src_is_const(*src1) == nir_src_is_const(*src2);
85 }
86 
87 static bool
alu_srcs_equal(const nir_alu_src * src1,const nir_alu_src * src2)88 alu_srcs_equal(const nir_alu_src *src1, const nir_alu_src *src2)
89 {
90    assert(!src1->abs);
91    assert(!src1->negate);
92    assert(!src2->abs);
93    assert(!src2->negate);
94 
95    return srcs_equal(&src1->src, &src2->src);
96 }
97 
98 static bool
instrs_equal(const nir_instr * instr1,const nir_instr * instr2)99 instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
100 {
101    switch (instr1->type) {
102    case nir_instr_type_alu: {
103       nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
104       nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
105 
106       if (alu1->op != alu2->op)
107          return false;
108 
109       if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
110          return false;
111 
112       for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
113          if (!alu_srcs_equal(&alu1->src[i], &alu2->src[i]))
114             return false;
115       }
116 
117       return true;
118    }
119 
120    default:
121       unreachable("bad instruction type");
122    }
123 }
124 
125 static bool
instr_can_rewrite(nir_instr * instr)126 instr_can_rewrite(nir_instr *instr)
127 {
128    switch (instr->type) {
129    case nir_instr_type_alu: {
130       nir_alu_instr *alu = nir_instr_as_alu(instr);
131 
132       /* Don't try and vectorize mov's. Either they'll be handled by copy
133        * prop, or they're actually necessary and trying to vectorize them
134        * would result in fighting with copy prop.
135        */
136       if (alu->op == nir_op_mov)
137          return false;
138 
139       if (nir_op_infos[alu->op].output_size != 0)
140          return false;
141 
142       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
143          if (nir_op_infos[alu->op].input_sizes[i] != 0)
144             return false;
145       }
146 
147       return true;
148    }
149 
150    /* TODO support phi nodes */
151    default:
152       break;
153    }
154 
155    return false;
156 }
157 
/*
 * Tries to combine two instructions whose sources are different components of
 * the same instructions into one vectorized instruction. Note that instr1
 * should dominate instr2.
 *
 * Returns the new combined instruction on success (both originals are
 * removed), or NULL if the pair cannot or should not be combined.
 */

static nir_instr *
instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2,
                  nir_opt_vectorize_cb filter, void *data)
{
   assert(instr1->type == nir_instr_type_alu);
   assert(instr2->type == nir_instr_type_alu);
   nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
   nir_alu_instr *alu2 = nir_instr_as_alu(instr2);

   /* instrs_equal() only matches instructions of equal bit size. */
   assert(alu1->dest.dest.ssa.bit_size == alu2->dest.dest.ssa.bit_size);
   unsigned alu1_components = alu1->dest.dest.ssa.num_components;
   unsigned alu2_components = alu2->dest.dest.ssa.num_components;
   unsigned total_components = alu1_components + alu2_components;

   /* The combined destination must still fit in a vec4. */
   if (total_components > 4)
      return NULL;

   /* With vectorize_vec2_16bit set, only 16-bit vec2s may be built. */
   if (nir->options->vectorize_vec2_16bit &&
       (total_components > 2 || alu1->dest.dest.ssa.bit_size != 16))
      return NULL;

   /* Give the caller's filter a veto over this particular pairing. */
   if (filter && !filter(&alu1->instr, &alu2->instr, data))
      return NULL;

   nir_builder b;
   nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node));
   /* Insert at instr1: it dominates instr2, so every source of both
    * instructions is available at this point.
    */
   b.cursor = nir_after_instr(instr1);

   nir_alu_instr *new_alu = nir_alu_instr_create(b.shader, alu1->op);
   nir_ssa_dest_init(&new_alu->instr, &new_alu->dest.dest,
                     total_components, alu1->dest.dest.ssa.bit_size, NULL);
   new_alu->dest.write_mask = (1 << total_components) - 1;

   /* If either channel is exact, we have to preserve it even if it's
    * not optimal for other channels.
    */
   new_alu->exact = alu1->exact || alu2->exact;

   /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
    */
   new_alu->no_signed_wrap = alu1->no_signed_wrap && alu2->no_signed_wrap;
   new_alu->no_unsigned_wrap = alu1->no_unsigned_wrap && alu2->no_unsigned_wrap;

   for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
      /* handle constant merging case */
      if (alu1->src[i].src.ssa != alu2->src[i].src.ssa) {
         /* srcs_equal() only lets differing SSA defs through when both
          * are constants, so build one vector immediate out of the
          * swizzled channels of both.
          */
         nir_const_value *c1 = nir_src_as_const_value(alu1->src[i].src);
         nir_const_value *c2 = nir_src_as_const_value(alu2->src[i].src);
         assert(c1 && c2);
         nir_const_value value[NIR_MAX_VEC_COMPONENTS];
         unsigned bit_size = alu1->src[i].src.ssa->bit_size;

         /* alu1's channels occupy [0, alu1_components), alu2's the rest. */
         for (unsigned j = 0; j < total_components; j++) {
            value[j].u64 = j < alu1_components ?
                              c1[alu1->src[i].swizzle[j]].u64 :
                              c2[alu2->src[i].swizzle[j - alu1_components]].u64;
         }
         nir_ssa_def *def = nir_build_imm(&b, total_components, bit_size, value);

         new_alu->src[i].src = nir_src_for_ssa(def);
         /* The immediate was built pre-swizzled, so use the identity. */
         for (unsigned j = 0; j < total_components; j++)
            new_alu->src[i].swizzle[j] = j;
         continue;
      }

      /* Shared (same-def) source: concatenate the two swizzles. */
      new_alu->src[i].src = alu1->src[i].src;

      for (unsigned j = 0; j < alu1_components; j++)
         new_alu->src[i].swizzle[j] = alu1->src[i].swizzle[j];

      for (unsigned j = 0; j < alu2_components; j++) {
         new_alu->src[i].swizzle[j + alu1_components] =
            alu2->src[i].swizzle[j];
      }
   }

   nir_builder_instr_insert(&b, &new_alu->instr);

   /* Build swizzle movs selecting each half of the new destination:
    * new_alu1 reads components [0, alu1_components), new_alu2 reads
    * [alu1_components, total_components).  These feed non-ALU users below.
    */
   unsigned swiz[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      swiz[i] = i;
   nir_ssa_def *new_alu1 = nir_swizzle(&b, &new_alu->dest.dest.ssa, swiz,
                                       alu1_components);

   for (unsigned i = 0; i < alu2_components; i++)
      swiz[i] += alu1_components;
   nir_ssa_def *new_alu2 = nir_swizzle(&b, &new_alu->dest.dest.ssa, swiz,
                                       alu2_components);

   nir_foreach_use_safe(src, &alu1->dest.dest.ssa) {
      if (src->parent_instr->type == nir_instr_type_alu) {
         /* For ALU instructions, rewrite the source directly to avoid a
          * round-trip through copy propagation.
          */

         /* alu1's channels kept their component indices, so the user's
          * existing swizzle stays valid unchanged.
          */
         nir_instr_rewrite_src(src->parent_instr, src,
                               nir_src_for_ssa(&new_alu->dest.dest.ssa));
      } else {
         nir_instr_rewrite_src(src->parent_instr, src,
                               nir_src_for_ssa(new_alu1));
      }
   }

   nir_foreach_if_use_safe(src, &alu1->dest.dest.ssa) {
      nir_if_rewrite_condition(src->parent_if, nir_src_for_ssa(new_alu1));
   }

   assert(list_is_empty(&alu1->dest.dest.ssa.uses));
   assert(list_is_empty(&alu1->dest.dest.ssa.if_uses));

   nir_foreach_use_safe(src, &alu2->dest.dest.ssa) {
      if (src->parent_instr->type == nir_instr_type_alu) {
         /* For ALU instructions, rewrite the source directly to avoid a
          * round-trip through copy propagation.
          */

         nir_alu_instr *use = nir_instr_as_alu(src->parent_instr);

         /* Find which of the user's sources is the one being rewritten.
          * NOTE(review): 5 serves as an out-of-range sentinel -- presumably
          * safe because ops reaching here have few enough sources (vec ops
          * are rejected by instr_can_rewrite()); confirm against
          * nir_op_infos limits.
          */
         unsigned src_index = 5;
         for (unsigned i = 0; i < nir_op_infos[use->op].num_inputs; i++) {
            if (&use->src[i].src == src) {
               src_index = i;
               break;
            }
         }
         assert(src_index != 5);

         nir_instr_rewrite_src(src->parent_instr, src,
                               nir_src_for_ssa(&new_alu->dest.dest.ssa));

         /* alu2's channels moved up by alu1_components, so shift the
          * user's swizzle to follow them.
          */
         for (unsigned i = 0;
              i < nir_ssa_alu_instr_src_components(use, src_index); i++) {
            use->src[src_index].swizzle[i] += alu1_components;
         }
      } else {
         nir_instr_rewrite_src(src->parent_instr, src,
                               nir_src_for_ssa(new_alu2));
      }
   }

   nir_foreach_if_use_safe(src, &alu2->dest.dest.ssa) {
      nir_if_rewrite_condition(src->parent_if, nir_src_for_ssa(new_alu2));
   }

   assert(list_is_empty(&alu2->dest.dest.ssa.uses));
   assert(list_is_empty(&alu2->dest.dest.ssa.if_uses));

   /* Both originals are now fully dead. */
   nir_instr_remove(instr1);
   nir_instr_remove(instr2);

   return &new_alu->instr;
}
317 
318 /*
319  * Use an array to represent a stack of instructions that are equivalent.
320  *
321  * We push and pop instructions off the stack in dominance order. The first
322  * element dominates the second element which dominates the third, etc. When
323  * trying to add to the stack, first we try and combine the instruction with
324  * each of the instructions on the stack and, if successful, replace the
325  * instruction on the stack with the newly-combined instruction.
326  */
327 
328 static struct util_dynarray *
vec_instr_stack_create(void * mem_ctx)329 vec_instr_stack_create(void *mem_ctx)
330 {
331    struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
332    util_dynarray_init(stack, mem_ctx);
333    return stack;
334 }
335 
336 /* returns true if we were able to successfully replace the instruction */
337 
338 static bool
vec_instr_stack_push(struct nir_shader * nir,struct util_dynarray * stack,nir_instr * instr,nir_opt_vectorize_cb filter,void * data)339 vec_instr_stack_push(struct nir_shader *nir, struct util_dynarray *stack,
340                      nir_instr *instr,
341                      nir_opt_vectorize_cb filter, void *data)
342 {
343    /* Walk the stack from child to parent to make live ranges shorter by
344     * matching the closest thing we can
345     */
346    util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
347       nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr,
348                                                filter, data);
349       if (new_instr) {
350          *stack_instr = new_instr;
351          return true;
352       }
353    }
354 
355    util_dynarray_append(stack, nir_instr *, instr);
356    return false;
357 }
358 
359 static void
vec_instr_stack_pop(struct util_dynarray * stack,nir_instr * instr)360 vec_instr_stack_pop(struct util_dynarray *stack, nir_instr *instr)
361 {
362    ASSERTED nir_instr *last = util_dynarray_pop(stack, nir_instr *);
363    assert(last == instr);
364 }
365 
366 static bool
cmp_func(const void * data1,const void * data2)367 cmp_func(const void *data1, const void *data2)
368 {
369    const struct util_dynarray *arr1 = data1;
370    const struct util_dynarray *arr2 = data2;
371 
372    const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
373    const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
374 
375    return instrs_equal(instr1, instr2);
376 }
377 
378 static uint32_t
hash_stack(const void * data)379 hash_stack(const void *data)
380 {
381    const struct util_dynarray *stack = data;
382    const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
383    return hash_instr(first);
384 }
385 
386 static struct set *
vec_instr_set_create(void)387 vec_instr_set_create(void)
388 {
389    return _mesa_set_create(NULL, hash_stack, cmp_func);
390 }
391 
392 static void
vec_instr_set_destroy(struct set * instr_set)393 vec_instr_set_destroy(struct set *instr_set)
394 {
395    _mesa_set_destroy(instr_set, NULL);
396 }
397 
398 static bool
vec_instr_set_add_or_rewrite(struct nir_shader * nir,struct set * instr_set,nir_instr * instr,nir_opt_vectorize_cb filter,void * data)399 vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
400                              nir_instr *instr,
401                              nir_opt_vectorize_cb filter, void *data)
402 {
403    if (!instr_can_rewrite(instr))
404       return false;
405 
406    struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
407    vec_instr_stack_push(nir, new_stack, instr, filter, data);
408 
409    struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
410 
411    if (entry) {
412       ralloc_free(new_stack);
413       struct util_dynarray *stack = (struct util_dynarray *) entry->key;
414       return vec_instr_stack_push(nir, stack, instr, filter, data);
415    }
416 
417    _mesa_set_add(instr_set, new_stack);
418    return false;
419 }
420 
421 static void
vec_instr_set_remove(struct nir_shader * nir,struct set * instr_set,nir_instr * instr,nir_opt_vectorize_cb filter,void * data)422 vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
423                      nir_instr *instr, nir_opt_vectorize_cb filter, void *data)
424 {
425    if (!instr_can_rewrite(instr))
426       return;
427 
428    /*
429     * It's pretty unfortunate that we have to do this, but it's a side effect
430     * of the hash set interfaces. The hash set assumes that we're only
431     * interested in storing one equivalent element at a time, and if we try to
432     * insert a duplicate element it will remove the original. We could hack up
433     * the comparison function to "know" which input is an instruction we
434     * passed in and which is an array that's part of the entry, but that
435     * wouldn't work because we need to pass an array to _mesa_set_add() in
436     * vec_instr_add_or_rewrite() above, and _mesa_set_add() will call our
437     * comparison function as well.
438     */
439    struct util_dynarray *temp = vec_instr_stack_create(instr_set);
440    vec_instr_stack_push(nir, temp, instr, filter, data);
441    struct set_entry *entry = _mesa_set_search(instr_set, temp);
442    ralloc_free(temp);
443 
444    if (entry) {
445       struct util_dynarray *stack = (struct util_dynarray *) entry->key;
446 
447       if (util_dynarray_num_elements(stack, nir_instr *) > 1)
448          vec_instr_stack_pop(stack, instr);
449       else
450          _mesa_set_remove(instr_set, entry);
451    }
452 }
453 
454 static bool
vectorize_block(struct nir_shader * nir,nir_block * block,struct set * instr_set,nir_opt_vectorize_cb filter,void * data)455 vectorize_block(struct nir_shader *nir, nir_block *block,
456                 struct set *instr_set,
457                 nir_opt_vectorize_cb filter, void *data)
458 {
459    bool progress = false;
460 
461    nir_foreach_instr_safe(instr, block) {
462       if (vec_instr_set_add_or_rewrite(nir, instr_set, instr, filter, data))
463          progress = true;
464    }
465 
466    for (unsigned i = 0; i < block->num_dom_children; i++) {
467       nir_block *child = block->dom_children[i];
468       progress |= vectorize_block(nir, child, instr_set, filter, data);
469    }
470 
471    nir_foreach_instr_reverse(instr, block)
472       vec_instr_set_remove(nir, instr_set, instr, filter, data);
473 
474    return progress;
475 }
476 
477 static bool
nir_opt_vectorize_impl(struct nir_shader * nir,nir_function_impl * impl,nir_opt_vectorize_cb filter,void * data)478 nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl,
479                        nir_opt_vectorize_cb filter, void *data)
480 {
481    struct set *instr_set = vec_instr_set_create();
482 
483    nir_metadata_require(impl, nir_metadata_dominance);
484 
485    bool progress = vectorize_block(nir, nir_start_block(impl), instr_set,
486                                    filter, data);
487 
488    if (progress)
489       nir_metadata_preserve(impl, nir_metadata_block_index |
490                                   nir_metadata_dominance);
491 
492    vec_instr_set_destroy(instr_set);
493    return progress;
494 }
495 
496 bool
nir_opt_vectorize(nir_shader * shader,nir_opt_vectorize_cb filter,void * data)497 nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
498                   void *data)
499 {
500    bool progress = false;
501 
502    nir_foreach_function(function, shader) {
503       if (function->impl)
504          progress |= nir_opt_vectorize_impl(shader, function->impl, filter, data);
505    }
506 
507    return progress;
508 }
509