• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nir.h"
6 #include "nir_builder.h"
7 
8 /*
9  * It is challenging to optimize the complex deref chains resulting from
10  * nontrivial OpenCL C constructs. nir_opt_deref generally does a good job, but
11  * occasionally we are forced to lower temporaries to scratch anyway. LLVM's
12  * recent embrace of opaque pointers has exacerbated this problem.
13  *
14  * The "proper" solutions here are to smarten nir_opt_deref and/or to use LLVM's
15  * own optimization passes to clean up the input IR. Both of these are
16  * challenging projects for the medium-term.
17  *
18  * In the short term, this pass is a stopgap. After lowering away all derefs to
19  * scratch, this pass can "unlower" scratch memory back into nir_variable
20  * access. The lower->unlower pair is lossy. The point is not to reconstruct the
21  * original derefs (that we failed to optimize), but instead just to model array
22  * access that other NIR passes can optimize. The resulting array accesses will
23  * generally optimize out if there are no indirects, or can be lowered to bcsel
24  * instead of scratch if that's preferable for a driver.
25  */
26 
27 /*
28  * This pass operates only on 32-bit scalars, so this callback instructs
29  * nir_lower_mem_access_bit_sizes_options to turn all scratch access into
30  * 32-bit scalars. We don't want to use 8-bit accesses, since the resulting
31  * pack/unpack would be challenging to optimize on some drivers. Access larger
32  * than 32 bits, however, requires nontrivial tracking to extract/insert. Since
33  * nir_lower_mem_access_bit_sizes already has that code, we use it in this pass
34  * instead of NIH'ing it here.
35  */
36 static nir_mem_access_size_align
mem_access_cb(nir_intrinsic_op intrin,uint8_t bytes,uint8_t bit_size,uint32_t align,uint32_t align_offset,bool offset_is_const,enum gl_access_qualifier access,const void * cb_data)37 mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size,
38               uint32_t align, uint32_t align_offset, bool offset_is_const,
39               enum gl_access_qualifier access, const void *cb_data)
40 {
41    return (nir_mem_access_size_align){
42       .num_components = 1,
43       .bit_size = 32,
44       .align = 4,
45       .shift = nir_mem_access_shift_method_scalar,
46    };
47 }
48 
49 /*
50  * Thanks to nir_lower_mem_access_bit_sizes, we can lower scratch intrinsics 1:1
51  * to word-based array access.
52  */
53 static bool
lower_scratch_to_var(nir_builder * b,nir_intrinsic_instr * intr,void * data)54 lower_scratch_to_var(nir_builder *b, nir_intrinsic_instr *intr, void *data)
55 {
56    nir_variable *scratch = data;
57    b->cursor = nir_before_instr(&intr->instr);
58 
59    if (intr->intrinsic == nir_intrinsic_store_scratch) {
60       nir_def *index = nir_udiv_aligned_4(b, intr->src[1].ssa);
61       nir_def *value = intr->src[0].ssa;
62 
63       nir_store_array_var(b, scratch, index, value, nir_component_mask(1));
64    } else if (intr->intrinsic == nir_intrinsic_load_scratch) {
65       nir_def *index = nir_udiv_aligned_4(b, intr->src[0].ssa);
66       nir_def_rewrite_uses(&intr->def, nir_load_array_var(b, scratch, index));
67    } else {
68       return false;
69    }
70 
71    nir_instr_remove(&intr->instr);
72    return true;
73 }
74 
75 bool
nir_lower_scratch_to_var(nir_shader * nir)76 nir_lower_scratch_to_var(nir_shader *nir)
77 {
78    unsigned words = DIV_ROUND_UP(nir->scratch_size, 4);
79 
80    /* Early exit in the common case that scratch is not used. */
81    if (words == 0) {
82       return false;
83    }
84 
85    /* First, lower bit sizes and vectors as required by lower_scratch_to_var */
86    nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
87       .modes = nir_var_shader_temp | nir_var_function_temp,
88       .callback = mem_access_cb,
89    };
90    NIR_PASS(_, nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
91 
92    /* Then, back scratch by an array of words and turn all scratch access into
93     * array access.
94     */
95    nir_function_impl *entry = nir_shader_get_entrypoint(nir);
96    const glsl_type *type_ = glsl_array_type(glsl_uint_type(), words, 1);
97    nir_variable *var = nir_local_variable_create(entry, type_, "scratch");
98    nir_shader_intrinsics_pass(nir, lower_scratch_to_var,
99                               nir_metadata_control_flow, var);
100    nir->scratch_size = 0;
101 
102    /* Now clean up the mess we made */
103    bool progress;
104    do {
105       progress = false;
106       NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
107       NIR_PASS(progress, nir, nir_opt_constant_folding);
108       NIR_PASS(progress, nir, nir_opt_algebraic);
109       NIR_PASS(progress, nir, nir_copy_prop);
110       NIR_PASS(progress, nir, nir_opt_cse);
111       NIR_PASS(progress, nir, nir_opt_dce);
112    } while (progress);
113 
114    return true;
115 }
116