/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>
#include "aco_ir.h"
#include "vulkan/radv_shader_args.h"

namespace aco {

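/* Per-stage shader I/O state: for each varying slot, a mask of the components
 * that have been written and the temporaries (4 per slot) holding their
 * values. */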
struct shader_io_state {
   uint8_t mask[VARYING_SLOT_MAX];
   Temp temps[VARYING_SLOT_MAX * 4u];

   shader_io_state() {
      memset(mask, 0, sizeof(mask));
      std::fill_n(temps, VARYING_SLOT_MAX * 4u, Temp(0, RegClass::v1));
   }
};

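/* Per-buffer-resource flags: which kinds of VMEM accesses (GLC and non-GLC
 * loads/stores) have been recorded for a descriptor, and whether the buffer
 * is qualified restrict. */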
enum resource_flags {
   has_glc_vmem_load = 0x1,
   has_nonglc_vmem_load = 0x2,
   has_glc_vmem_store = 0x4,
   has_nonglc_vmem_store = 0x8,

   has_vmem_store = has_glc_vmem_store | has_nonglc_vmem_store,
   has_vmem_loadstore = has_vmem_store | has_glc_vmem_load | has_nonglc_vmem_load,
   has_nonglc_vmem_loadstore = has_nonglc_vmem_load | has_nonglc_vmem_store,

   buffer_is_restrict = 0x10,
};

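/* State carried through instruction selection of one program: compiler
 * options and arguments, the current block, control-flow bookkeeping,
 * buffer-resource aliasing flags and per-stage I/O state. */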
struct isel_context {
   const struct radv_nir_compiler_options *options;
   struct radv_shader_args *args;
   Program *program;
   nir_shader *shader;
   uint32_t constant_data_offset;
   Block *block;
   uint32_t first_temp_id;
   std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
   Stage stage;
   bool has_gfx10_wave64_bpermute = false;
   struct {
      bool has_branch;
      uint16_t loop_nest_depth = 0;
      struct {
         unsigned header_idx;
         Block* exit;
         bool has_divergent_continue = false;
         bool has_divergent_branch = false;
      } parent_loop;
      struct {
         bool is_divergent = false;
      } parent_if;
      bool exec_potentially_empty_discard = false; /* set to false when loop_nest_depth==0 && parent_if.is_divergent==false */
      uint16_t exec_potentially_empty_break_depth = UINT16_MAX;
      /* Set to false when loop_nest_depth==exec_potentially_empty_break_depth
       * and parent_if.is_divergent==false. Called _break but it's also used for
       * loop continues. */
      bool exec_potentially_empty_break = false;
      std::unique_ptr<unsigned[]> nir_to_aco; /* NIR block index to ACO block index */
   } cf_info;

   /* NIR range analysis. */
   struct hash_table *range_ht;
   nir_unsigned_upper_bound_config ub_config;

   uint32_t resource_flag_offsets[MAX_SETS];
   std::vector<uint8_t> buffer_resource_flags;

   Temp arg_temps[AC_MAX_ARGS];

   /* FS inputs */
   Temp persp_centroid, linear_centroid;

   /* GS inputs */
   bool ngg_nogs_early_prim_export = false;
   bool ngg_gs_early_alloc = false;
   bool ngg_gs_known_vtxcnt[4] = {false, false, false, false};
   Temp gs_wave_id;
   unsigned ngg_gs_emit_addr = 0;
   unsigned ngg_gs_emit_vtx_bytes = 0;
   unsigned ngg_gs_scratch_addr = 0;
   unsigned ngg_gs_primflags_offset = 0;
   int ngg_gs_const_vtxcnt[4];
   int ngg_gs_const_prmcnt[4];

   /* VS output information */
   bool export_clip_dists;
   unsigned num_clip_distances;
   unsigned num_cull_distances;

   /* tessellation information */
   unsigned tcs_tess_lvl_out_loc;
   unsigned tcs_tess_lvl_in_loc;
   uint64_t tcs_temp_only_inputs;
   uint32_t tcs_num_inputs;
   uint32_t tcs_num_outputs;
   uint32_t tcs_num_patch_outputs;
   uint32_t tcs_num_patches;
   bool tcs_in_out_eq = false;

   /* I/O information */
   shader_io_state inputs;
   shader_io_state outputs;
};

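/* Returns the temporary holding the value of the given shader argument.
 * The argument must actually be used. */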
inline Temp get_arg(isel_context *ctx, struct ac_arg arg)
{
   assert(arg.used);
   return ctx->arg_temps[arg.arg_index];
}

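/* Returns the span of buffer_resource_flags entries the given buffer resource
 * may alias: its own entry if the descriptor is known and marked restrict,
 * the shared entry flags[0] for non-restrict buffers, the whole array for
 * unknown/global resources, and an empty span for non-buffer images. */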
inline void get_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access,
                                      uint8_t **flags, uint32_t *count)
{
   int desc_set = -1;
   unsigned binding = 0;

   if (!def) {
      /* global resources are considered aliasing with all other buffers and
       * buffer images */
      // TODO: only merge flags of resources which can really alias.
   } else if (def->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr* mov_instr = nir_instr_as_alu(def->parent_instr);
      if (mov_instr->op == nir_op_mov && mov_instr->src[0].swizzle[0] == 0 &&
          mov_instr->src[0].src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(mov_instr->src[0].src.ssa->parent_instr);
         if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
            desc_set = nir_intrinsic_desc_set(intrin);
            binding = nir_intrinsic_binding(intrin);
         }
      }
   } else if (def->parent_instr->type == nir_instr_type_deref) {
      nir_deref_instr *deref = nir_instr_as_deref(def->parent_instr);
      assert(deref->type->is_image());
      if (deref->type->sampler_dimensionality != GLSL_SAMPLER_DIM_BUF) {
         *flags = NULL;
         *count = 0;
         return;
      }

      nir_variable *var = nir_deref_instr_get_variable(deref);
      desc_set = var->data.descriptor_set;
      binding = var->data.binding;
   }

   if (desc_set < 0) {
      *flags = ctx->buffer_resource_flags.data();
      *count = ctx->buffer_resource_flags.size();
      return;
   }

   unsigned set_offset = ctx->resource_flag_offsets[desc_set];

   if (!(ctx->buffer_resource_flags[set_offset + binding] & buffer_is_restrict)) {
      /* Non-restrict buffers alias only with other non-restrict buffers.
       * We reserve flags[0] for these. */
      *flags = ctx->buffer_resource_flags.data();
      *count = 1;
      return;
   }

   *flags = ctx->buffer_resource_flags.data() + set_offset + binding;
   *count = 1;
}

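/* ORs together the resource flags of every buffer the given resource may
 * alias (see get_buffer_resource_flags above). */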
inline uint8_t get_all_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access)
{
   uint8_t *flags;
   uint32_t count;
   get_buffer_resource_flags(ctx, def, access, &flags, &count);

   uint8_t res = 0;
   for (unsigned i = 0; i < count; i++)
      res |= flags[i];
   return res;
}

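/* Whether a sub-dword SSBO store can still use SMEM: the write mask must be
 * contiguous from component 0 and cover all components, the total written
 * size must be a multiple of 32 bits, and the store must be dword-aligned. */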
inline bool can_subdword_ssbo_store_use_smem(nir_intrinsic_instr *intrin)
{
   unsigned wrmask = nir_intrinsic_write_mask(intrin);
   if (util_last_bit(wrmask) != util_bitcount(wrmask) ||
       util_bitcount(wrmask) * intrin->src[0].ssa->bit_size % 32 ||
       util_bitcount(wrmask) != intrin->src[0].ssa->num_components)
      return false;

   if (nir_intrinsic_align_mul(intrin) % 4 || nir_intrinsic_align_offset(intrin) % 4)
      return false;

   return true;
}

void init_context(isel_context *ctx, nir_shader *shader);
void cleanup_context(isel_context *ctx);

isel_context
setup_isel_context(Program* program,
                   unsigned shader_count,
                   struct nir_shader *const *shaders,
                   ac_shader_config* config,
                   struct radv_shader_args *args,
                   bool is_gs_copy_shader);

}