/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_bg_eot.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "agx_compile.h"
#include "agx_device.h"
#include "agx_nir.h"
#include "agx_nir_texture.h"
#include "agx_tilebuffer.h"
#include "agx_usc.h"
#include "libagx_shaders.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
#include "pool.h"

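/* Rewrite load_texture_handle_agx intrinsics to a (uniform u0, index * 24 B)
 * pair, so lowered texture accesses read their descriptors from a table based
 * at uniform 0 with a 24-byte stride per descriptor.
 */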
static bool
lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(
      &intr->def,
      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));

   return true;
}

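/* Common compile path for background/end-of-tile programs: preprocess the NIR,
 * lower tilebuffer access when a tilebuffer layout is supplied, compile with
 * the AGX backend, and upload the binary into the cache's pool.
 */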
static struct agx_bg_eot_shader *
agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader,
                          struct agx_shader_key *key,
                          struct agx_tilebuffer_layout *tib)
{
   agx_nir_lower_texture(shader);
   agx_preprocess_nir(shader, cache->dev->libagx);
   if (tib) {
      unsigned bindless_base = 0;
      agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, NULL);
      agx_nir_lower_monolithic_msaa(shader, tib->nr_samples);
      agx_nir_lower_multisampled_image_store(shader);
      agx_nir_lower_texture(shader);

      nir_shader_intrinsics_pass(shader, lower_tex_handle_to_u0,
                                 nir_metadata_control_flow, NULL);
   }

   key->libagx = cache->dev->libagx;

   struct agx_bg_eot_shader *res = rzalloc(cache->ht, struct agx_bg_eot_shader);
   struct agx_shader_part bin;
   agx_compile_shader_nir(shader, key, NULL, &bin);

   res->info = bin.info;
   res->ptr = agx_pool_upload_aligned_with_bo(
      &cache->pool, bin.binary, bin.info.binary_size, 128, &res->bo);
   free(bin.binary);
   ralloc_free(shader);

   return res;
}

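/* Emit the background value for a single render target: either a texel fetch
 * of the render target's current contents (AGX_BG_LOAD) or a clear colour read
 * from the preamble uniforms (AGX_BG_CLEAR).
 */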
static nir_def *
build_background_op(nir_builder *b, enum agx_bg_eot_op op, unsigned rt,
                    unsigned nr, bool msaa, bool layered)
{
   if (op == AGX_BG_LOAD) {
      nir_def *coord = nir_u2u32(b, nir_load_pixel_coord(b));

      if (layered) {
         coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
                          nir_load_layer_id(b));
      }

      nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
      /* The type doesn't matter as long as it matches the store */
      tex->dest_type = nir_type_uint32;
      tex->sampler_dim = msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
      tex->is_array = layered;
      tex->op = msaa ? nir_texop_txf_ms : nir_texop_txf;
      tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);

      /* Layer is necessarily already in-bounds so we do not want the compiler
       * to clamp it, which would require reading the descriptor
       */
      tex->backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP;

      if (msaa) {
         tex->src[1] =
            nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_load_sample_id(b));
         b->shader->info.fs.uses_sample_shading = true;
      } else {
         tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0));
      }

      tex->coord_components = layered ? 3 : 2;
      tex->texture_index = rt * 2;
      nir_def_init(&tex->instr, &tex->def, 4, 32);
      nir_builder_instr_insert(b, &tex->instr);

      return nir_trim_vector(b, &tex->def, nr);
   } else {
      assert(op == AGX_BG_CLEAR);

      return nir_load_preamble(b, nr, 32, 4 + (rt * 8));
   }
}

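/* Build the background fragment shader for a given key: for each active render
 * target, compute its background value (load or clear) and store it to the
 * corresponding colour output.
 */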
static struct agx_bg_eot_shader *
agx_build_background_shader(struct agx_bg_eot_cache *cache,
                            struct agx_bg_eot_key *key)
{
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, &agx_nir_options, "agx_background");
   b.shader->info.fs.untyped_color_outputs = true;

   struct agx_shader_key compiler_key = {
      .fs.ignore_tib_dependencies = true,
      .reserved_preamble = key->reserved_preamble,
   };

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_BG_EOT_NONE)
         continue;

      unsigned nr = util_format_get_nr_components(key->tib.logical_format[rt]);
      bool msaa = key->tib.nr_samples > 1;
      bool layered = key->tib.layered;
      assert(nr > 0);

      nir_store_output(
         &b, build_background_op(&b, key->op[rt], rt, nr, msaa, layered),
         nir_imm_int(&b, 0), .write_mask = BITFIELD_MASK(nr),
         .src_type = nir_type_uint32,
         .io_semantics.location = FRAG_RESULT_DATA0 + rt,
         .io_semantics.num_slots = 1);

      b.shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DATA0 + rt);
   }

   return agx_compile_bg_eot_shader(cache, b.shader, &compiler_key, &key->tib);
}

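/* Build the end-of-tile compute shader: for each stored (non-spilled) render
 * target, write the tilebuffer contents out to memory with a block image
 * store.
 */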
static struct agx_bg_eot_shader *
agx_build_end_of_tile_shader(struct agx_bg_eot_cache *cache,
                             struct agx_bg_eot_key *key)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                                  &agx_nir_options, "agx_eot");

   enum glsl_sampler_dim dim =
      (key->tib.nr_samples > 1) ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_BG_EOT_NONE)
         continue;

      /* The end-of-tile shader cannot handle spilled render targets, so skip
       * them. If blits are needed with spilled render targets, other parts of
       * the driver must implement them.
       */
      if (key->tib.spilled[rt])
         continue;

      assert(key->op[rt] == AGX_EOT_STORE);
      unsigned offset_B = agx_tilebuffer_offset_B(&key->tib, rt);

      nir_def *layer = nir_undef(&b, 1, 16);
      if (key->tib.layered)
         layer = nir_u2u16(&b, nir_load_layer_id(&b));

      nir_image_store_block_agx(
         &b, nir_imm_intN_t(&b, rt, 16), nir_imm_intN_t(&b, offset_B, 16),
         layer, .format = agx_tilebuffer_physical_format(&key->tib, rt),
         .image_dim = dim, .image_array = key->tib.layered);
   }

   struct agx_shader_key compiler_key = {
      .reserved_preamble = key->reserved_preamble,
   };

   return agx_compile_bg_eot_shader(cache, b.shader, &compiler_key, NULL);
}

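/* Look up (or build and cache) the background/end-of-tile program matching the
 * given key. Keys containing any AGX_EOT_STORE op select an end-of-tile
 * shader; otherwise a background shader is built.
 */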
struct agx_bg_eot_shader *
agx_get_bg_eot_shader(struct agx_bg_eot_cache *cache,
                      struct agx_bg_eot_key *key)
{
   struct hash_entry *ent = _mesa_hash_table_search(cache->ht, key);
   if (ent)
      return ent->data;

   struct agx_bg_eot_shader *ret = NULL;

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_EOT_STORE) {
         ret = agx_build_end_of_tile_shader(cache, key);
         break;
      }
   }

   if (!ret)
      ret = agx_build_background_shader(cache, key);

   ret->key = *key;
   _mesa_hash_table_insert(cache->ht, &ret->key, ret);
   return ret;
}

DERIVE_HASH_TABLE(agx_bg_eot_key);

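/* Initialize the cache: create the upload pool for internal programs, the hash
 * table of built shaders, and the lock guarding precompiled uploads.
 */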
void
agx_bg_eot_init(struct agx_bg_eot_cache *cache, struct agx_device *dev)
{
   agx_pool_init(&cache->pool, dev, "Internal programs",
                 AGX_BO_EXEC | AGX_BO_LOW_VA, true);
   simple_mtx_init(&cache->lock, mtx_plain);
   cache->ht = agx_bg_eot_key_table_create(NULL);
   cache->dev = dev;
}

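/* Tear down the cache, releasing the pool, hash table, and lock. */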
void
agx_bg_eot_cleanup(struct agx_bg_eot_cache *cache)
{
   agx_pool_cleanup(&cache->pool);
   _mesa_hash_table_destroy(cache->ht, NULL);
   simple_mtx_destroy(&cache->lock);
   cache->ht = NULL;
   cache->dev = NULL;
}

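/* Upload a precompiled libagx program and bake its launch/USC words, with the
 * cache lock held. The result is published with an atomic store so lock-free
 * readers in agx_get_precompiled observe a fully initialized shader.
 */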
static struct agx_precompiled_shader *
agx_get_precompiled_locked(struct agx_bg_eot_cache *cache, unsigned program)
{
   simple_mtx_assert_locked(&cache->lock);

   /* It is possible that, while waiting for the lock, another thread uploaded
    * the shader. Check for that so we don't double-upload.
    */
   if (cache->precomp[program])
      return cache->precomp[program];

   /* Otherwise, we need to upload. */
   struct agx_precompiled_shader *p =
      ralloc(cache->ht, struct agx_precompiled_shader);

   const uint32_t *bin = cache->dev->libagx_programs[program];
   const struct agx_precompiled_kernel_info *info = (void *)bin;
   const void *binary = (const uint8_t *)bin + sizeof(*info);

   assert(info->main_offset == 0 || program != LIBAGX_HELPER);

   p->b.workgroup =
      agx_workgroup(info->workgroup_size[0], info->workgroup_size[1],
                    info->workgroup_size[2]);

   p->ptr = agx_pool_upload_aligned_with_bo(&cache->pool, binary,
                                            info->binary_size, 128, &p->bo);

   /* Bake launch */
   agx_pack(&p->b.launch, CDM_LAUNCH_WORD_0, cfg) {
      cfg.sampler_state_register_count = 1;
      cfg.uniform_register_count = info->push_count;
      cfg.preshader_register_count = info->nr_preamble_gprs;
   }

   /* Bake USC */
   struct agx_usc_builder b =
      agx_usc_builder(p->b.usc.data, sizeof(p->b.usc.data));

   agx_usc_immediates(&b, &info->rodata, p->ptr);

   if (info->uses_txf)
      agx_usc_push_packed(&b, SAMPLER, cache->dev->txf_sampler);

   agx_usc_shared(&b, info->local_size, info->imageblock_stride, 0);

   agx_usc_pack(&b, SHADER, cfg) {
      cfg.code = agx_usc_addr(cache->dev, p->ptr + info->main_offset);
      cfg.unk_2 = 3;
   }

   agx_usc_pack(&b, REGISTERS, cfg) {
      cfg.register_count = info->nr_gprs;
      cfg.spill_size = 0;
   }

   if (info->nr_preamble_gprs) {
      agx_usc_pack(&b, PRESHADER, cfg) {
         cfg.code = agx_usc_addr(cache->dev, p->ptr + info->preamble_offset);
      }
   } else {
      agx_usc_pack(&b, NO_PRESHADER, cfg)
         ;
   }

   p->b.usc.size = b.head - p->b.usc.data;

   /* We must only write to the cache once we are done compiling, since other
    * threads may be reading the cache concurrently. Do this last.
    */
   p_atomic_set(&cache->precomp[program], p);
   return p;
}

struct agx_precompiled_shader *
agx_get_precompiled(struct agx_bg_eot_cache *cache, unsigned program)
{
   /* Shaders are immutable once written, so if we atomically read a non-NULL
    * shader, then we have a valid cached shader and are done.
    */
   struct agx_precompiled_shader *ret = p_atomic_read(cache->precomp + program);

   if (ret != NULL)
      return ret;

   /* Otherwise, take the lock and upload. */
   simple_mtx_lock(&cache->lock);
   ret = agx_get_precompiled_locked(cache, program);
   simple_mtx_unlock(&cache->lock);

   return ret;
}

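/* Return the address of the uploaded LIBAGX_HELPER program, with the low bit
 * set on the returned pointer.
 */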
uint64_t
agx_helper_program(struct agx_bg_eot_cache *cache)
{
   struct agx_precompiled_shader *pc =
      agx_get_precompiled(cache, LIBAGX_HELPER);

   return pc->ptr | 1;
}