/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_bg_eot.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "agx_compile.h"
#include "agx_device.h"
#include "agx_nir.h"
#include "agx_nir_texture.h"
#include "agx_tilebuffer.h"
#include "agx_usc.h"
#include "libagx_shaders.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
#include "pool.h"

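/*
 * Map load_texture_handle_agx to a (base uniform, byte offset) pair rooted at
 * u0, scaling the handle index by the 24-byte texture descriptor stride;
 * presumably this matches where the driver binds texture descriptors for
 * these internal programs.
 */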
static bool
lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(
      &intr->def,
      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));

   return true;
}

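/*
 * Lower and compile an internal background/end-of-tile shader, then upload the
 * binary into the cache's pool. Background (fragment) shaders pass a
 * tilebuffer layout so tilebuffer access, monolithic MSAA, and texture handles
 * get lowered; end-of-tile shaders pass tib = NULL. Takes ownership of the NIR
 * shader and frees it once compiled.
 */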
static struct agx_bg_eot_shader *
agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader,
                          struct agx_shader_key *key,
                          struct agx_tilebuffer_layout *tib)
{
   agx_nir_lower_texture(shader);
   agx_preprocess_nir(shader, cache->dev->libagx);
   if (tib) {
      unsigned bindless_base = 0;
      agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, NULL);
      agx_nir_lower_monolithic_msaa(shader, tib->nr_samples);
      agx_nir_lower_multisampled_image_store(shader);
      agx_nir_lower_texture(shader);

      nir_shader_intrinsics_pass(shader, lower_tex_handle_to_u0,
                                 nir_metadata_control_flow, NULL);
   }

   key->libagx = cache->dev->libagx;

   struct agx_bg_eot_shader *res = rzalloc(cache->ht, struct agx_bg_eot_shader);
   struct agx_shader_part bin;
   agx_compile_shader_nir(shader, key, NULL, &bin);

   res->info = bin.info;
   res->ptr = agx_pool_upload_aligned_with_bo(
      &cache->pool, bin.binary, bin.info.binary_size, 128, &res->bo);
   free(bin.binary);
   ralloc_free(shader);

   return res;
}

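/*
 * Build the value written for one render target by the background program:
 * either a texel fetch of the previous contents at the current pixel (and
 * sample/layer, if applicable) for AGX_BG_LOAD, or the clear colour read from
 * preamble uniforms for AGX_BG_CLEAR.
 */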
static nir_def *
build_background_op(nir_builder *b, enum agx_bg_eot_op op, unsigned rt,
                    unsigned nr, bool msaa, bool layered)
{
   if (op == AGX_BG_LOAD) {
      nir_def *coord = nir_u2u32(b, nir_load_pixel_coord(b));

      if (layered) {
         coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
                          nir_load_layer_id(b));
      }

      nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
      /* The type doesn't matter as long as it matches the store */
      tex->dest_type = nir_type_uint32;
      tex->sampler_dim = msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
      tex->is_array = layered;
      tex->op = msaa ? nir_texop_txf_ms : nir_texop_txf;
      tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);

      /* Layer is necessarily already in-bounds so we do not want the compiler
       * to clamp it, which would require reading the descriptor
       */
      tex->backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP;

      if (msaa) {
         tex->src[1] =
            nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_load_sample_id(b));
         b->shader->info.fs.uses_sample_shading = true;
      } else {
         tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0));
      }

      tex->coord_components = layered ? 3 : 2;
      tex->texture_index = rt * 2;
      nir_def_init(&tex->instr, &tex->def, 4, 32);
      nir_builder_instr_insert(b, &tex->instr);

      return nir_trim_vector(b, &tex->def, nr);
   } else {
      assert(op == AGX_BG_CLEAR);

      return nir_load_preamble(b, nr, 32, 4 + (rt * 8));
   }
}

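/*
 * Build the background fragment shader for a key: for each render target with
 * a background op, write either the loaded contents or the clear colour as an
 * untyped colour output into the tilebuffer.
 */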
static struct agx_bg_eot_shader *
agx_build_background_shader(struct agx_bg_eot_cache *cache,
                            struct agx_bg_eot_key *key)
{
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, &agx_nir_options, "agx_background");
   b.shader->info.fs.untyped_color_outputs = true;

   struct agx_shader_key compiler_key = {
      .fs.ignore_tib_dependencies = true,
      .reserved_preamble = key->reserved_preamble,
   };

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_BG_EOT_NONE)
         continue;

      unsigned nr = util_format_get_nr_components(key->tib.logical_format[rt]);
      bool msaa = key->tib.nr_samples > 1;
      bool layered = key->tib.layered;
      assert(nr > 0);

      nir_store_output(
         &b, build_background_op(&b, key->op[rt], rt, nr, msaa, layered),
         nir_imm_int(&b, 0), .write_mask = BITFIELD_MASK(nr),
         .src_type = nir_type_uint32,
         .io_semantics.location = FRAG_RESULT_DATA0 + rt,
         .io_semantics.num_slots = 1);

      b.shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DATA0 + rt);
   }

   return agx_compile_bg_eot_shader(cache, b.shader, &compiler_key, &key->tib);
}

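/*
 * Build the end-of-tile shader: a compute program that flushes each stored,
 * non-spilled render target out of the tilebuffer with a block image store in
 * the tilebuffer's physical format.
 */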
static struct agx_bg_eot_shader *
agx_build_end_of_tile_shader(struct agx_bg_eot_cache *cache,
                             struct agx_bg_eot_key *key)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                                  &agx_nir_options, "agx_eot");

   enum glsl_sampler_dim dim =
      (key->tib.nr_samples > 1) ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_BG_EOT_NONE)
         continue;

      /* The end-of-tile shader is unsuitable to handle spilled render targets.
       * Skip them. If blits are needed with spilled render targets, other parts
       * of the driver need to implement them.
       */
      if (key->tib.spilled[rt])
         continue;

      assert(key->op[rt] == AGX_EOT_STORE);
      unsigned offset_B = agx_tilebuffer_offset_B(&key->tib, rt);

      nir_def *layer = nir_undef(&b, 1, 16);
      if (key->tib.layered)
         layer = nir_u2u16(&b, nir_load_layer_id(&b));

      nir_image_store_block_agx(
         &b, nir_imm_intN_t(&b, rt, 16), nir_imm_intN_t(&b, offset_B, 16),
         layer, .format = agx_tilebuffer_physical_format(&key->tib, rt),
         .image_dim = dim, .image_array = key->tib.layered);
   }

   struct agx_shader_key compiler_key = {
      .reserved_preamble = key->reserved_preamble,
   };

   return agx_compile_bg_eot_shader(cache, b.shader, &compiler_key, NULL);
}

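/*
 * Get the background or end-of-tile shader for a key, compiling and caching it
 * on first use. A key with any AGX_EOT_STORE op produces an end-of-tile
 * shader; otherwise a background shader is built.
 */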
struct agx_bg_eot_shader *
agx_get_bg_eot_shader(struct agx_bg_eot_cache *cache,
                      struct agx_bg_eot_key *key)
{
   struct hash_entry *ent = _mesa_hash_table_search(cache->ht, key);
   if (ent)
      return ent->data;

   struct agx_bg_eot_shader *ret = NULL;

   for (unsigned rt = 0; rt < ARRAY_SIZE(key->op); ++rt) {
      if (key->op[rt] == AGX_EOT_STORE) {
         ret = agx_build_end_of_tile_shader(cache, key);
         break;
      }
   }

   if (!ret)
      ret = agx_build_background_shader(cache, key);

   ret->key = *key;
   _mesa_hash_table_insert(cache->ht, &ret->key, ret);
   return ret;
}

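/* Derive the hash table helpers (including agx_bg_eot_key_table_create) used
 * for the background/end-of-tile shader cache keyed on agx_bg_eot_key.
 */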
DERIVE_HASH_TABLE(agx_bg_eot_key);

void
agx_bg_eot_init(struct agx_bg_eot_cache *cache, struct agx_device *dev)
{
   agx_pool_init(&cache->pool, dev, "Internal programs",
                 AGX_BO_EXEC | AGX_BO_LOW_VA, true);
   simple_mtx_init(&cache->lock, mtx_plain);
   cache->ht = agx_bg_eot_key_table_create(NULL);
   cache->dev = dev;
}

void
agx_bg_eot_cleanup(struct agx_bg_eot_cache *cache)
{
   agx_pool_cleanup(&cache->pool);
   _mesa_hash_table_destroy(cache->ht, NULL);
   simple_mtx_destroy(&cache->lock);
   cache->ht = NULL;
   cache->dev = NULL;
}

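/*
 * Upload a precompiled libagx program and bake its CDM launch word and USC
 * words for reuse at dispatch time. Must be called with the cache lock held;
 * the result is published atomically so lock-free readers in
 * agx_get_precompiled() only ever see a fully initialized entry.
 */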
static struct agx_precompiled_shader *
agx_get_precompiled_locked(struct agx_bg_eot_cache *cache, unsigned program)
{
   simple_mtx_assert_locked(&cache->lock);

   /* It is possible that, while waiting for the lock, another thread uploaded
    * the shader. Check for that so we don't double-upload.
    */
   if (cache->precomp[program])
      return cache->precomp[program];

   /* Otherwise, we need to upload. */
   struct agx_precompiled_shader *p =
      ralloc(cache->ht, struct agx_precompiled_shader);

   const uint32_t *bin = cache->dev->libagx_programs[program];
   const struct agx_precompiled_kernel_info *info = (void *)bin;
   const void *binary = (const uint8_t *)bin + sizeof(*info);

   assert(info->main_offset == 0 || program != LIBAGX_HELPER);

   p->b.workgroup =
      agx_workgroup(info->workgroup_size[0], info->workgroup_size[1],
                    info->workgroup_size[2]);

   p->ptr = agx_pool_upload_aligned_with_bo(&cache->pool, binary,
                                            info->binary_size, 128, &p->bo);

   /* Bake launch */
   agx_pack(&p->b.launch, CDM_LAUNCH_WORD_0, cfg) {
      cfg.sampler_state_register_count = 1;
      cfg.uniform_register_count = info->push_count;
      cfg.preshader_register_count = info->nr_preamble_gprs;
   }

   /* Bake USC */
   struct agx_usc_builder b =
      agx_usc_builder(p->b.usc.data, sizeof(p->b.usc.data));

   agx_usc_immediates(&b, &info->rodata, p->ptr);

   if (info->uses_txf)
      agx_usc_push_packed(&b, SAMPLER, cache->dev->txf_sampler);

   agx_usc_shared(&b, info->local_size, info->imageblock_stride, 0);

   agx_usc_pack(&b, SHADER, cfg) {
      cfg.code = agx_usc_addr(cache->dev, p->ptr + info->main_offset);
      cfg.unk_2 = 3;
   }

   agx_usc_pack(&b, REGISTERS, cfg) {
      cfg.register_count = info->nr_gprs;
      cfg.spill_size = 0;
   }

   if (info->nr_preamble_gprs) {
      agx_usc_pack(&b, PRESHADER, cfg) {
         cfg.code = agx_usc_addr(cache->dev, p->ptr + info->preamble_offset);
      }
   } else {
      agx_usc_pack(&b, NO_PRESHADER, cfg)
         ;
   }

   p->b.usc.size = b.head - p->b.usc.data;

   /* We must only write to the cache once we are done compiling, since other
    * threads may be reading the cache concurrently. Do this last.
    */
   p_atomic_set(&cache->precomp[program], p);
   return p;
}

struct agx_precompiled_shader *
agx_get_precompiled(struct agx_bg_eot_cache *cache, unsigned program)
{
   /* Shaders are immutable once written, so if we atomically read a non-NULL
    * shader, then we have a valid cached shader and are done.
    */
   struct agx_precompiled_shader *ret = p_atomic_read(cache->precomp + program);

   if (ret != NULL)
      return ret;

   /* Otherwise, take the lock and upload. */
   simple_mtx_lock(&cache->lock);
   ret = agx_get_precompiled_locked(cache, program);
   simple_mtx_unlock(&cache->lock);

   return ret;
}

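/*
 * Get the GPU address of the precompiled helper program, uploading it on first
 * use. The low bit is set in the returned address; presumably this acts as an
 * enable flag when the address is programmed into hardware.
 */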
uint64_t
agx_helper_program(struct agx_bg_eot_cache *cache)
{
   struct agx_precompiled_shader *pc =
      agx_get_precompiled(cache, LIBAGX_HELPER);

   return pc->ptr | 1;
}