/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <stdint.h>
#include <stdio.h>
#include "pan_bo.h"
#include "pan_shader.h"
#include "pan_scoreboard.h"
#include "pan_encoder.h"
#include "pan_indirect_dispatch.h"
#include "pan_pool.h"
#include "pan_util.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_memory.h"
#include "util/macros.h"

/*
 * Emit a push-constant load of one field of struct pan_indirect_dispatch_info.
 *
 * The load has a single component whose bit size is the size of the field in
 * bytes times 8, uses an immediate zero as its offset source, and sets .base
 * to the field's byte offset within the struct.
 *
 * b:    pointer to the nir_builder to emit into
 * name: member designator of the field to load (e.g. job, num_wg_sysval[0])
 */
#define get_input_field(b, name) \
        nir_load_push_constant((b), \
               1, sizeof(((struct pan_indirect_dispatch_info *)0)->name) * 8, \
               nir_imm_int((b), 0), \
               .base = offsetof(struct pan_indirect_dispatch_info, name))

43 static mali_ptr
get_rsd(const struct panfrost_device * dev)44 get_rsd(const struct panfrost_device *dev)
45 {
46         return dev->indirect_dispatch.descs->ptr.gpu;
47 }
48 
49 static mali_ptr
get_tls(const struct panfrost_device * dev)50 get_tls(const struct panfrost_device *dev)
51 {
52         return dev->indirect_dispatch.descs->ptr.gpu +
53                pan_size(RENDERER_STATE);
54 }
55 
56 unsigned
GENX(pan_indirect_dispatch_emit)57 GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool,
58                                  struct pan_scoreboard *scoreboard,
59                                  const struct pan_indirect_dispatch_info *inputs)
60 {
61         struct panfrost_device *dev = pool->dev;
62         struct panfrost_ptr job =
63                 pan_pool_alloc_desc(pool, COMPUTE_JOB);
64         void *invocation =
65                 pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
66 
67         panfrost_pack_work_groups_compute(invocation,
68                                           1, 1, 1, 1, 1, 1,
69                                           false, false);
70 
71         pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
72                 cfg.job_task_split = 2;
73         }
74 
75         pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
76                 cfg.state = get_rsd(dev);
77                 cfg.thread_storage = get_tls(pool->dev);
78                 cfg.push_uniforms =
79                         pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
80         }
81 
82         return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
83                                 false, true, 0, 0, &job, false);
84 }
85 
86 void
GENX(pan_indirect_dispatch_init)87 GENX(pan_indirect_dispatch_init)(struct panfrost_device *dev)
88 {
89         nir_builder b =
90                 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
91                                                GENX(pan_shader_get_compiler_options)(),
92                                                "%s", "indirect_dispatch");
93         nir_ssa_def *zero = nir_imm_int(&b, 0);
94         nir_ssa_def *one = nir_imm_int(&b, 1);
95         nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);
96         nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);
97         nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);
98         nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);
99 
100         nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);
101         nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z));
102 
103         nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));
104         {
105                 nir_ssa_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));
106                 nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8);
107                 nir_store_global(&b, type_ptr, 1, ntype, 1);
108         }
109         nir_push_else(&b, NULL);
110         {
111                 nir_ssa_def *job_dim_ptr = nir_iadd(&b, job_hdr_ptr,
112                                 nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
113                 nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one);
114                 nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one);
115                 nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one);
116                 nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32);
117                 nir_ssa_def *dims = nir_channel(&b, job_dim, 0);
118                 nir_ssa_def *split = nir_channel(&b, job_dim, 1);
119                 nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);
120                 nir_ssa_def *num_wg_y_split = nir_iadd(&b, num_wg_x_split,
121                                 nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1)));
122                 nir_ssa_def *num_wg_z_split = nir_iadd(&b, num_wg_y_split,
123                                 nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1)));
124                 split = nir_ior(&b, split,
125                                 nir_ior(&b,
126                                         nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
127                                         nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
128                 dims = nir_ior(&b, dims,
129                                nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split),
130                                        nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split),
131                                                nir_ishl(&b, num_wg_z_m1, num_wg_z_split))));
132 
133                 nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);
134 
135                 nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);
136 
137                 nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0)));
138                 {
139                         nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1);
140                         nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, num_wg_y, 1);
141                         nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, num_wg_z, 1);
142                 }
143                 nir_pop_if(&b, NULL);
144         }
145 
146         nir_pop_if(&b, NULL);
147 
148         struct panfrost_compile_inputs inputs = {
149                 .gpu_id = dev->gpu_id,
150                 .fixed_sysval_ubo = -1,
151                 .no_ubo_to_push = true,
152         };
153         struct pan_shader_info shader_info;
154         struct util_dynarray binary;
155 
156         util_dynarray_init(&binary, NULL);
157         GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader_info);
158 
159         ralloc_free(b.shader);
160 
161         assert(!shader_info.tls_size);
162         assert(!shader_info.wls_size);
163         assert(!shader_info.sysvals.sysval_count);
164 
165         shader_info.push.count =
166                 DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4);
167 
168         dev->indirect_dispatch.bin =
169                 panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE,
170                                 "Indirect dispatch shader");
171 
172         memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
173         util_dynarray_fini(&binary);
174 
175         dev->indirect_dispatch.descs =
176                 panfrost_bo_create(dev,
177                                    pan_size(RENDERER_STATE) +
178                                    pan_size(LOCAL_STORAGE),
179                                    0, "Indirect dispatch descriptors");
180 
181         mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;
182 
183         void *rsd = dev->indirect_dispatch.descs->ptr.cpu;
184         pan_pack(rsd, RENDERER_STATE, cfg) {
185                 pan_shader_prepare_rsd(&shader_info, address, &cfg);
186         }
187 
188         void *tsd = dev->indirect_dispatch.descs->ptr.cpu +
189                     pan_size(RENDERER_STATE);
190         pan_pack(tsd, LOCAL_STORAGE, ls) {
191                 ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
192         };
193 }
194 
195 void
GENX(pan_indirect_dispatch_cleanup)196 GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev)
197 {
198         panfrost_bo_unreference(dev->indirect_dispatch.bin);
199         panfrost_bo_unreference(dev->indirect_dispatch.descs);
200 }
201