1 /*
2 * Copyright 2016 Red Hat.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include "util/u_inlines.h"
24 #include "util/u_math.h"
25 #include "util/u_memory.h"
26 #include "pipe/p_shader_tokens.h"
27 #include "draw/draw_context.h"
28 #include "draw/draw_vertex.h"
29 #include "sp_context.h"
30 #include "sp_screen.h"
31 #include "sp_state.h"
32 #include "sp_texture.h"
33 #include "sp_tex_sample.h"
34 #include "sp_tex_tile_cache.h"
35 #include "tgsi/tgsi_parse.h"
36
37 static void
cs_prepare(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine,int local_x,int local_y,int local_z,int g_w,int g_h,int g_d,int b_w,int b_h,int b_d,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)38 cs_prepare(const struct sp_compute_shader *cs,
39 struct tgsi_exec_machine *machine,
40 int local_x, int local_y, int local_z,
41 int g_w, int g_h, int g_d,
42 int b_w, int b_h, int b_d,
43 struct tgsi_sampler *sampler,
44 struct tgsi_image *image,
45 struct tgsi_buffer *buffer )
46 {
47 int j;
48 /*
49 * Bind tokens/shader to the interpreter's machine state.
50 */
51 tgsi_exec_machine_bind_shader(machine,
52 cs->tokens,
53 sampler, image, buffer);
54
55 if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
56 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
57 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
58 machine->SystemValue[i].xyzw[0].i[j] = local_x + j;
59 machine->SystemValue[i].xyzw[1].i[j] = local_y;
60 machine->SystemValue[i].xyzw[2].i[j] = local_z;
61 }
62 }
63
64 if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
65 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
66 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
67 machine->SystemValue[i].xyzw[0].i[j] = g_w;
68 machine->SystemValue[i].xyzw[1].i[j] = g_h;
69 machine->SystemValue[i].xyzw[2].i[j] = g_d;
70 }
71 }
72
73 if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
74 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
75 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
76 machine->SystemValue[i].xyzw[0].i[j] = b_w;
77 machine->SystemValue[i].xyzw[1].i[j] = b_h;
78 machine->SystemValue[i].xyzw[2].i[j] = b_d;
79 }
80 }
81 }
82
83 static bool
cs_run(const struct sp_compute_shader * cs,int g_w,int g_h,int g_d,struct tgsi_exec_machine * machine,bool restart)84 cs_run(const struct sp_compute_shader *cs,
85 int g_w, int g_h, int g_d,
86 struct tgsi_exec_machine *machine, bool restart)
87 {
88 if (!restart) {
89 if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
90 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
91 int j;
92 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
93 machine->SystemValue[i].xyzw[0].i[j] = g_w;
94 machine->SystemValue[i].xyzw[1].i[j] = g_h;
95 machine->SystemValue[i].xyzw[2].i[j] = g_d;
96 }
97 }
98 }
99
100 tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
101
102 if (machine->pc != -1)
103 return true;
104 return false;
105 }
106
/*
 * Execute all threads of one workgroup.  Threads that stop at a barrier
 * return true from cs_run(); when any thread hit a barrier, the whole
 * group is re-run in resume mode until every thread has finished.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   int t;
   bool any_hit_barrier;
   bool resuming = false;

   do {
      any_hit_barrier = false;
      for (t = 0; t < num_threads; t++)
         any_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[t], resuming);
      /* Another pass is needed iff at least one thread paused. */
      resuming = any_hit_barrier;
   } while (resuming);
}
127
128 static void
cs_delete(const struct sp_compute_shader * cs,struct tgsi_exec_machine * machine)129 cs_delete(const struct sp_compute_shader *cs,
130 struct tgsi_exec_machine *machine)
131 {
132 if (machine->Tokens == cs->tokens) {
133 tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
134 }
135 }
136
137 static void
fill_grid_size(struct pipe_context * context,const struct pipe_grid_info * info,uint32_t grid_size[3])138 fill_grid_size(struct pipe_context *context,
139 const struct pipe_grid_info *info,
140 uint32_t grid_size[3])
141 {
142 struct pipe_transfer *transfer;
143 uint32_t *params;
144 if (!info->indirect) {
145 grid_size[0] = info->grid[0];
146 grid_size[1] = info->grid[1];
147 grid_size[2] = info->grid[2];
148 return;
149 }
150 params = pipe_buffer_map_range(context, info->indirect,
151 info->indirect_offset,
152 3 * sizeof(uint32_t),
153 PIPE_MAP_READ,
154 &transfer);
155
156 if (!transfer)
157 return;
158
159 grid_size[0] = params[0];
160 grid_size[1] = params[1];
161 grid_size[2] = params[2];
162 pipe_buffer_unmap(context, transfer);
163 }
164
165 void
softpipe_launch_grid(struct pipe_context * context,const struct pipe_grid_info * info)166 softpipe_launch_grid(struct pipe_context *context,
167 const struct pipe_grid_info *info)
168 {
169 struct softpipe_context *softpipe = softpipe_context(context);
170 struct sp_compute_shader *cs = softpipe->cs;
171 int num_threads_in_group;
172 struct tgsi_exec_machine **machines;
173 int bwidth, bheight, bdepth;
174 int local_x, local_y, local_z, i;
175 int g_w, g_h, g_d;
176 uint32_t grid_size[3] = {0};
177 void *local_mem = NULL;
178
179 softpipe_update_compute_samplers(softpipe);
180 bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
181 bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
182 bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
183 num_threads_in_group = DIV_ROUND_UP(bwidth, TGSI_QUAD_SIZE) * bheight * bdepth;
184
185 fill_grid_size(context, info, grid_size);
186
187 if (cs->shader.req_local_mem) {
188 local_mem = CALLOC(1, cs->shader.req_local_mem);
189 }
190
191 machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
192 if (!machines) {
193 FREE(local_mem);
194 return;
195 }
196
197 /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */
198 int idx = 0;
199 for (local_z = 0; local_z < bdepth; local_z++) {
200 for (local_y = 0; local_y < bheight; local_y++) {
201 for (local_x = 0; local_x < bwidth; local_x += TGSI_QUAD_SIZE) {
202 machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
203
204 machines[idx]->LocalMem = local_mem;
205 machines[idx]->LocalMemSize = cs->shader.req_local_mem;
206 machines[idx]->NonHelperMask = (1 << (MIN2(TGSI_QUAD_SIZE, bwidth - local_x))) - 1;
207 cs_prepare(cs, machines[idx],
208 local_x, local_y, local_z,
209 grid_size[0], grid_size[1], grid_size[2],
210 bwidth, bheight, bdepth,
211 (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
212 (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
213 (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
214 tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
215 softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
216 softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
217 idx++;
218 }
219 }
220 }
221
222 for (g_d = 0; g_d < grid_size[2]; g_d++) {
223 for (g_h = 0; g_h < grid_size[1]; g_h++) {
224 for (g_w = 0; g_w < grid_size[0]; g_w++) {
225 run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
226 }
227 }
228 }
229
230 if (softpipe->active_statistics_queries) {
231 softpipe->pipeline_statistics.cs_invocations +=
232 grid_size[0] * grid_size[1] * grid_size[2];
233 }
234
235 for (i = 0; i < num_threads_in_group; i++) {
236 cs_delete(cs, machines[i]);
237 tgsi_exec_machine_destroy(machines[i]);
238 }
239
240 FREE(local_mem);
241 FREE(machines);
242 }
243