/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/
#include "util/u_memory.h"
#include "util/os_time.h"
#include "util/u_dump.h"
#include "util/u_string.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_coro.h"
#include "gallivm/lp_bld_nir.h"
#include "lp_state_cs.h"
#include "lp_context.h"
#include "lp_debug.h"
#include "lp_state.h"
#include "lp_perf.h"
#include "lp_screen.h"
#include "lp_memory.h"
#include "lp_query.h"
#include "lp_cs_tpool.h"
#include "frontend/sw_winsys.h"
#include "nir/nir_to_tgsi_info.h"
#include "util/mesa-sha1.h"
#include "nir_serialize.h"

/** Compute shader number (for debugging) */
static unsigned cs_no = 0;

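/* A single lp_cs_job_info describes one grid launch: it is filled out in
 * llvmpipe_launch_grid() and then read, never written, by each worker-thread
 * task that executes one workgroup of the dispatch.
 */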
struct lp_cs_job_info {
   unsigned grid_size[3];
   unsigned grid_base[3];
   unsigned block_size[3];
   unsigned req_local_mem;
   unsigned work_dim;
   bool zero_initialize_shared_memory;
   struct lp_cs_exec *current;
};

static void
generate_compute(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 struct lp_compute_shader_variant *variant)
{
   struct gallivm_state *gallivm = variant->gallivm;
   const struct lp_compute_shader_variant_key *key = &variant->key;
   char func_name[64], func_name_coro[64];
   LLVMTypeRef arg_types[19];
   LLVMTypeRef func_type, coro_func_type;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef context_ptr;
   LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
   LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
   LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
   LLVMValueRef work_dim_arg, thread_data_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   struct lp_build_sampler_soa *sampler;
   struct lp_build_image_soa *image;
   LLVMValueRef function, coro;
   struct lp_type cs_type;
   unsigned i;

   /*
    * This function has two parts:
    * a) set up the coroutine execution environment loop.
    * b) build the compute shader LLVM IR for use inside the coroutine.
    */
   assert(lp_native_vector_width / 32 >= 4);

   memset(&cs_type, 0, sizeof cs_type);
   cs_type.floating = TRUE; /* floating point values */
   cs_type.sign = TRUE; /* values are signed */
   cs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
   cs_type.width = 32; /* 32-bit float */
   cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
   snprintf(func_name, sizeof(func_name), "cs_variant");

   snprintf(func_name_coro, sizeof(func_name_coro), "cs_co_variant");

   arg_types[0] = variant->jit_cs_context_ptr_type;      /* context */
   arg_types[1] = int32_type;                            /* block_x_size */
   arg_types[2] = int32_type;                            /* block_y_size */
   arg_types[3] = int32_type;                            /* block_z_size */
   arg_types[4] = int32_type;                            /* grid_x */
   arg_types[5] = int32_type;                            /* grid_y */
   arg_types[6] = int32_type;                            /* grid_z */
   arg_types[7] = int32_type;                            /* grid_size_x */
   arg_types[8] = int32_type;                            /* grid_size_y */
   arg_types[9] = int32_type;                            /* grid_size_z */
   arg_types[10] = int32_type;                           /* work dim */
   arg_types[11] = variant->jit_cs_thread_data_ptr_type; /* per thread data */
   arg_types[12] = int32_type;                           /* coro only - num X loops */
   arg_types[13] = int32_type;                           /* coro only - partials */
   arg_types[14] = int32_type;                           /* coro block_x_size */
   arg_types[15] = int32_type;                           /* coro block_y_size */
   arg_types[16] = int32_type;                           /* coro block_z_size */
   arg_types[17] = int32_type;                           /* coro idx */
   arg_types[18] = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 0);
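   /* The outer dispatch function only takes the first twelve arguments;
    * the trailing seven (ARRAY_SIZE(arg_types) - 7) are private to the
    * coroutine entry point.
    */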
   func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                arg_types, ARRAY_SIZE(arg_types) - 7, 0);

   coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
                                     arg_types, ARRAY_SIZE(arg_types), 0);

   function = LLVMAddFunction(gallivm->module, func_name, func_type);
   LLVMSetFunctionCallConv(function, LLVMCCallConv);

   coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
   LLVMSetFunctionCallConv(coro, LLVMCCallConv);
   lp_build_coro_add_presplit(coro);

   variant->function = function;

   for (i = 0; i < ARRAY_SIZE(arg_types); ++i) {
      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
         lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
         if (i < ARRAY_SIZE(arg_types) - 7)
            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
      }
   }

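   /* A non-zero cache data_size means a disk-cached compiled module was
    * found for this variant, so none of the IR below needs to be built;
    * the cached machine code is used when the module is JIT-compiled.
    */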
   if (variant->gallivm->cache->data_size)
      return;

   context_ptr = LLVMGetParam(function, 0);
   block_x_size_arg = LLVMGetParam(function, 1);
   block_y_size_arg = LLVMGetParam(function, 2);
   block_z_size_arg = LLVMGetParam(function, 3);
   grid_x_arg = LLVMGetParam(function, 4);
   grid_y_arg = LLVMGetParam(function, 5);
   grid_z_arg = LLVMGetParam(function, 6);
   grid_size_x_arg = LLVMGetParam(function, 7);
   grid_size_y_arg = LLVMGetParam(function, 8);
   grid_size_z_arg = LLVMGetParam(function, 9);
   work_dim_arg = LLVMGetParam(function, 10);
   thread_data_ptr = LLVMGetParam(function, 11);

   lp_build_name(context_ptr, "context");
   lp_build_name(block_x_size_arg, "x_size");
   lp_build_name(block_y_size_arg, "y_size");
   lp_build_name(block_z_size_arg, "z_size");
   lp_build_name(grid_x_arg, "grid_x");
   lp_build_name(grid_y_arg, "grid_y");
   lp_build_name(grid_z_arg, "grid_z");
   lp_build_name(grid_size_x_arg, "grid_size_x");
   lp_build_name(grid_size_y_arg, "grid_size_y");
   lp_build_name(grid_size_z_arg, "grid_size_z");
   lp_build_name(work_dim_arg, "work_dim");
   lp_build_name(thread_data_ptr, "thread_data");

   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   builder = gallivm->builder;
   assert(builder);
   LLVMPositionBuilderAtEnd(builder, block);
   sampler = lp_llvm_sampler_soa_create(lp_cs_variant_key_samplers(key),
                                        MAX2(key->nr_samplers,
                                             key->nr_sampler_views));
   image = lp_llvm_image_soa_create(lp_cs_variant_key_images(key), key->nr_images);

   struct lp_build_loop_state loop_state[4];
   LLVMValueRef num_x_loop;
   LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
   num_x_loop = LLVMBuildAdd(gallivm->builder, block_x_size_arg, vec_length, "");
   num_x_loop = LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), "");
   num_x_loop = LLVMBuildUDiv(gallivm->builder, num_x_loop, vec_length, "");
   LLVMValueRef partials = LLVMBuildURem(gallivm->builder, block_x_size_arg, vec_length, "");
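   /* num_x_loop = ceil(block_x_size / vector_length), built above with the
    * usual (a + b - 1) / b round-up division.  For example, block_x_size = 10
    * with an 8-wide vector gives num_x_loop = 2 and partials = 2, i.e. only
    * two lanes are live in the last vector of each row.
    */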

   LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
   coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");

   /* build a ptr in memory to store all the frames in later. */
   LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
   LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
   LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);

   LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, coro_num_hdls, "coro_hdls");

   unsigned end_coroutine = INT_MAX;
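   /* end_coroutine is a sentinel for the reentry loop counter: once every
    * coroutine has completed, the counter is forced to end_coroutine - 1
    * (see below), so the incremented counter matches the equality test in
    * lp_build_loop_end_cond() and the reentry loop terminates.
    */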

   /*
    * This is the main coroutine execution loop. It iterates over the dimensions
    * and calls the coroutine main entrypoint on the first pass, but in subsequent
    * passes it checks if the coroutine has completed and resumes it if not.
    */
   /* take x_width - round up to type.length width */
   lp_build_loop_begin(&loop_state[3], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
   lp_build_loop_begin(&loop_state[2], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* z loop */
   lp_build_loop_begin(&loop_state[1], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* y loop */
   lp_build_loop_begin(&loop_state[0], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* x loop */
   {
      LLVMValueRef args[19];
      args[0] = context_ptr;
      args[1] = loop_state[0].counter;
      args[2] = loop_state[1].counter;
      args[3] = loop_state[2].counter;
      args[4] = grid_x_arg;
      args[5] = grid_y_arg;
      args[6] = grid_z_arg;
      args[7] = grid_size_x_arg;
      args[8] = grid_size_y_arg;
      args[9] = grid_size_z_arg;
      args[10] = work_dim_arg;
      args[11] = thread_data_ptr;
      args[12] = num_x_loop;
      args[13] = partials;
      args[14] = block_x_size_arg;
      args[15] = block_y_size_arg;
      args[16] = block_z_size_arg;

      /* idx = (z * (size_x * size_y)) + (y * size_x) + x */
      LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
                                               LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, ""), "");
      coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
                                  LLVMBuildMul(gallivm->builder, loop_state[1].counter,
                                               num_x_loop, ""), "");
      coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
                                  loop_state[0].counter, "");

      args[17] = coro_hdl_idx;

      args[18] = coro_mem;
      LLVMValueRef coro_entry = LLVMBuildGEP(gallivm->builder, coro_hdls, &coro_hdl_idx, 1, "");

      LLVMValueRef coro_hdl = LLVMBuildLoad(gallivm->builder, coro_entry, "coro_hdl");

      struct lp_build_if_state ifstate;
      LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[3].counter,
                                       lp_build_const_int32(gallivm, 0), "");
      /* first time here - call the coroutine function entry point */
      lp_build_if(&ifstate, gallivm, cmp);
      LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 19, "");
      LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
      lp_build_else(&ifstate);
      /* subsequent calls for this invocation - check if done. */
      LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
      struct lp_build_if_state ifstate2;
      lp_build_if(&ifstate2, gallivm, coro_done);
      /* if done destroy and force loop exit */
      lp_build_coro_destroy(gallivm, coro_hdl);
      lp_build_loop_force_set_counter(&loop_state[3], lp_build_const_int32(gallivm, end_coroutine - 1));
      lp_build_else(&ifstate2);
      /* otherwise resume the coroutine */
      lp_build_coro_resume(gallivm, coro_hdl);
      lp_build_endif(&ifstate2);
      lp_build_endif(&ifstate);
      lp_build_loop_force_reload_counter(&loop_state[3]);
   }
   lp_build_loop_end_cond(&loop_state[0],
                          num_x_loop,
                          NULL, LLVMIntUGE);
   lp_build_loop_end_cond(&loop_state[1],
                          block_y_size_arg,
                          NULL, LLVMIntUGE);
   lp_build_loop_end_cond(&loop_state[2],
                          block_z_size_arg,
                          NULL, LLVMIntUGE);
   lp_build_loop_end_cond(&loop_state[3],
                          lp_build_const_int32(gallivm, end_coroutine),
                          NULL, LLVMIntEQ);

   LLVMValueRef coro_mem_ptr = LLVMBuildLoad(builder, coro_mem, "");
   LLVMBuildCall(gallivm->builder, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");

   LLVMBuildRetVoid(builder);

   /* This is stage (b) - generate the compute shader code inside the coroutine. */
   LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
   context_ptr = LLVMGetParam(coro, 0);
   x_size_arg = LLVMGetParam(coro, 1);
   y_size_arg = LLVMGetParam(coro, 2);
   z_size_arg = LLVMGetParam(coro, 3);
   grid_x_arg = LLVMGetParam(coro, 4);
   grid_y_arg = LLVMGetParam(coro, 5);
   grid_z_arg = LLVMGetParam(coro, 6);
   grid_size_x_arg = LLVMGetParam(coro, 7);
   grid_size_y_arg = LLVMGetParam(coro, 8);
   grid_size_z_arg = LLVMGetParam(coro, 9);
   work_dim_arg = LLVMGetParam(coro, 10);
   thread_data_ptr = LLVMGetParam(coro, 11);
   num_x_loop = LLVMGetParam(coro, 12);
   partials = LLVMGetParam(coro, 13);
   block_x_size_arg = LLVMGetParam(coro, 14);
   block_y_size_arg = LLVMGetParam(coro, 15);
   block_z_size_arg = LLVMGetParam(coro, 16);
   LLVMValueRef coro_idx = LLVMGetParam(coro, 17);
   coro_mem = LLVMGetParam(coro, 18);
   block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
   LLVMPositionBuilderAtEnd(builder, block);
   {
      LLVMValueRef consts_ptr, num_consts_ptr;
      LLVMValueRef ssbo_ptr, num_ssbo_ptr;
      LLVMValueRef shared_ptr;
      LLVMValueRef kernel_args_ptr;
      struct lp_build_mask_context mask;
      struct lp_bld_tgsi_system_values system_values;

      memset(&system_values, 0, sizeof(system_values));
      consts_ptr = lp_jit_cs_context_constants(gallivm, context_ptr);
      num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr);
      ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr);
      num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr);
      kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr);

      shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr);

      LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
      coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");

      /* these are coroutine entrypoint necessities */
      LLVMValueRef coro_id = lp_build_coro_id(gallivm);
      LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, coro_idx, coro_num_hdls);

      LLVMValueRef alloced_ptr = LLVMBuildLoad(gallivm->builder, coro_mem, "");
      alloced_ptr = LLVMBuildGEP(gallivm->builder, alloced_ptr, &coro_entry, 1, "");
      LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
      LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
      LLVMValueRef tid_vals[3];
      LLVMValueRef tids_x[LP_MAX_VECTOR_LENGTH], tids_y[LP_MAX_VECTOR_LENGTH], tids_z[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, x_size_arg, vec_length, "");
      for (i = 0; i < cs_type.length; i++) {
         tids_x[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
         tids_y[i] = y_size_arg;
         tids_z[i] = z_size_arg;
      }
      tid_vals[0] = lp_build_gather_values(gallivm, tids_x, cs_type.length);
      tid_vals[1] = lp_build_gather_values(gallivm, tids_y, cs_type.length);
      tid_vals[2] = lp_build_gather_values(gallivm, tids_z, cs_type.length);
      system_values.thread_id = LLVMGetUndef(LLVMArrayType(LLVMVectorType(int32_type, cs_type.length), 3));
      for (i = 0; i < 3; i++)
         system_values.thread_id = LLVMBuildInsertValue(builder, system_values.thread_id, tid_vals[i], i, "");

      LLVMValueRef gtids[3] = { grid_x_arg, grid_y_arg, grid_z_arg };
      system_values.block_id = LLVMGetUndef(LLVMVectorType(int32_type, 3));
      for (i = 0; i < 3; i++)
         system_values.block_id = LLVMBuildInsertElement(builder, system_values.block_id, gtids[i], lp_build_const_int32(gallivm, i), "");

      LLVMValueRef gstids[3] = { grid_size_x_arg, grid_size_y_arg, grid_size_z_arg };
      system_values.grid_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
      for (i = 0; i < 3; i++)
         system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), "");

      system_values.work_dim = work_dim_arg;

      /* subgroup_id = ((z * block_size_x * block_size_y) + (y * block_size_x) + x) / subgroup_size
       *
       * this breaks if z or y is zero, so distribute the division to preserve ids
       *
       * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + (x / subgroup_size)
       *
       * except "x" is pre-divided here
       *
       * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + x
       */
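      /* Worked example: a 16x4x1 block with an 8-wide subgroup has
       * num_x_loop = 2; the coroutine at x = 1, y = 2, z = 0 gets
       * subgroup_id = 0 + (2 * 16) / 8 + 1 = 5.
       */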
      LLVMValueRef subgroup_id = LLVMBuildUDiv(builder,
                                               LLVMBuildMul(gallivm->builder, z_size_arg, LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""), ""),
                                               vec_length, "");
      subgroup_id = LLVMBuildAdd(gallivm->builder,
                                 subgroup_id,
                                 LLVMBuildUDiv(builder, LLVMBuildMul(gallivm->builder, y_size_arg, block_x_size_arg, ""), vec_length, ""),
                                 "");
      subgroup_id = LLVMBuildAdd(gallivm->builder, subgroup_id, x_size_arg, "");
      system_values.subgroup_id = subgroup_id;
      LLVMValueRef num_subgroups = LLVMBuildUDiv(builder,
                                                 LLVMBuildMul(builder, block_x_size_arg,
                                                              LLVMBuildMul(builder, block_y_size_arg, block_z_size_arg, ""), ""),
                                                 vec_length, "");
      LLVMValueRef subgroup_cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, num_subgroups, lp_build_const_int32(gallivm, 0), "");
      system_values.num_subgroups = LLVMBuildSelect(builder, subgroup_cmp, lp_build_const_int32(gallivm, 1), num_subgroups, "");

      LLVMValueRef bsize[3] = { block_x_size_arg, block_y_size_arg, block_z_size_arg };
      system_values.block_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
      for (i = 0; i < 3; i++)
         system_values.block_size = LLVMBuildInsertElement(builder, system_values.block_size, bsize[i], lp_build_const_int32(gallivm, i), "");

      LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
      LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
      struct lp_build_if_state if_state;
      LLVMValueRef mask_val = lp_build_alloca(gallivm, LLVMVectorType(int32_type, cs_type.length), "mask");
      LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0);
      LLVMBuildStore(gallivm->builder, full_mask_val, mask_val);

      lp_build_if(&if_state, gallivm, use_partial_mask);
      struct lp_build_loop_state mask_loop_state;
      lp_build_loop_begin(&mask_loop_state, gallivm, partials);
      LLVMValueRef tmask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
      tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, "");
      LLVMBuildStore(gallivm->builder, tmask_val, mask_val);
      lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE);
      lp_build_endif(&if_state);
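      /* The loop above zeroes lanes partials..vector_length-1 of the
       * execution mask on the last x iteration, e.g. block_x_size = 10
       * with an 8-wide vector leaves only lanes 0 and 1 enabled there.
       */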

      mask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
      lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);

      struct lp_build_coro_suspend_info coro_info;

      LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
      LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");

      coro_info.suspend = sus_block;
      coro_info.cleanup = clean_block;

      struct lp_build_tgsi_params params;
      memset(&params, 0, sizeof(params));

      params.type = cs_type;
      params.mask = &mask;
      params.consts_ptr = consts_ptr;
      params.const_sizes_ptr = num_consts_ptr;
      params.system_values = &system_values;
      params.context_ptr = context_ptr;
      params.sampler = sampler;
      params.info = &shader->info.base;
      params.ssbo_ptr = ssbo_ptr;
      params.ssbo_sizes_ptr = num_ssbo_ptr;
      params.image = image;
      params.shared_ptr = shared_ptr;
      params.coro = &coro_info;
      params.kernel_args = kernel_args_ptr;
      params.aniso_filter_table = lp_jit_cs_context_aniso_filter_table(gallivm, context_ptr);

      if (shader->base.type == PIPE_SHADER_IR_TGSI)
         lp_build_tgsi_soa(gallivm, shader->base.tokens, &params, NULL);
      else
         lp_build_nir_soa(gallivm, shader->base.ir.nir, &params,
                          NULL);

      mask_val = lp_build_mask_end(&mask);

      lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
      LLVMPositionBuilderAtEnd(builder, clean_block);

      LLVMBuildBr(builder, sus_block);
      LLVMPositionBuilderAtEnd(builder, sus_block);

      lp_build_coro_end(gallivm, coro_hdl);
      LLVMBuildRet(builder, coro_hdl);
   }

   sampler->destroy(sampler);
   image->destroy(image);

   gallivm_verify_function(gallivm, coro);
   gallivm_verify_function(gallivm, function);
}

static void *
llvmpipe_create_compute_state(struct pipe_context *pipe,
                              const struct pipe_compute_state *templ)
{
   struct lp_compute_shader *shader;
   int nr_samplers, nr_sampler_views;

   shader = CALLOC_STRUCT(lp_compute_shader);
   if (!shader)
      return NULL;

   shader->no = cs_no++;

   shader->base.type = templ->ir_type;
   shader->req_local_mem = templ->req_local_mem;
   if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
      struct blob_reader reader;
      const struct pipe_binary_program_header *hdr = templ->prog;

      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
      shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader);
      shader->base.type = PIPE_SHADER_IR_NIR;

      pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir);
      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
      shader->zero_initialize_shared_memory = ((struct nir_shader *)shader->base.ir.nir)->info.zero_initialize_shared_memory;
   } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
      shader->base.ir.nir = (struct nir_shader *)templ->prog;
      shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
      shader->zero_initialize_shared_memory = ((struct nir_shader *)shader->base.ir.nir)->info.zero_initialize_shared_memory;
   }
   if (shader->base.type == PIPE_SHADER_IR_TGSI) {
      /* get/save the summary info for this shader */
      lp_build_tgsi_info(templ->prog, &shader->info);

      /* we need to keep a local copy of the tokens */
      shader->base.tokens = tgsi_dup_tokens(templ->prog);
   } else {
      nir_tgsi_scan_shader(shader->base.ir.nir, &shader->info.base, false);
   }

   list_inithead(&shader->variants.list);

   nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
   nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   int nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
   shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);

   return shader;
}

static void
llvmpipe_bind_compute_state(struct pipe_context *pipe,
                            void *cs)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);

   if (llvmpipe->cs == cs)
      return;

   llvmpipe->cs = (struct lp_compute_shader *)cs;
   llvmpipe->cs_dirty |= LP_CSNEW_CS;
}

/**
 * Remove shader variant from two lists: the shader's variant list
 * and the context's variant list.
 */
static void
llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp,
                                  struct lp_compute_shader_variant *variant)
{
   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u "
                   "v total cached %u inst %u total inst %u\n",
                   variant->shader->no, variant->no,
                   variant->shader->variants_created,
                   variant->shader->variants_cached,
                   lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs);
   }

   gallivm_destroy(variant->gallivm);

   /* remove from shader's list */
   list_del(&variant->list_item_local.list);
   variant->shader->variants_cached--;

   /* remove from context's list */
   list_del(&variant->list_item_global.list);
   lp->nr_cs_variants--;
   lp->nr_cs_instrs -= variant->nr_instrs;

   FREE(variant);
}

static void
llvmpipe_delete_compute_state(struct pipe_context *pipe,
                              void *cs)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *shader = cs;
   struct lp_cs_variant_list_item *li, *next;

   if (llvmpipe->cs == cs)
      llvmpipe->cs = NULL;
   for (unsigned i = 0; i < shader->max_global_buffers; i++)
      pipe_resource_reference(&shader->global_buffers[i], NULL);
   FREE(shader->global_buffers);

   /* Delete all the variants */
   LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
      llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
   }
   if (shader->base.ir.nir)
      ralloc_free(shader->base.ir.nir);
   tgsi_free_tokens(shader->base.tokens);
   FREE(shader);
}

static struct lp_compute_shader_variant_key *
make_variant_key(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 char *store)
{
   int i;
   struct lp_compute_shader_variant_key *key;
   key = (struct lp_compute_shader_variant_key *)store;
   memset(key, 0, sizeof(*key));

   /* This value will be the same for all the variants of a given shader:
    */
   key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;

   if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1)
      key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   struct lp_sampler_static_state *cs_sampler;

   cs_sampler = lp_cs_variant_key_samplers(key);

   memset(cs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *cs_sampler);
   for (i = 0; i < key->nr_samplers; ++i) {
      if (shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
         lp_sampler_static_sampler_state(&cs_sampler[i].sampler_state,
                                         lp->samplers[PIPE_SHADER_COMPUTE][i]);
      }
   }

   /*
    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
    * are dx10-style? Can't really have mixed opcodes, at least not
    * if we want to skip the holes here (without rescanning tgsi).
    */
   if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
      for (i = 0; i < key->nr_sampler_views; ++i) {
         /*
          * Note sview may exceed what's representable by file_mask.
          * This will still work, the only downside is that not actually
          * used views may be included in the shader key.
          */
         if ((shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) || i > 31) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[PIPE_SHADER_COMPUTE][i]);
         }
      }
   }
   else {
      key->nr_sampler_views = key->nr_samplers;
      for (i = 0; i < key->nr_sampler_views; ++i) {
         if ((shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) || i > 31) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[PIPE_SHADER_COMPUTE][i]);
         }
      }
   }

   struct lp_image_static_state *lp_image;
   lp_image = lp_cs_variant_key_images(key);
   key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
   if (key->nr_images)
      memset(lp_image, 0,
             key->nr_images * sizeof *lp_image);
   for (i = 0; i < key->nr_images; ++i) {
      if ((shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) || i > 31) {
         lp_sampler_static_texture_state_image(&lp_image[i].image_state,
                                               &lp->images[PIPE_SHADER_COMPUTE][i]);
      }
   }
   return key;
}

static void
dump_cs_variant_key(const struct lp_compute_shader_variant_key *key)
{
   int i;
   debug_printf("cs variant %p:\n", (void *) key);

   for (i = 0; i < key->nr_samplers; ++i) {
      const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
      const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state;
      debug_printf("sampler[%u] = \n", i);
      debug_printf("  .wrap = %s %s %s\n",
                   util_str_tex_wrap(sampler->wrap_s, TRUE),
                   util_str_tex_wrap(sampler->wrap_t, TRUE),
                   util_str_tex_wrap(sampler->wrap_r, TRUE));
      debug_printf("  .min_img_filter = %s\n",
                   util_str_tex_filter(sampler->min_img_filter, TRUE));
      debug_printf("  .min_mip_filter = %s\n",
                   util_str_tex_mipfilter(sampler->min_mip_filter, TRUE));
      debug_printf("  .mag_img_filter = %s\n",
                   util_str_tex_filter(sampler->mag_img_filter, TRUE));
      if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
         debug_printf("  .compare_func = %s\n", util_str_func(sampler->compare_func, TRUE));
      debug_printf("  .normalized_coords = %u\n", sampler->normalized_coords);
      debug_printf("  .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
      debug_printf("  .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
      debug_printf("  .apply_min_lod = %u\n", sampler->apply_min_lod);
      debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
      debug_printf("  .aniso = %u\n", sampler->aniso);
   }
   for (i = 0; i < key->nr_sampler_views; ++i) {
      const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
      const struct lp_static_texture_state *texture = &samplers[i].texture_state;
      debug_printf("texture[%u] = \n", i);
      debug_printf("  .format = %s\n",
                   util_format_name(texture->format));
      debug_printf("  .target = %s\n",
                   util_str_tex_target(texture->target, TRUE));
      debug_printf("  .level_zero_only = %u\n",
                   texture->level_zero_only);
      debug_printf("  .pot = %u %u %u\n",
                   texture->pot_width,
                   texture->pot_height,
                   texture->pot_depth);
   }
   struct lp_image_static_state *images = lp_cs_variant_key_images(key);
   for (i = 0; i < key->nr_images; ++i) {
      const struct lp_static_texture_state *image = &images[i].image_state;
      debug_printf("image[%u] = \n", i);
      debug_printf("  .format = %s\n",
                   util_format_name(image->format));
      debug_printf("  .target = %s\n",
                   util_str_tex_target(image->target, TRUE));
      debug_printf("  .level_zero_only = %u\n",
                   image->level_zero_only);
      debug_printf("  .pot = %u %u %u\n",
                   image->pot_width,
                   image->pot_height,
                   image->pot_depth);
   }
}

static void
lp_debug_cs_variant(const struct lp_compute_shader_variant *variant)
{
   debug_printf("llvmpipe: Compute shader #%u variant #%u:\n",
                variant->shader->no, variant->no);
   if (variant->shader->base.type == PIPE_SHADER_IR_TGSI)
      tgsi_dump(variant->shader->base.tokens, 0);
   else
      nir_print_shader(variant->shader->base.ir.nir, stderr);
   dump_cs_variant_key(&variant->key);
   debug_printf("\n");
}

static void
lp_cs_get_ir_cache_key(struct lp_compute_shader_variant *variant,
                       unsigned char ir_sha1_cache_key[20])
{
   struct blob blob = { 0 };
   unsigned ir_size;
   void *ir_binary;

   blob_init(&blob);
   nir_serialize(&blob, variant->shader->base.ir.nir, true);
   ir_binary = blob.data;
   ir_size = blob.size;

   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
   _mesa_sha1_update(&ctx, ir_binary, ir_size);
   _mesa_sha1_final(&ctx, ir_sha1_cache_key);

   blob_finish(&blob);
}

static struct lp_compute_shader_variant *
generate_variant(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 const struct lp_compute_shader_variant_key *key)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
   struct lp_compute_shader_variant *variant;
   char module_name[64];
   unsigned char ir_sha1_cache_key[20];
   struct lp_cached_code cached = { 0 };
   bool needs_caching = false;
   variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
   if (!variant)
      return NULL;

   memset(variant, 0, sizeof(*variant));
   snprintf(module_name, sizeof(module_name), "cs%u_variant%u",
            shader->no, shader->variants_created);

   variant->shader = shader;
   memcpy(&variant->key, key, shader->variant_key_size);

   if (shader->base.ir.nir) {
      lp_cs_get_ir_cache_key(variant, ir_sha1_cache_key);

      lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
      if (!cached.data_size)
         needs_caching = true;
   }
   variant->gallivm = gallivm_create(module_name, lp->context, &cached);
   if (!variant->gallivm) {
      FREE(variant);
      return NULL;
   }

   variant->list_item_global.base = variant;
   variant->list_item_local.base = variant;
   variant->no = shader->variants_created++;

   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      lp_debug_cs_variant(variant);
   }

   lp_jit_init_cs_types(variant);

   generate_compute(lp, shader, variant);

   gallivm_compile_module(variant->gallivm);

   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);

   variant->jit_function = (lp_jit_cs_func)gallivm_jit_function(variant->gallivm, variant->function);

   if (needs_caching) {
      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
   }
   gallivm_free_ir(variant->gallivm);
   return variant;
}

static void
lp_cs_ctx_set_cs_variant(struct lp_cs_context *csctx,
                         struct lp_compute_shader_variant *variant)
{
   csctx->cs.current.variant = variant;
}

static void
llvmpipe_update_cs(struct llvmpipe_context *lp)
{
   struct lp_compute_shader *shader = lp->cs;

   struct lp_compute_shader_variant_key *key;
   struct lp_compute_shader_variant *variant = NULL;
   struct lp_cs_variant_list_item *li;
   char store[LP_CS_MAX_VARIANT_KEY_SIZE];

   key = make_variant_key(lp, shader, store);

   /* Search the variants for one which matches the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
      /* Move this variant to the head of the list to implement LRU
       * deletion of shaders when we have too many.
       */
      list_move_to(&variant->list_item_global.list, &lp->cs_variants_list.list);
   }
   else {
      /* variant not found, create it now */
      int64_t t0, t1, dt;
      unsigned i;
      unsigned variants_to_cull;

      if (LP_DEBUG & DEBUG_CS) {
         debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
                      lp->nr_cs_variants,
                      lp->nr_cs_instrs,
                      lp->nr_cs_variants ? lp->nr_cs_instrs / lp->nr_cs_variants : 0);
      }

      /* First, check if we've exceeded the max number of shader variants.
       * If so, free 6.25% of them (the least recently used ones).
       */
      variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0;

      if (variants_to_cull ||
          lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("Evicting CS: %u cs variants,\t%u total variants,"
                         "\t%u instrs,\t%u instrs/variant\n",
                         shader->variants_cached,
                         lp->nr_cs_variants, lp->nr_cs_instrs,
                         lp->nr_cs_instrs / lp->nr_cs_variants);
         }

         /*
          * We need to re-check lp->nr_cs_variants because an arbitrarily large
          * number of shader variants (potentially all of them) could be
          * pending for destruction on flush.
          */

         for (i = 0; i < variants_to_cull || lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
            struct lp_cs_variant_list_item *item;
            if (list_is_empty(&lp->cs_variants_list.list)) {
               break;
            }
            item = list_last_entry(&lp->cs_variants_list.list,
                                   struct lp_cs_variant_list_item, list);
            assert(item);
            assert(item->base);
            llvmpipe_remove_cs_shader_variant(lp, item->base);
         }
      }
      /*
       * Generate the new variant.
       */
      t0 = os_time_get();
      variant = generate_variant(lp, shader, key);
      t1 = os_time_get();
      dt = t1 - t0;
      LP_COUNT_ADD(llvm_compile_time, dt);
      LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */

      /* Put the new variant into the list */
      if (variant) {
         list_add(&variant->list_item_local.list, &shader->variants.list);
         list_add(&variant->list_item_global.list, &lp->cs_variants_list.list);
         lp->nr_cs_variants++;
         lp->nr_cs_instrs += variant->nr_instrs;
         shader->variants_cached++;
      }
   }
   /* Bind this variant */
   lp_cs_ctx_set_cs_variant(lp->csctx, variant);
}

/**
 * Called during state validation when LP_CSNEW_SAMPLER_VIEW is set.
 */
static void
lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
                           unsigned num,
                           struct pipe_sampler_view **views)
{
   unsigned i, max_tex_num;

   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);

   assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);

   max_tex_num = MAX2(num, csctx->cs.current_tex_num);

   for (i = 0; i < max_tex_num; i++) {
      struct pipe_sampler_view *view = i < num ? views[i] : NULL;

      /* We are going to overwrite/unref the current texture further below. If
       * set, make sure to unmap its resource to avoid leaking previous
       * mapping. */
      if (csctx->cs.current_tex[i])
         llvmpipe_resource_unmap(csctx->cs.current_tex[i], 0, 0);

      if (view) {
         struct pipe_resource *res = view->texture;
         struct llvmpipe_resource *lp_tex = llvmpipe_resource(res);
         struct lp_jit_texture *jit_tex;
         jit_tex = &csctx->cs.current.jit_context.textures[i];

         /* We're referencing the texture's internal data, so save a
          * reference to it.
          */
         pipe_resource_reference(&csctx->cs.current_tex[i], res);

         if (!lp_tex->dt) {
            /* regular texture - csctx array of mipmap level offsets */
            int j;
            unsigned first_level = 0;
            unsigned last_level = 0;

            if (llvmpipe_resource_is_texture(res)) {
               first_level = view->u.tex.first_level;
               last_level = view->u.tex.last_level;
               assert(first_level <= last_level);
               assert(last_level <= res->last_level);
               jit_tex->base = lp_tex->tex_data;
            }
            else {
               jit_tex->base = lp_tex->data;
            }
            if (LP_PERF & PERF_TEX_MEM) {
               /* use dummy tile memory */
               jit_tex->base = lp_dummy_tile;
               jit_tex->width = TILE_SIZE/8;
               jit_tex->height = TILE_SIZE/8;
               jit_tex->depth = 1;
               jit_tex->first_level = 0;
               jit_tex->last_level = 0;
               jit_tex->mip_offsets[0] = 0;
               jit_tex->row_stride[0] = 0;
               jit_tex->img_stride[0] = 0;
               jit_tex->num_samples = 0;
               jit_tex->sample_stride = 0;
            }
            else {
               jit_tex->width = res->width0;
               jit_tex->height = res->height0;
               jit_tex->depth = res->depth0;
               jit_tex->first_level = first_level;
               jit_tex->last_level = last_level;
               jit_tex->num_samples = res->nr_samples;
               jit_tex->sample_stride = 0;

               if (llvmpipe_resource_is_texture(res)) {
                  for (j = first_level; j <= last_level; j++) {
                     jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j];
                     jit_tex->row_stride[j] = lp_tex->row_stride[j];
                     jit_tex->img_stride[j] = lp_tex->img_stride[j];
                  }
                  jit_tex->sample_stride = lp_tex->sample_stride;

                  if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                      res->target == PIPE_TEXTURE_2D_ARRAY ||
                      res->target == PIPE_TEXTURE_CUBE ||
                      res->target == PIPE_TEXTURE_CUBE_ARRAY ||
                      (res->target == PIPE_TEXTURE_3D && view->target == PIPE_TEXTURE_2D)) {
                     /*
                      * For array textures, we don't have first_layer, instead
                      * adjust last_layer (stored as depth) plus the mip level offsets
                      * (as we have mip-first layout can't just adjust base ptr).
                      * XXX For mip levels, could do something similar.
                      */
                     jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
                     for (j = first_level; j <= last_level; j++) {
                        jit_tex->mip_offsets[j] += view->u.tex.first_layer *
                                                   lp_tex->img_stride[j];
                     }
                     if (view->target == PIPE_TEXTURE_CUBE ||
                         view->target == PIPE_TEXTURE_CUBE_ARRAY) {
                        assert(jit_tex->depth % 6 == 0);
                     }
                     assert(view->u.tex.first_layer <= view->u.tex.last_layer);
                     if (res->target == PIPE_TEXTURE_3D)
                        assert(view->u.tex.last_layer < res->depth0);
                     else
                        assert(view->u.tex.last_layer < res->array_size);
                  }
               }
               else {
                  /*
                   * For buffers, we don't have "offset", instead adjust
                   * the size (stored as width) plus the base pointer.
                   */
                  unsigned view_blocksize = util_format_get_blocksize(view->format);
                  /* probably don't really need to fill that out */
                  jit_tex->mip_offsets[0] = 0;
                  jit_tex->row_stride[0] = 0;
                  jit_tex->img_stride[0] = 0;

                  /* everything specified in number of elements here. */
                  jit_tex->width = view->u.buf.size / view_blocksize;
                  jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset;
                  /* XXX Unsure if we need to sanitize parameters? */
                  assert(view->u.buf.offset + view->u.buf.size <= res->width0);
               }
            }
         }
         else {
            /* display target texture/surface */
            jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ);
            jit_tex->row_stride[0] = lp_tex->row_stride[0];
            jit_tex->img_stride[0] = lp_tex->img_stride[0];
            jit_tex->mip_offsets[0] = 0;
            jit_tex->width = res->width0;
            jit_tex->height = res->height0;
            jit_tex->depth = res->depth0;
            jit_tex->first_level = jit_tex->last_level = 0;
            jit_tex->num_samples = res->nr_samples;
            jit_tex->sample_stride = 0;
            assert(jit_tex->base);
         }
      }
      else {
         pipe_resource_reference(&csctx->cs.current_tex[i], NULL);
      }
   }
   csctx->cs.current_tex_num = num;
}

/**
 * Called during state validation when LP_NEW_SAMPLER is set.
 */
static void
lp_csctx_set_sampler_state(struct lp_cs_context *csctx,
                           unsigned num,
                           struct pipe_sampler_state **samplers)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);

   assert(num <= PIPE_MAX_SAMPLERS);

   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
      const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;

      if (sampler) {
         struct lp_jit_sampler *jit_sam;
         jit_sam = &csctx->cs.current.jit_context.samplers[i];

         jit_sam->min_lod = sampler->min_lod;
         jit_sam->max_lod = sampler->max_lod;
         jit_sam->lod_bias = sampler->lod_bias;
         jit_sam->max_aniso = sampler->max_anisotropy;
         COPY_4V(jit_sam->border_color, sampler->border_color.f);
      }
   }
}

static void
lp_csctx_set_cs_constants(struct lp_cs_context *csctx,
                          unsigned num,
                          struct pipe_constant_buffer *buffers)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);

   assert(num <= ARRAY_SIZE(csctx->constants));

   for (i = 0; i < num; ++i) {
      util_copy_constant_buffer(&csctx->constants[i].current, &buffers[i], false);
   }
   for (; i < ARRAY_SIZE(csctx->constants); i++) {
      util_copy_constant_buffer(&csctx->constants[i].current, NULL, false);
   }
}

static void
lp_csctx_set_cs_ssbos(struct lp_cs_context *csctx,
                      unsigned num,
                      struct pipe_shader_buffer *buffers)
{
   int i;
   LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *)buffers);

   assert(num <= ARRAY_SIZE(csctx->ssbos));

   for (i = 0; i < num; ++i) {
      util_copy_shader_buffer(&csctx->ssbos[i].current, &buffers[i]);
   }
   for (; i < ARRAY_SIZE(csctx->ssbos); i++) {
      util_copy_shader_buffer(&csctx->ssbos[i].current, NULL);
   }
}

static void
lp_csctx_set_cs_images(struct lp_cs_context *csctx,
                       unsigned num,
                       struct pipe_image_view *images)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) images);

   assert(num <= ARRAY_SIZE(csctx->images));

   for (i = 0; i < num; ++i) {
      struct pipe_image_view *image = &images[i];
      util_copy_image_view(&csctx->images[i].current, &images[i]);

      struct pipe_resource *res = image->resource;
      struct llvmpipe_resource *lp_res = llvmpipe_resource(res);
      struct lp_jit_image *jit_image;

      jit_image = &csctx->cs.current.jit_context.images[i];
      if (!lp_res)
         continue;
      if (!lp_res->dt) {
         /* regular texture - csctx array of mipmap level offsets */
         if (llvmpipe_resource_is_texture(res)) {
            jit_image->base = lp_res->tex_data;
         } else
            jit_image->base = lp_res->data;

         jit_image->width = res->width0;
         jit_image->height = res->height0;
         jit_image->depth = res->depth0;
         jit_image->num_samples = res->nr_samples;

         if (llvmpipe_resource_is_texture(res)) {
            uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
            const uint32_t bw = util_format_get_blockwidth(image->resource->format);
            const uint32_t bh = util_format_get_blockheight(image->resource->format);

            jit_image->width = DIV_ROUND_UP(jit_image->width, bw);
            jit_image->height = DIV_ROUND_UP(jit_image->height, bh);
            jit_image->width = u_minify(jit_image->width, image->u.tex.level);
            jit_image->height = u_minify(jit_image->height, image->u.tex.level);

            if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                res->target == PIPE_TEXTURE_2D_ARRAY ||
                res->target == PIPE_TEXTURE_3D ||
                res->target == PIPE_TEXTURE_CUBE ||
                res->target == PIPE_TEXTURE_CUBE_ARRAY) {
               /*
                * For array textures, we don't have first_layer, instead
                * adjust last_layer (stored as depth) plus the mip level offsets
                * (as we have mip-first layout can't just adjust base ptr).
                * XXX For mip levels, could do something similar.
                */
               jit_image->depth = image->u.tex.last_layer - image->u.tex.first_layer + 1;
               mip_offset += image->u.tex.first_layer * lp_res->img_stride[image->u.tex.level];
            } else
               jit_image->depth = u_minify(jit_image->depth, image->u.tex.level);

            jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
            jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
            jit_image->sample_stride = lp_res->sample_stride;
            jit_image->base = (uint8_t *)jit_image->base + mip_offset;
         } else {
            unsigned view_blocksize = util_format_get_blocksize(image->format);
            jit_image->width = image->u.buf.size / view_blocksize;
            jit_image->base = (uint8_t *)jit_image->base + image->u.buf.offset;
         }
      }
   }
   for (; i < ARRAY_SIZE(csctx->images); i++) {
      util_copy_image_view(&csctx->images[i].current, NULL);
   }
}

static void
update_csctx_consts(struct llvmpipe_context *llvmpipe)
{
   struct lp_cs_context *csctx = llvmpipe->csctx;
   int i;

   for (i = 0; i < ARRAY_SIZE(csctx->constants); ++i) {
      struct pipe_resource *buffer = csctx->constants[i].current.buffer;
      const ubyte *current_data = NULL;
      unsigned current_size = csctx->constants[i].current.buffer_size;
      if (buffer) {
         /* resource buffer */
         current_data = (ubyte *) llvmpipe_resource_data(buffer);
      }
      else if (csctx->constants[i].current.user_buffer) {
         /* user-space buffer */
         current_data = (ubyte *) csctx->constants[i].current.user_buffer;
      }

      if (current_data && current_size >= sizeof(float)) {
         current_data += csctx->constants[i].current.buffer_offset;
         csctx->cs.current.jit_context.constants[i] = (const float *)current_data;
         csctx->cs.current.jit_context.num_constants[i] =
            DIV_ROUND_UP(csctx->constants[i].current.buffer_size,
                         lp_get_constant_buffer_stride(llvmpipe->pipe.screen));
      } else {
         static const float fake_const_buf[4];
         csctx->cs.current.jit_context.constants[i] = fake_const_buf;
         csctx->cs.current.jit_context.num_constants[i] = 0;
      }
   }
}

static void
update_csctx_ssbo(struct llvmpipe_context *llvmpipe)
{
   struct lp_cs_context *csctx = llvmpipe->csctx;
   int i;
   for (i = 0; i < ARRAY_SIZE(csctx->ssbos); ++i) {
      struct pipe_resource *buffer = csctx->ssbos[i].current.buffer;
      const ubyte *current_data = NULL;

      /* resource buffer */
      if (buffer)
         current_data = (ubyte *) llvmpipe_resource_data(buffer);
      if (current_data) {
         current_data += csctx->ssbos[i].current.buffer_offset;

         csctx->cs.current.jit_context.ssbos[i] = (const uint32_t *)current_data;
         csctx->cs.current.jit_context.num_ssbos[i] = csctx->ssbos[i].current.buffer_size;
      } else {
         csctx->cs.current.jit_context.ssbos[i] = NULL;
         csctx->cs.current.jit_context.num_ssbos[i] = 0;
      }
   }
}

static void
llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input)
{
   if (llvmpipe->cs_dirty & LP_CSNEW_CONSTANTS) {
      lp_csctx_set_cs_constants(llvmpipe->csctx,
                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_COMPUTE]),
                                llvmpipe->constants[PIPE_SHADER_COMPUTE]);
      update_csctx_consts(llvmpipe);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SSBOS) {
      lp_csctx_set_cs_ssbos(llvmpipe->csctx,
                            ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_COMPUTE]),
                            llvmpipe->ssbos[PIPE_SHADER_COMPUTE]);
      update_csctx_ssbo(llvmpipe);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER_VIEW)
      lp_csctx_set_sampler_views(llvmpipe->csctx,
                                 llvmpipe->num_sampler_views[PIPE_SHADER_COMPUTE],
                                 llvmpipe->sampler_views[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER)
      lp_csctx_set_sampler_state(llvmpipe->csctx,
                                 llvmpipe->num_samplers[PIPE_SHADER_COMPUTE],
                                 llvmpipe->samplers[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_IMAGES)
      lp_csctx_set_cs_images(llvmpipe->csctx,
                             ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]),
                             llvmpipe->images[PIPE_SHADER_COMPUTE]);

   struct lp_cs_context *csctx = llvmpipe->csctx;
   csctx->cs.current.jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
   if (input) {
      csctx->input = input;
      csctx->cs.current.jit_context.kernel_args = input;
   }

   if (llvmpipe->cs_dirty & (LP_CSNEW_CS |
                             LP_CSNEW_IMAGES |
                             LP_CSNEW_SAMPLER_VIEW |
                             LP_CSNEW_SAMPLER))
      llvmpipe_update_cs(llvmpipe);

   llvmpipe->cs_dirty = 0;
}

static void
cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
{
   struct lp_cs_job_info *job_info = init_data;
   struct lp_jit_cs_thread_data thread_data;

   memset(&thread_data, 0, sizeof(thread_data));

   if (lmem->local_size < job_info->req_local_mem) {
      lmem->local_mem_ptr = REALLOC(lmem->local_mem_ptr, lmem->local_size,
                                    job_info->req_local_mem);
      lmem->local_size = job_info->req_local_mem;
   }
   if (job_info->zero_initialize_shared_memory)
      memset(lmem->local_mem_ptr, 0, job_info->req_local_mem);
   thread_data.shared = lmem->local_mem_ptr;

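   /* Recover the 3D workgroup coordinate from the flat task index:
    * iter_idx = (z * gx * gy) + (y * gx) + x.  For example, with a 4x3x2
    * grid, iter_idx 17 gives grid_z = 17 / 12 = 1, grid_y = (17 - 12) / 4 = 1
    * and grid_x = 17 - 12 - 4 = 1.
    */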
   unsigned grid_z = iter_idx / (job_info->grid_size[0] * job_info->grid_size[1]);
   unsigned grid_y = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1]))) / job_info->grid_size[0];
   unsigned grid_x = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1])) - (grid_y * job_info->grid_size[0]));

   grid_z += job_info->grid_base[2];
   grid_y += job_info->grid_base[1];
   grid_x += job_info->grid_base[0];
   struct lp_compute_shader_variant *variant = job_info->current->variant;
   variant->jit_function(&job_info->current->jit_context,
                         job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
                         grid_x, grid_y, grid_z,
                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2], job_info->work_dim,
                         &thread_data);
}

static void
fill_grid_size(struct pipe_context *pipe,
               const struct pipe_grid_info *info,
               uint32_t grid_size[3])
{
   struct pipe_transfer *transfer;
   uint32_t *params;
   if (!info->indirect) {
      grid_size[0] = info->grid[0];
      grid_size[1] = info->grid[1];
      grid_size[2] = info->grid[2];
      return;
   }
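   /* Indirect dispatch: the grid dimensions live in a buffer as three
    * consecutive uint32 values starting at indirect_offset, so map just
    * that range and read them back.
    */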
   params = pipe_buffer_map_range(pipe, info->indirect,
                                  info->indirect_offset,
                                  3 * sizeof(uint32_t),
                                  PIPE_MAP_READ,
                                  &transfer);

   if (!transfer)
      return;

   grid_size[0] = params[0];
   grid_size[1] = params[1];
   grid_size[2] = params[2];
   pipe_buffer_unmap(pipe, transfer);
}

static void llvmpipe_launch_grid(struct pipe_context *pipe,
                                 const struct pipe_grid_info *info)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
   struct lp_cs_job_info job_info;

   if (!llvmpipe_check_render_cond(llvmpipe))
      return;

   memset(&job_info, 0, sizeof(job_info));

   llvmpipe_cs_update_derived(llvmpipe, info->input);

   fill_grid_size(pipe, info, job_info.grid_size);

   job_info.grid_base[0] = info->grid_base[0];
   job_info.grid_base[1] = info->grid_base[1];
   job_info.grid_base[2] = info->grid_base[2];
   job_info.block_size[0] = info->block[0];
   job_info.block_size[1] = info->block[1];
   job_info.block_size[2] = info->block[2];
   job_info.work_dim = info->work_dim;
   job_info.req_local_mem = llvmpipe->cs->req_local_mem;
   job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
   job_info.current = &llvmpipe->csctx->cs.current;

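   /* One thread-pool task is queued per workgroup; each task runs every
    * invocation of its group on a single pool thread via the variant's
    * coroutine loop (see cs_exec_fn above).
    */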
   int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
   if (num_tasks) {
      struct lp_cs_tpool_task *task;
      mtx_lock(&screen->cs_mutex);
      task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
      mtx_unlock(&screen->cs_mutex);

      lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
   }
   if (!llvmpipe->queries_disabled)
      llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
}

static void
llvmpipe_set_compute_resources(struct pipe_context *pipe,
                               unsigned start, unsigned count,
                               struct pipe_surface **resources)
{
}

static void
llvmpipe_set_global_binding(struct pipe_context *pipe,
                            unsigned first, unsigned count,
                            struct pipe_resource **resources,
                            uint32_t **handles)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *cs = llvmpipe->cs;
   unsigned i;

   if (first + count > cs->max_global_buffers) {
      unsigned old_max = cs->max_global_buffers;
      cs->max_global_buffers = first + count;
      cs->global_buffers = realloc(cs->global_buffers,
                                   cs->max_global_buffers * sizeof(cs->global_buffers[0]));
      if (!cs->global_buffers) {
         return;
      }

      memset(&cs->global_buffers[old_max], 0, (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0]));
   }

   if (!resources) {
      for (i = 0; i < count; i++)
         pipe_resource_reference(&cs->global_buffers[first + i], NULL);
      return;
   }

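   /* On input each handle contains an offset into its buffer; it is
    * rewritten in place with the CPU virtual address of that location,
    * which is the pointer the compute kernel ends up dereferencing.
    */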
   for (i = 0; i < count; i++) {
      uintptr_t va;
      uint32_t offset;
      pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
      struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]);
      offset = *handles[i];
      va = (uintptr_t)((char *)lp_res->data + offset);
      memcpy(handles[i], &va, sizeof(va));
   }
}

void
llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
{
   llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
   llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
   llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
   llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources;
   llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding;
   llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
}

void
lp_csctx_destroy(struct lp_cs_context *csctx)
{
   unsigned i;
   for (i = 0; i < ARRAY_SIZE(csctx->cs.current_tex); i++) {
      struct pipe_resource **res_ptr = &csctx->cs.current_tex[i];
      if (*res_ptr)
         llvmpipe_resource_unmap(*res_ptr, 0, 0);
      pipe_resource_reference(res_ptr, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->constants); i++) {
      pipe_resource_reference(&csctx->constants[i].current.buffer, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->ssbos); i++) {
      pipe_resource_reference(&csctx->ssbos[i].current.buffer, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->images); i++) {
      pipe_resource_reference(&csctx->images[i].current.resource, NULL);
   }
   FREE(csctx);
}

struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe)
{
   struct lp_cs_context *csctx;

   csctx = CALLOC_STRUCT(lp_cs_context);
   if (!csctx)
      return NULL;

   csctx->pipe = pipe;
   return csctx;
}