• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 #include "util/u_memory.h"
26 #include "util/os_time.h"
27 #include "util/u_dump.h"
28 #include "util/u_string.h"
29 #include "tgsi/tgsi_dump.h"
30 #include "tgsi/tgsi_parse.h"
31 #include "gallivm/lp_bld_const.h"
32 #include "gallivm/lp_bld_debug.h"
33 #include "gallivm/lp_bld_intr.h"
34 #include "gallivm/lp_bld_flow.h"
35 #include "gallivm/lp_bld_gather.h"
36 #include "gallivm/lp_bld_coro.h"
37 #include "gallivm/lp_bld_nir.h"
38 #include "lp_state_cs.h"
39 #include "lp_context.h"
40 #include "lp_debug.h"
41 #include "lp_state.h"
42 #include "lp_perf.h"
43 #include "lp_screen.h"
44 #include "lp_memory.h"
45 #include "lp_query.h"
46 #include "lp_cs_tpool.h"
47 #include "frontend/sw_winsys.h"
48 #include "nir/nir_to_tgsi_info.h"
49 #include "util/mesa-sha1.h"
50 #include "nir_serialize.h"
51 
/** Compute shader number (for debugging) */
53 static unsigned cs_no = 0;
54 
/**
 * Per-dispatch job description handed to the compute thread pool.
 * Describes one grid launch: its dimensions, base offset, workgroup
 * size and the shared-memory requirement of the variant being run.
 */
struct lp_cs_job_info {
   unsigned grid_size[3];      /* number of workgroups in x/y/z */
   unsigned grid_base[3];      /* base workgroup offset in x/y/z */
   unsigned block_size[3];     /* workgroup (local) size in x/y/z */
   unsigned req_local_mem;     /* bytes of shared/local memory required */
   unsigned work_dim;          /* dimensionality of the dispatch (1-3) */
   bool zero_initialize_shared_memory; /* clear shared mem before launch */
   struct lp_cs_exec *current; /* exec state of the variant to run */
};
64 
65 static void
generate_compute(struct llvmpipe_context * lp,struct lp_compute_shader * shader,struct lp_compute_shader_variant * variant)66 generate_compute(struct llvmpipe_context *lp,
67                  struct lp_compute_shader *shader,
68                  struct lp_compute_shader_variant *variant)
69 {
70    struct gallivm_state *gallivm = variant->gallivm;
71    const struct lp_compute_shader_variant_key *key = &variant->key;
72    char func_name[64], func_name_coro[64];
73    LLVMTypeRef arg_types[19];
74    LLVMTypeRef func_type, coro_func_type;
75    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
76    LLVMValueRef context_ptr;
77    LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
78    LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
79    LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
80    LLVMValueRef work_dim_arg, thread_data_ptr;
81    LLVMBasicBlockRef block;
82    LLVMBuilderRef builder;
83    struct lp_build_sampler_soa *sampler;
84    struct lp_build_image_soa *image;
85    LLVMValueRef function, coro;
86    struct lp_type cs_type;
87    unsigned i;
88 
89    /*
90     * This function has two parts
91     * a) setup the coroutine execution environment loop.
92     * b) build the compute shader llvm for use inside the coroutine.
93     */
94    assert(lp_native_vector_width / 32 >= 4);
95 
96    memset(&cs_type, 0, sizeof cs_type);
97    cs_type.floating = TRUE;      /* floating point values */
98    cs_type.sign = TRUE;          /* values are signed */
99    cs_type.norm = FALSE;         /* values are not limited to [0,1] or [-1,1] */
100    cs_type.width = 32;           /* 32-bit float */
101    cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
102    snprintf(func_name, sizeof(func_name), "cs_variant");
103 
104    snprintf(func_name_coro, sizeof(func_name), "cs_co_variant");
105 
106    arg_types[0] = variant->jit_cs_context_ptr_type;       /* context */
107    arg_types[1] = int32_type;                          /* block_x_size */
108    arg_types[2] = int32_type;                          /* block_y_size */
109    arg_types[3] = int32_type;                          /* block_z_size */
110    arg_types[4] = int32_type;                          /* grid_x */
111    arg_types[5] = int32_type;                          /* grid_y */
112    arg_types[6] = int32_type;                          /* grid_z */
113    arg_types[7] = int32_type;                          /* grid_size_x */
114    arg_types[8] = int32_type;                          /* grid_size_y */
115    arg_types[9] = int32_type;                          /* grid_size_z */
116    arg_types[10] = int32_type;                         /* work dim */
117    arg_types[11] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
118    arg_types[12] = int32_type;                         /* coro only - num X loops */
119    arg_types[13] = int32_type;                         /* coro only - partials */
120    arg_types[14] = int32_type;                         /* coro block_x_size */
121    arg_types[15] = int32_type;                         /* coro block_y_size */
122    arg_types[16] = int32_type;                         /* coro block_z_size */
123    arg_types[17] = int32_type;                         /* coro idx */
124    arg_types[18] = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 0);
125    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
126                                 arg_types, ARRAY_SIZE(arg_types) - 7, 0);
127 
128    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
129                                      arg_types, ARRAY_SIZE(arg_types), 0);
130 
131    function = LLVMAddFunction(gallivm->module, func_name, func_type);
132    LLVMSetFunctionCallConv(function, LLVMCCallConv);
133 
134    coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
135    LLVMSetFunctionCallConv(coro, LLVMCCallConv);
136    lp_build_coro_add_presplit(coro);
137 
138    variant->function = function;
139 
140    for(i = 0; i < ARRAY_SIZE(arg_types); ++i) {
141       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
142          lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
143          if (i < ARRAY_SIZE(arg_types) - 7)
144             lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
145       }
146    }
147 
148    if (variant->gallivm->cache->data_size)
149       return;
150 
151    context_ptr  = LLVMGetParam(function, 0);
152    block_x_size_arg = LLVMGetParam(function, 1);
153    block_y_size_arg = LLVMGetParam(function, 2);
154    block_z_size_arg = LLVMGetParam(function, 3);
155    grid_x_arg = LLVMGetParam(function, 4);
156    grid_y_arg = LLVMGetParam(function, 5);
157    grid_z_arg = LLVMGetParam(function, 6);
158    grid_size_x_arg = LLVMGetParam(function, 7);
159    grid_size_y_arg = LLVMGetParam(function, 8);
160    grid_size_z_arg = LLVMGetParam(function, 9);
161    work_dim_arg = LLVMGetParam(function, 10);
162    thread_data_ptr  = LLVMGetParam(function, 11);
163 
164    lp_build_name(context_ptr, "context");
165    lp_build_name(block_x_size_arg, "x_size");
166    lp_build_name(block_y_size_arg, "y_size");
167    lp_build_name(block_z_size_arg, "z_size");
168    lp_build_name(grid_x_arg, "grid_x");
169    lp_build_name(grid_y_arg, "grid_y");
170    lp_build_name(grid_z_arg, "grid_z");
171    lp_build_name(grid_size_x_arg, "grid_size_x");
172    lp_build_name(grid_size_y_arg, "grid_size_y");
173    lp_build_name(grid_size_z_arg, "grid_size_z");
174    lp_build_name(work_dim_arg, "work_dim");
175    lp_build_name(thread_data_ptr, "thread_data");
176 
177    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
178    builder = gallivm->builder;
179    assert(builder);
180    LLVMPositionBuilderAtEnd(builder, block);
181    sampler = lp_llvm_sampler_soa_create(lp_cs_variant_key_samplers(key),
182                                         MAX2(key->nr_samplers,
183                                              key->nr_sampler_views));
184    image = lp_llvm_image_soa_create(lp_cs_variant_key_images(key), key->nr_images);
185 
186    struct lp_build_loop_state loop_state[4];
187    LLVMValueRef num_x_loop;
188    LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
189    num_x_loop = LLVMBuildAdd(gallivm->builder, block_x_size_arg, vec_length, "");
190    num_x_loop = LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), "");
191    num_x_loop = LLVMBuildUDiv(gallivm->builder, num_x_loop, vec_length, "");
192    LLVMValueRef partials = LLVMBuildURem(gallivm->builder, block_x_size_arg, vec_length, "");
193 
194    LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
195    coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");
196 
197    /* build a ptr in memory to store all the frames in later. */
198    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
199    LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
200    LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);
201 
202    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, coro_num_hdls, "coro_hdls");
203 
204    unsigned end_coroutine = INT_MAX;
205 
206    /*
207     * This is the main coroutine execution loop. It iterates over the dimensions
208     * and calls the coroutine main entrypoint on the first pass, but in subsequent
209     * passes it checks if the coroutine has completed and resumes it if not.
210     */
211    /* take x_width - round up to type.length width */
212    lp_build_loop_begin(&loop_state[3], gallivm,
213                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
214    lp_build_loop_begin(&loop_state[2], gallivm,
215                        lp_build_const_int32(gallivm, 0)); /* z loop */
216    lp_build_loop_begin(&loop_state[1], gallivm,
217                        lp_build_const_int32(gallivm, 0)); /* y loop */
218    lp_build_loop_begin(&loop_state[0], gallivm,
219                        lp_build_const_int32(gallivm, 0)); /* x loop */
220    {
221       LLVMValueRef args[19];
222       args[0] = context_ptr;
223       args[1] = loop_state[0].counter;
224       args[2] = loop_state[1].counter;
225       args[3] = loop_state[2].counter;
226       args[4] = grid_x_arg;
227       args[5] = grid_y_arg;
228       args[6] = grid_z_arg;
229       args[7] = grid_size_x_arg;
230       args[8] = grid_size_y_arg;
231       args[9] = grid_size_z_arg;
232       args[10] = work_dim_arg;
233       args[11] = thread_data_ptr;
234       args[12] = num_x_loop;
235       args[13] = partials;
236       args[14] = block_x_size_arg;
237       args[15] = block_y_size_arg;
238       args[16] = block_z_size_arg;
239 
240       /* idx = (z * (size_x * size_y) + y * size_x + x */
241       LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter,
242                                                LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, ""), "");
243       coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
244                                   LLVMBuildMul(gallivm->builder, loop_state[1].counter,
245                                                num_x_loop, ""), "");
246       coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx,
247                                   loop_state[0].counter, "");
248 
249       args[17] = coro_hdl_idx;
250 
251       args[18] = coro_mem;
252       LLVMValueRef coro_entry = LLVMBuildGEP(gallivm->builder, coro_hdls, &coro_hdl_idx, 1, "");
253 
254       LLVMValueRef coro_hdl = LLVMBuildLoad(gallivm->builder, coro_entry, "coro_hdl");
255 
256       struct lp_build_if_state ifstate;
257       LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[3].counter,
258                                        lp_build_const_int32(gallivm, 0), "");
259       /* first time here - call the coroutine function entry point */
260       lp_build_if(&ifstate, gallivm, cmp);
261       LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 19, "");
262       LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
263       lp_build_else(&ifstate);
264       /* subsequent calls for this invocation - check if done. */
265       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
266       struct lp_build_if_state ifstate2;
267       lp_build_if(&ifstate2, gallivm, coro_done);
268       /* if done destroy and force loop exit */
269       lp_build_coro_destroy(gallivm, coro_hdl);
270       lp_build_loop_force_set_counter(&loop_state[3], lp_build_const_int32(gallivm, end_coroutine - 1));
271       lp_build_else(&ifstate2);
272       /* otherwise resume the coroutine */
273       lp_build_coro_resume(gallivm, coro_hdl);
274       lp_build_endif(&ifstate2);
275       lp_build_endif(&ifstate);
276       lp_build_loop_force_reload_counter(&loop_state[3]);
277    }
278    lp_build_loop_end_cond(&loop_state[0],
279                           num_x_loop,
280                           NULL,  LLVMIntUGE);
281    lp_build_loop_end_cond(&loop_state[1],
282                           block_y_size_arg,
283                           NULL,  LLVMIntUGE);
284    lp_build_loop_end_cond(&loop_state[2],
285                           block_z_size_arg,
286                           NULL,  LLVMIntUGE);
287    lp_build_loop_end_cond(&loop_state[3],
288                           lp_build_const_int32(gallivm, end_coroutine),
289                           NULL, LLVMIntEQ);
290 
291    LLVMValueRef coro_mem_ptr = LLVMBuildLoad(builder, coro_mem, "");
292    LLVMBuildCall(gallivm->builder, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");
293 
294    LLVMBuildRetVoid(builder);
295 
296    /* This is stage (b) - generate the compute shader code inside the coroutine. */
297    LLVMValueRef x_size_arg, y_size_arg, z_size_arg;
298    context_ptr  = LLVMGetParam(coro, 0);
299    x_size_arg = LLVMGetParam(coro, 1);
300    y_size_arg = LLVMGetParam(coro, 2);
301    z_size_arg = LLVMGetParam(coro, 3);
302    grid_x_arg = LLVMGetParam(coro, 4);
303    grid_y_arg = LLVMGetParam(coro, 5);
304    grid_z_arg = LLVMGetParam(coro, 6);
305    grid_size_x_arg = LLVMGetParam(coro, 7);
306    grid_size_y_arg = LLVMGetParam(coro, 8);
307    grid_size_z_arg = LLVMGetParam(coro, 9);
308    work_dim_arg = LLVMGetParam(coro, 10);
309    thread_data_ptr  = LLVMGetParam(coro, 11);
310    num_x_loop = LLVMGetParam(coro, 12);
311    partials = LLVMGetParam(coro, 13);
312    block_x_size_arg = LLVMGetParam(coro, 14);
313    block_y_size_arg = LLVMGetParam(coro, 15);
314    block_z_size_arg = LLVMGetParam(coro, 16);
315    LLVMValueRef coro_idx = LLVMGetParam(coro, 17);
316    coro_mem = LLVMGetParam(coro, 18);
317    block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
318    LLVMPositionBuilderAtEnd(builder, block);
319    {
320       LLVMValueRef consts_ptr, num_consts_ptr;
321       LLVMValueRef ssbo_ptr, num_ssbo_ptr;
322       LLVMValueRef shared_ptr;
323       LLVMValueRef kernel_args_ptr;
324       struct lp_build_mask_context mask;
325       struct lp_bld_tgsi_system_values system_values;
326 
327       memset(&system_values, 0, sizeof(system_values));
328       consts_ptr = lp_jit_cs_context_constants(gallivm, context_ptr);
329       num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr);
330       ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr);
331       num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr);
332       kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr);
333 
334       shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr);
335 
336       LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, "");
337       coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, "");
338 
339       /* these are coroutine entrypoint necessities */
340       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
341       LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, coro_idx, coro_num_hdls);
342 
343       LLVMValueRef alloced_ptr = LLVMBuildLoad(gallivm->builder, coro_mem, "");
344       alloced_ptr = LLVMBuildGEP(gallivm->builder, alloced_ptr, &coro_entry, 1, "");
345       LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
346       LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
347       LLVMValueRef tid_vals[3];
348       LLVMValueRef tids_x[LP_MAX_VECTOR_LENGTH], tids_y[LP_MAX_VECTOR_LENGTH], tids_z[LP_MAX_VECTOR_LENGTH];
349       LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, x_size_arg, vec_length, "");
350       for (i = 0; i < cs_type.length; i++) {
351          tids_x[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
352          tids_y[i] = y_size_arg;
353          tids_z[i] = z_size_arg;
354       }
355       tid_vals[0] = lp_build_gather_values(gallivm, tids_x, cs_type.length);
356       tid_vals[1] = lp_build_gather_values(gallivm, tids_y, cs_type.length);
357       tid_vals[2] = lp_build_gather_values(gallivm, tids_z, cs_type.length);
358       system_values.thread_id = LLVMGetUndef(LLVMArrayType(LLVMVectorType(int32_type, cs_type.length), 3));
359       for (i = 0; i < 3; i++)
360          system_values.thread_id = LLVMBuildInsertValue(builder, system_values.thread_id, tid_vals[i], i, "");
361 
362       LLVMValueRef gtids[3] = { grid_x_arg, grid_y_arg, grid_z_arg };
363       system_values.block_id = LLVMGetUndef(LLVMVectorType(int32_type, 3));
364       for (i = 0; i < 3; i++)
365          system_values.block_id = LLVMBuildInsertElement(builder, system_values.block_id, gtids[i], lp_build_const_int32(gallivm, i), "");
366 
367       LLVMValueRef gstids[3] = { grid_size_x_arg, grid_size_y_arg, grid_size_z_arg };
368       system_values.grid_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
369       for (i = 0; i < 3; i++)
370          system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), "");
371 
372       system_values.work_dim = work_dim_arg;
373 
374       /* subgroup_id = ((z * block_size_x * block_size_y) + (y * block_size_x) + x) / subgroup_size
375        *
376        * this breaks if z or y is zero, so distribute the division to preserve ids
377        *
378        * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + (x / subgroup_size)
379        *
380        * except "x" is pre-divided here
381        *
382        * subgroup_id = ((z * block_size_x * block_size_y) / subgroup_size) + ((y * block_size_x) / subgroup_size) + x
383        */
384       LLVMValueRef subgroup_id = LLVMBuildUDiv(builder,
385                                                LLVMBuildMul(gallivm->builder, z_size_arg, LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, ""), ""),
386                                                vec_length, "");
387       subgroup_id = LLVMBuildAdd(gallivm->builder,
388                                  subgroup_id,
389                                  LLVMBuildUDiv(builder, LLVMBuildMul(gallivm->builder, y_size_arg, block_x_size_arg, ""), vec_length, ""),
390                                  "");
391       subgroup_id = LLVMBuildAdd(gallivm->builder, subgroup_id, x_size_arg, "");
392       system_values.subgroup_id = subgroup_id;
393       LLVMValueRef num_subgroups = LLVMBuildUDiv(builder,
394                                                  LLVMBuildMul(builder, block_x_size_arg,
395                                                               LLVMBuildMul(builder, block_y_size_arg, block_z_size_arg, ""), ""),
396                                                  vec_length, "");
397       LLVMValueRef subgroup_cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, num_subgroups, lp_build_const_int32(gallivm, 0), "");
398       system_values.num_subgroups = LLVMBuildSelect(builder, subgroup_cmp, lp_build_const_int32(gallivm, 1), num_subgroups, "");
399 
400       LLVMValueRef bsize[3] = { block_x_size_arg, block_y_size_arg, block_z_size_arg };
401       system_values.block_size = LLVMGetUndef(LLVMVectorType(int32_type, 3));
402       for (i = 0; i < 3; i++)
403          system_values.block_size = LLVMBuildInsertElement(builder, system_values.block_size, bsize[i], lp_build_const_int32(gallivm, i), "");
404 
405       LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), "");
406       LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, "");
407       struct lp_build_if_state if_state;
408       LLVMValueRef mask_val = lp_build_alloca(gallivm, LLVMVectorType(int32_type, cs_type.length), "mask");
409       LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0);
410       LLVMBuildStore(gallivm->builder, full_mask_val, mask_val);
411 
412       lp_build_if(&if_state, gallivm, use_partial_mask);
413       struct lp_build_loop_state mask_loop_state;
414       lp_build_loop_begin(&mask_loop_state, gallivm, partials);
415       LLVMValueRef tmask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
416       tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, "");
417       LLVMBuildStore(gallivm->builder, tmask_val, mask_val);
418       lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE);
419       lp_build_endif(&if_state);
420 
421       mask_val = LLVMBuildLoad(gallivm->builder, mask_val, "");
422       lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);
423 
424       struct lp_build_coro_suspend_info coro_info;
425 
426       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
427       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");
428 
429       coro_info.suspend = sus_block;
430       coro_info.cleanup = clean_block;
431 
432       struct lp_build_tgsi_params params;
433       memset(&params, 0, sizeof(params));
434 
435       params.type = cs_type;
436       params.mask = &mask;
437       params.consts_ptr = consts_ptr;
438       params.const_sizes_ptr = num_consts_ptr;
439       params.system_values = &system_values;
440       params.context_ptr = context_ptr;
441       params.sampler = sampler;
442       params.info = &shader->info.base;
443       params.ssbo_ptr = ssbo_ptr;
444       params.ssbo_sizes_ptr = num_ssbo_ptr;
445       params.image = image;
446       params.shared_ptr = shared_ptr;
447       params.coro = &coro_info;
448       params.kernel_args = kernel_args_ptr;
449       params.aniso_filter_table = lp_jit_cs_context_aniso_filter_table(gallivm, context_ptr);
450 
451       if (shader->base.type == PIPE_SHADER_IR_TGSI)
452          lp_build_tgsi_soa(gallivm, shader->base.tokens, &params, NULL);
453       else
454          lp_build_nir_soa(gallivm, shader->base.ir.nir, &params,
455                           NULL);
456 
457       mask_val = lp_build_mask_end(&mask);
458 
459       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
460       LLVMPositionBuilderAtEnd(builder, clean_block);
461 
462       LLVMBuildBr(builder, sus_block);
463       LLVMPositionBuilderAtEnd(builder, sus_block);
464 
465       lp_build_coro_end(gallivm, coro_hdl);
466       LLVMBuildRet(builder, coro_hdl);
467    }
468 
469    sampler->destroy(sampler);
470    image->destroy(image);
471 
472    gallivm_verify_function(gallivm, coro);
473    gallivm_verify_function(gallivm, function);
474 }
475 
476 static void *
llvmpipe_create_compute_state(struct pipe_context * pipe,const struct pipe_compute_state * templ)477 llvmpipe_create_compute_state(struct pipe_context *pipe,
478                                      const struct pipe_compute_state *templ)
479 {
480    struct lp_compute_shader *shader;
481    int nr_samplers, nr_sampler_views;
482 
483    shader = CALLOC_STRUCT(lp_compute_shader);
484    if (!shader)
485       return NULL;
486 
487    shader->no = cs_no++;
488 
489    shader->base.type = templ->ir_type;
490    shader->req_local_mem = templ->req_local_mem;
491    if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
492       struct blob_reader reader;
493       const struct pipe_binary_program_header *hdr = templ->prog;
494 
495       blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
496       shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader);
497       shader->base.type = PIPE_SHADER_IR_NIR;
498 
499       pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir);
500       shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
501       shader->zero_initialize_shared_memory = ((struct nir_shader *)shader->base.ir.nir)->info.zero_initialize_shared_memory;
502    } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
503       shader->base.ir.nir = (struct nir_shader *)templ->prog;
504       shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
505       shader->zero_initialize_shared_memory = ((struct nir_shader *)shader->base.ir.nir)->info.zero_initialize_shared_memory;
506    }
507    if (shader->base.type == PIPE_SHADER_IR_TGSI) {
508       /* get/save the summary info for this shader */
509       lp_build_tgsi_info(templ->prog, &shader->info);
510 
511       /* we need to keep a local copy of the tokens */
512       shader->base.tokens = tgsi_dup_tokens(templ->prog);
513    } else {
514       nir_tgsi_scan_shader(shader->base.ir.nir, &shader->info.base, false);
515    }
516 
517    list_inithead(&shader->variants.list);
518 
519    nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
520    nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
521    int nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
522    shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
523 
524    return shader;
525 }
526 
527 static void
llvmpipe_bind_compute_state(struct pipe_context * pipe,void * cs)528 llvmpipe_bind_compute_state(struct pipe_context *pipe,
529                             void *cs)
530 {
531    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
532 
533    if (llvmpipe->cs == cs)
534       return;
535 
536    llvmpipe->cs = (struct lp_compute_shader *)cs;
537    llvmpipe->cs_dirty |= LP_CSNEW_CS;
538 }
539 
540 /**
541  * Remove shader variant from two lists: the shader's variant list
542  * and the context's variant list.
543  */
static void
llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp,
                                  struct lp_compute_shader_variant *variant)
{
   /* Optional trace of variant destruction for debugging. */
   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u "
                   "v total cached %u inst %u total inst %u\n",
                   variant->shader->no, variant->no,
                   variant->shader->variants_created,
                   variant->shader->variants_cached,
                   lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs);
   }

   /* Tear down the JIT state before unlinking - the compiled code
    * must not be reachable once the variant leaves the lists. */
   gallivm_destroy(variant->gallivm);

   /* remove from shader's list */
   list_del(&variant->list_item_local.list);
   variant->shader->variants_cached--;

   /* remove from context's list */
   list_del(&variant->list_item_global.list);
   lp->nr_cs_variants--;
   lp->nr_cs_instrs -= variant->nr_instrs;

   FREE(variant);
}
570 
571 static void
llvmpipe_delete_compute_state(struct pipe_context * pipe,void * cs)572 llvmpipe_delete_compute_state(struct pipe_context *pipe,
573                               void *cs)
574 {
575    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
576    struct lp_compute_shader *shader = cs;
577    struct lp_cs_variant_list_item *li, *next;
578 
579    if (llvmpipe->cs == cs)
580       llvmpipe->cs = NULL;
581    for (unsigned i = 0; i < shader->max_global_buffers; i++)
582       pipe_resource_reference(&shader->global_buffers[i], NULL);
583    FREE(shader->global_buffers);
584 
585    /* Delete all the variants */
586    LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
587       llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
588    }
589    if (shader->base.ir.nir)
590       ralloc_free(shader->base.ir.nir);
591    tgsi_free_tokens(shader->base.tokens);
592    FREE(shader);
593 }
594 
/**
 * Build the variant key for the current context state in the
 * caller-provided storage (sized by shader->variant_key_size).
 * Captures static sampler, sampler-view and image state so variants
 * can be looked up / compared by key contents.
 */
static struct lp_compute_shader_variant_key *
make_variant_key(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 char *store)
{
   int i;
   struct lp_compute_shader_variant_key *key;
   key = (struct lp_compute_shader_variant_key *)store;
   memset(key, 0, sizeof(*key));

   /* This value will be the same for all the variants of a given shader:
    */
   key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;

   if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1)
      key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   struct lp_sampler_static_state *cs_sampler;

   cs_sampler = lp_cs_variant_key_samplers(key);

   /* The key's sampler array covers max(samplers, views) entries. */
   memset(cs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *cs_sampler);
   for(i = 0; i < key->nr_samplers; ++i) {
      /* only capture state for samplers the shader actually references */
      if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
         lp_sampler_static_sampler_state(&cs_sampler[i].sampler_state,
                                         lp->samplers[PIPE_SHADER_COMPUTE][i]);
      }
   }

   /*
    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
    * are dx10-style? Can't really have mixed opcodes, at least not
    * if we want to skip the holes here (without rescanning tgsi).
    */
   if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
      for(i = 0; i < key->nr_sampler_views; ++i) {
         /*
          * Note sview may exceed what's representable by file_mask.
          * This will still work, the only downside is that not actually
          * used views may be included in the shader key.
          */
         if((shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) || i > 31) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[PIPE_SHADER_COMPUTE][i]);
         }
      }
   }
   else {
      /* No sampler views declared: mirror the sampler slots instead. */
      key->nr_sampler_views = key->nr_samplers;
      for(i = 0; i < key->nr_sampler_views; ++i) {
         if((shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) || i > 31) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[PIPE_SHADER_COMPUTE][i]);
         }
      }
   }

   /* Capture static state of all referenced shader images. */
   struct lp_image_static_state *lp_image;
   lp_image = lp_cs_variant_key_images(key);
   key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1;
   if (key->nr_images)
      memset(lp_image, 0,
             key->nr_images * sizeof *lp_image);
   for (i = 0; i < key->nr_images; ++i) {
      if ((shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) || i > 31) {
         lp_sampler_static_texture_state_image(&lp_image[i].image_state,
                                               &lp->images[PIPE_SHADER_COMPUTE][i]);
      }
   }
   return key;
}
665 
666 static void
dump_cs_variant_key(const struct lp_compute_shader_variant_key * key)667 dump_cs_variant_key(const struct lp_compute_shader_variant_key *key)
668 {
669    int i;
670    debug_printf("cs variant %p:\n", (void *) key);
671 
672    for (i = 0; i < key->nr_samplers; ++i) {
673       const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
674       const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state;
675       debug_printf("sampler[%u] = \n", i);
676       debug_printf("  .wrap = %s %s %s\n",
677                    util_str_tex_wrap(sampler->wrap_s, TRUE),
678                    util_str_tex_wrap(sampler->wrap_t, TRUE),
679                    util_str_tex_wrap(sampler->wrap_r, TRUE));
680       debug_printf("  .min_img_filter = %s\n",
681                    util_str_tex_filter(sampler->min_img_filter, TRUE));
682       debug_printf("  .min_mip_filter = %s\n",
683                    util_str_tex_mipfilter(sampler->min_mip_filter, TRUE));
684       debug_printf("  .mag_img_filter = %s\n",
685                    util_str_tex_filter(sampler->mag_img_filter, TRUE));
686       if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
687          debug_printf("  .compare_func = %s\n", util_str_func(sampler->compare_func, TRUE));
688       debug_printf("  .normalized_coords = %u\n", sampler->normalized_coords);
689       debug_printf("  .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
690       debug_printf("  .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
691       debug_printf("  .apply_min_lod = %u\n", sampler->apply_min_lod);
692       debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
693       debug_printf("  .aniso = %u\n", sampler->aniso);
694    }
695    for (i = 0; i < key->nr_sampler_views; ++i) {
696       const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
697       const struct lp_static_texture_state *texture = &samplers[i].texture_state;
698       debug_printf("texture[%u] = \n", i);
699       debug_printf("  .format = %s\n",
700                    util_format_name(texture->format));
701       debug_printf("  .target = %s\n",
702                    util_str_tex_target(texture->target, TRUE));
703       debug_printf("  .level_zero_only = %u\n",
704                    texture->level_zero_only);
705       debug_printf("  .pot = %u %u %u\n",
706                    texture->pot_width,
707                    texture->pot_height,
708                    texture->pot_depth);
709    }
710    struct lp_image_static_state *images = lp_cs_variant_key_images(key);
711    for (i = 0; i < key->nr_images; ++i) {
712       const struct lp_static_texture_state *image = &images[i].image_state;
713       debug_printf("image[%u] = \n", i);
714       debug_printf("  .format = %s\n",
715                    util_format_name(image->format));
716       debug_printf("  .target = %s\n",
717                    util_str_tex_target(image->target, TRUE));
718       debug_printf("  .level_zero_only = %u\n",
719                    image->level_zero_only);
720       debug_printf("  .pot = %u %u %u\n",
721                    image->pot_width,
722                    image->pot_height,
723                    image->pot_depth);
724    }
725 }
726 
727 static void
lp_debug_cs_variant(const struct lp_compute_shader_variant * variant)728 lp_debug_cs_variant(const struct lp_compute_shader_variant *variant)
729 {
730    debug_printf("llvmpipe: Compute shader #%u variant #%u:\n",
731                 variant->shader->no, variant->no);
732    if (variant->shader->base.type == PIPE_SHADER_IR_TGSI)
733       tgsi_dump(variant->shader->base.tokens, 0);
734    else
735       nir_print_shader(variant->shader->base.ir.nir, stderr);
736    dump_cs_variant_key(&variant->key);
737    debug_printf("\n");
738 }
739 
740 static void
lp_cs_get_ir_cache_key(struct lp_compute_shader_variant * variant,unsigned char ir_sha1_cache_key[20])741 lp_cs_get_ir_cache_key(struct lp_compute_shader_variant *variant,
742                        unsigned char ir_sha1_cache_key[20])
743 {
744    struct blob blob = { 0 };
745    unsigned ir_size;
746    void *ir_binary;
747 
748    blob_init(&blob);
749    nir_serialize(&blob, variant->shader->base.ir.nir, true);
750    ir_binary = blob.data;
751    ir_size = blob.size;
752 
753    struct mesa_sha1 ctx;
754    _mesa_sha1_init(&ctx);
755    _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
756    _mesa_sha1_update(&ctx, ir_binary, ir_size);
757    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
758 
759    blob_finish(&blob);
760 }
761 
/**
 * Create and compile a new compute shader variant for the given key.
 *
 * Allocates the variant (the allocation is sized for the shader's
 * variable-length variant key, which may exceed sizeof variant->key),
 * builds the gallivm module, JIT-compiles the entry point and, for NIR
 * shaders, consults/populates the on-disk shader cache.
 *
 * Returns the new variant, or NULL on allocation / gallivm-creation
 * failure.
 */
static struct lp_compute_shader_variant *
generate_variant(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 const struct lp_compute_shader_variant_key *key)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
   struct lp_compute_shader_variant *variant;
   char module_name[64];
   unsigned char ir_sha1_cache_key[20];
   struct lp_cached_code cached = { 0 };
   bool needs_caching = false;
   /* The key tail beyond sizeof variant->key lives past the end of the
    * struct, hence the adjusted allocation size.
    */
   variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
   if (!variant)
      return NULL;

   /* Only zero the fixed-size part; the key tail is filled by memcpy below. */
   memset(variant, 0, sizeof(*variant));
   snprintf(module_name, sizeof(module_name), "cs%u_variant%u",
            shader->no, shader->variants_created);

   variant->shader = shader;
   memcpy(&variant->key, key, shader->variant_key_size);

   if (shader->base.ir.nir) {
      /* Look for previously compiled machine code in the disk cache. */
      lp_cs_get_ir_cache_key(variant, ir_sha1_cache_key);

      lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
      if (!cached.data_size)
         needs_caching = true;
   }
   variant->gallivm = gallivm_create(module_name, lp->context, &cached);
   if (!variant->gallivm) {
      FREE(variant);
      return NULL;
   }

   variant->list_item_global.base = variant;
   variant->list_item_local.base = variant;
   variant->no = shader->variants_created++;



   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      lp_debug_cs_variant(variant);
   }

   lp_jit_init_cs_types(variant);

   /* Emit the LLVM IR for this variant, then compile it to machine code. */
   generate_compute(lp, shader, variant);

   gallivm_compile_module(variant->gallivm);

   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);

   variant->jit_function = (lp_jit_cs_func)gallivm_jit_function(variant->gallivm, variant->function);

   if (needs_caching) {
      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
   }
   /* The IR is no longer needed once machine code exists. */
   gallivm_free_ir(variant->gallivm);
   return variant;
}
823 
824 static void
lp_cs_ctx_set_cs_variant(struct lp_cs_context * csctx,struct lp_compute_shader_variant * variant)825 lp_cs_ctx_set_cs_variant( struct lp_cs_context *csctx,
826                           struct lp_compute_shader_variant *variant)
827 {
828    csctx->cs.current.variant = variant;
829 }
830 
/**
 * Find or create the shader variant matching the current compute state.
 *
 * Builds a variant key from the currently bound sampler/view/image
 * state, searches the shader's variant list for a match, and if none
 * is found compiles a new variant — first evicting least-recently-used
 * variants when the global variant or instruction caps are exceeded.
 * The chosen variant is then bound into the CS context.
 */
static void
llvmpipe_update_cs(struct llvmpipe_context *lp)
{
   struct lp_compute_shader *shader = lp->cs;

   struct lp_compute_shader_variant_key *key;
   struct lp_compute_shader_variant *variant = NULL;
   struct lp_cs_variant_list_item *li;
   /* Stack storage for the variable-size variant key. */
   char store[LP_CS_MAX_VARIANT_KEY_SIZE];

   key = make_variant_key(lp, shader, store);

   /* Search the variants for one which matches the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
      /* Move this variant to the head of the list to implement LRU
       * deletion of shader's when we have too many.
       */
      list_move_to(&variant->list_item_global.list, &lp->cs_variants_list.list);
   }
   else {
      /* variant not found, create it now */
      int64_t t0, t1, dt;
      unsigned i;
      unsigned variants_to_cull;

      if (LP_DEBUG & DEBUG_CS) {
         debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
                      lp->nr_cs_variants,
                      lp->nr_cs_instrs,
                      lp->nr_cs_variants ? lp->nr_cs_instrs / lp->nr_cs_variants : 0);
      }

      /* First, check if we've exceeded the max number of shader variants.
       * If so, free 6.25% of them (the least recently used ones).
       */
      variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0;

      if (variants_to_cull ||
          lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("Evicting CS: %u cs variants,\t%u total variants,"
                         "\t%u instrs,\t%u instrs/variant\n",
                         shader->variants_cached,
                         lp->nr_cs_variants, lp->nr_cs_instrs,
                         lp->nr_cs_instrs / lp->nr_cs_variants);
         }

         /*
          * We need to re-check lp->nr_cs_variants because an arbitrarily large
          * number of shader variants (potentially all of them) could be
          * pending for destruction on flush.
          */

         for (i = 0; i < variants_to_cull || lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
            struct lp_cs_variant_list_item *item;
            if (list_is_empty(&lp->cs_variants_list.list)) {
               break;
            }
            /* Evict from the tail of the list: the least recently used. */
            item = list_last_entry(&lp->cs_variants_list.list,
                                   struct lp_cs_variant_list_item, list);
            assert(item);
            assert(item->base);
            llvmpipe_remove_cs_shader_variant(lp, item->base);
         }
      }
      /*
       * Generate the new variant.
       */
      t0 = os_time_get();
      variant = generate_variant(lp, shader, key);
      t1 = os_time_get();
      dt = t1 - t0;
      LP_COUNT_ADD(llvm_compile_time, dt);
      LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */

      /* Put the new variant into the list */
      if (variant) {
         list_add(&variant->list_item_local.list, &shader->variants.list);
         list_add(&variant->list_item_global.list, &lp->cs_variants_list.list);
         lp->nr_cs_variants++;
         lp->nr_cs_instrs += variant->nr_instrs;
         shader->variants_cached++;
      }
   }
   /* Bind this variant */
   lp_cs_ctx_set_cs_variant(lp->csctx, variant);
}
925 
926 /**
927  * Called during state validation when LP_CSNEW_SAMPLER_VIEW is set.
928  */
929 static void
lp_csctx_set_sampler_views(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_view ** views)930 lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
931                            unsigned num,
932                            struct pipe_sampler_view **views)
933 {
934    unsigned i, max_tex_num;
935 
936    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
937 
938    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
939 
940    max_tex_num = MAX2(num, csctx->cs.current_tex_num);
941 
942    for (i = 0; i < max_tex_num; i++) {
943       struct pipe_sampler_view *view = i < num ? views[i] : NULL;
944 
945       /* We are going to overwrite/unref the current texture further below. If
946        * set, make sure to unmap its resource to avoid leaking previous
947        * mapping.  */
948       if (csctx->cs.current_tex[i])
949          llvmpipe_resource_unmap(csctx->cs.current_tex[i], 0, 0);
950 
951       if (view) {
952          struct pipe_resource *res = view->texture;
953          struct llvmpipe_resource *lp_tex = llvmpipe_resource(res);
954          struct lp_jit_texture *jit_tex;
955          jit_tex = &csctx->cs.current.jit_context.textures[i];
956 
957          /* We're referencing the texture's internal data, so save a
958           * reference to it.
959           */
960          pipe_resource_reference(&csctx->cs.current_tex[i], res);
961 
962          if (!lp_tex->dt) {
963             /* regular texture - csctx array of mipmap level offsets */
964             int j;
965             unsigned first_level = 0;
966             unsigned last_level = 0;
967 
968             if (llvmpipe_resource_is_texture(res)) {
969                first_level = view->u.tex.first_level;
970                last_level = view->u.tex.last_level;
971                assert(first_level <= last_level);
972                assert(last_level <= res->last_level);
973                jit_tex->base = lp_tex->tex_data;
974             }
975             else {
976               jit_tex->base = lp_tex->data;
977             }
978             if (LP_PERF & PERF_TEX_MEM) {
979                /* use dummy tile memory */
980                jit_tex->base = lp_dummy_tile;
981                jit_tex->width = TILE_SIZE/8;
982                jit_tex->height = TILE_SIZE/8;
983                jit_tex->depth = 1;
984                jit_tex->first_level = 0;
985                jit_tex->last_level = 0;
986                jit_tex->mip_offsets[0] = 0;
987                jit_tex->row_stride[0] = 0;
988                jit_tex->img_stride[0] = 0;
989                jit_tex->num_samples = 0;
990                jit_tex->sample_stride = 0;
991             }
992             else {
993                jit_tex->width = res->width0;
994                jit_tex->height = res->height0;
995                jit_tex->depth = res->depth0;
996                jit_tex->first_level = first_level;
997                jit_tex->last_level = last_level;
998                jit_tex->num_samples = res->nr_samples;
999                jit_tex->sample_stride = 0;
1000 
1001                if (llvmpipe_resource_is_texture(res)) {
1002                   for (j = first_level; j <= last_level; j++) {
1003                      jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j];
1004                      jit_tex->row_stride[j] = lp_tex->row_stride[j];
1005                      jit_tex->img_stride[j] = lp_tex->img_stride[j];
1006                   }
1007                   jit_tex->sample_stride = lp_tex->sample_stride;
1008 
1009                   if (res->target == PIPE_TEXTURE_1D_ARRAY ||
1010                       res->target == PIPE_TEXTURE_2D_ARRAY ||
1011                       res->target == PIPE_TEXTURE_CUBE ||
1012                       res->target == PIPE_TEXTURE_CUBE_ARRAY ||
1013                       (res->target == PIPE_TEXTURE_3D && view->target == PIPE_TEXTURE_2D)) {
1014                      /*
1015                       * For array textures, we don't have first_layer, instead
1016                       * adjust last_layer (stored as depth) plus the mip level offsets
1017                       * (as we have mip-first layout can't just adjust base ptr).
1018                       * XXX For mip levels, could do something similar.
1019                       */
1020                      jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
1021                      for (j = first_level; j <= last_level; j++) {
1022                         jit_tex->mip_offsets[j] += view->u.tex.first_layer *
1023                                                    lp_tex->img_stride[j];
1024                      }
1025                      if (view->target == PIPE_TEXTURE_CUBE ||
1026                          view->target == PIPE_TEXTURE_CUBE_ARRAY) {
1027                         assert(jit_tex->depth % 6 == 0);
1028                      }
1029                      assert(view->u.tex.first_layer <= view->u.tex.last_layer);
1030                      if (res->target == PIPE_TEXTURE_3D)
1031                         assert(view->u.tex.last_layer < res->depth0);
1032                      else
1033                         assert(view->u.tex.last_layer < res->array_size);
1034                   }
1035                }
1036                else {
1037                   /*
1038                    * For buffers, we don't have "offset", instead adjust
1039                    * the size (stored as width) plus the base pointer.
1040                    */
1041                   unsigned view_blocksize = util_format_get_blocksize(view->format);
1042                   /* probably don't really need to fill that out */
1043                   jit_tex->mip_offsets[0] = 0;
1044                   jit_tex->row_stride[0] = 0;
1045                   jit_tex->img_stride[0] = 0;
1046 
1047                   /* everything specified in number of elements here. */
1048                   jit_tex->width = view->u.buf.size / view_blocksize;
1049                   jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset;
1050                   /* XXX Unsure if we need to sanitize parameters? */
1051                   assert(view->u.buf.offset + view->u.buf.size <= res->width0);
1052                }
1053             }
1054          }
1055          else {
1056             /* display target texture/surface */
1057             jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ);
1058             jit_tex->row_stride[0] = lp_tex->row_stride[0];
1059             jit_tex->img_stride[0] = lp_tex->img_stride[0];
1060             jit_tex->mip_offsets[0] = 0;
1061             jit_tex->width = res->width0;
1062             jit_tex->height = res->height0;
1063             jit_tex->depth = res->depth0;
1064             jit_tex->first_level = jit_tex->last_level = 0;
1065             jit_tex->num_samples = res->nr_samples;
1066             jit_tex->sample_stride = 0;
1067             assert(jit_tex->base);
1068          }
1069       }
1070       else {
1071          pipe_resource_reference(&csctx->cs.current_tex[i], NULL);
1072       }
1073    }
1074    csctx->cs.current_tex_num = num;
1075 }
1076 
1077 
1078 /**
1079  * Called during state validation when LP_NEW_SAMPLER is set.
1080  */
1081 static void
lp_csctx_set_sampler_state(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_state ** samplers)1082 lp_csctx_set_sampler_state(struct lp_cs_context *csctx,
1083                            unsigned num,
1084                            struct pipe_sampler_state **samplers)
1085 {
1086    unsigned i;
1087 
1088    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
1089 
1090    assert(num <= PIPE_MAX_SAMPLERS);
1091 
1092    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
1093       const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;
1094 
1095       if (sampler) {
1096          struct lp_jit_sampler *jit_sam;
1097          jit_sam = &csctx->cs.current.jit_context.samplers[i];
1098 
1099          jit_sam->min_lod = sampler->min_lod;
1100          jit_sam->max_lod = sampler->max_lod;
1101          jit_sam->lod_bias = sampler->lod_bias;
1102          jit_sam->max_aniso = sampler->max_anisotropy;
1103          COPY_4V(jit_sam->border_color, sampler->border_color.f);
1104       }
1105    }
1106 }
1107 
1108 static void
lp_csctx_set_cs_constants(struct lp_cs_context * csctx,unsigned num,struct pipe_constant_buffer * buffers)1109 lp_csctx_set_cs_constants(struct lp_cs_context *csctx,
1110                           unsigned num,
1111                           struct pipe_constant_buffer *buffers)
1112 {
1113    unsigned i;
1114 
1115    LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
1116 
1117    assert(num <= ARRAY_SIZE(csctx->constants));
1118 
1119    for (i = 0; i < num; ++i) {
1120       util_copy_constant_buffer(&csctx->constants[i].current, &buffers[i], false);
1121    }
1122    for (; i < ARRAY_SIZE(csctx->constants); i++) {
1123       util_copy_constant_buffer(&csctx->constants[i].current, NULL, false);
1124    }
1125 }
1126 
1127 static void
lp_csctx_set_cs_ssbos(struct lp_cs_context * csctx,unsigned num,struct pipe_shader_buffer * buffers)1128 lp_csctx_set_cs_ssbos(struct lp_cs_context *csctx,
1129                        unsigned num,
1130                        struct pipe_shader_buffer *buffers)
1131 {
1132    int i;
1133    LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *)buffers);
1134 
1135    assert (num <= ARRAY_SIZE(csctx->ssbos));
1136 
1137    for (i = 0; i < num; ++i) {
1138       util_copy_shader_buffer(&csctx->ssbos[i].current, &buffers[i]);
1139    }
1140    for (; i < ARRAY_SIZE(csctx->ssbos); i++) {
1141       util_copy_shader_buffer(&csctx->ssbos[i].current, NULL);
1142    }
1143 }
1144 
/**
 * Bind image views to the CS context and fill in the per-image JIT
 * context data (base pointer, sizes, strides) read by generated code.
 *
 * Slots past 'num' are cleared to release stale bindings.
 */
static void
lp_csctx_set_cs_images(struct lp_cs_context *csctx,
                       unsigned num,
                       struct pipe_image_view *images)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) images);

   assert(num <= ARRAY_SIZE(csctx->images));

   for (i = 0; i < num; ++i) {
      struct pipe_image_view *image = &images[i];
      util_copy_image_view(&csctx->images[i].current, &images[i]);

      struct pipe_resource *res = image->resource;
      struct llvmpipe_resource *lp_res = llvmpipe_resource(res);
      struct lp_jit_image *jit_image;

      jit_image = &csctx->cs.current.jit_context.images[i];
      /* No backing resource: keep the slot's JIT data unchanged. */
      if (!lp_res)
         continue;
      if (!lp_res->dt) {
         /* regular texture - csctx array of mipmap level offsets */
         if (llvmpipe_resource_is_texture(res)) {
            jit_image->base = lp_res->tex_data;
         } else
            jit_image->base = lp_res->data;

         jit_image->width = res->width0;
         jit_image->height = res->height0;
         jit_image->depth = res->depth0;
         jit_image->num_samples = res->nr_samples;

         if (llvmpipe_resource_is_texture(res)) {
            uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
            const uint32_t bw = util_format_get_blockwidth(image->resource->format);
            const uint32_t bh = util_format_get_blockheight(image->resource->format);

            /* Convert to block units, then minify to the viewed level. */
            jit_image->width = DIV_ROUND_UP(jit_image->width, bw);
            jit_image->height = DIV_ROUND_UP(jit_image->height, bh);
            jit_image->width = u_minify(jit_image->width, image->u.tex.level);
            jit_image->height = u_minify(jit_image->height, image->u.tex.level);

            if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                res->target == PIPE_TEXTURE_2D_ARRAY ||
                res->target == PIPE_TEXTURE_3D ||
                res->target == PIPE_TEXTURE_CUBE ||
                res->target == PIPE_TEXTURE_CUBE_ARRAY) {
               /*
                * For array textures, we don't have first_layer, instead
                * adjust last_layer (stored as depth) plus the mip level offsets
                * (as we have mip-first layout can't just adjust base ptr).
                * XXX For mip levels, could do something similar.
                */
               jit_image->depth = image->u.tex.last_layer - image->u.tex.first_layer + 1;
               mip_offset += image->u.tex.first_layer * lp_res->img_stride[image->u.tex.level];
            } else
               jit_image->depth = u_minify(jit_image->depth, image->u.tex.level);

            jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
            jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
            jit_image->sample_stride = lp_res->sample_stride;
            jit_image->base = (uint8_t *)jit_image->base + mip_offset;
         } else {
            /* Buffer image: size in elements, base adjusted by the view
             * offset. */
            unsigned view_blocksize = util_format_get_blocksize(image->format);
            jit_image->width = image->u.buf.size / view_blocksize;
            jit_image->base = (uint8_t *)jit_image->base + image->u.buf.offset;
         }
      }
   }
   for (; i < ARRAY_SIZE(csctx->images); i++) {
      util_copy_image_view(&csctx->images[i].current, NULL);
   }
}
1220 
1221 static void
update_csctx_consts(struct llvmpipe_context * llvmpipe)1222 update_csctx_consts(struct llvmpipe_context *llvmpipe)
1223 {
1224    struct lp_cs_context *csctx = llvmpipe->csctx;
1225    int i;
1226 
1227    for (i = 0; i < ARRAY_SIZE(csctx->constants); ++i) {
1228       struct pipe_resource *buffer = csctx->constants[i].current.buffer;
1229       const ubyte *current_data = NULL;
1230       unsigned current_size = csctx->constants[i].current.buffer_size;
1231       if (buffer) {
1232          /* resource buffer */
1233          current_data = (ubyte *) llvmpipe_resource_data(buffer);
1234       }
1235       else if (csctx->constants[i].current.user_buffer) {
1236          /* user-space buffer */
1237          current_data = (ubyte *) csctx->constants[i].current.user_buffer;
1238       }
1239 
1240       if (current_data && current_size >= sizeof(float)) {
1241          current_data += csctx->constants[i].current.buffer_offset;
1242          csctx->cs.current.jit_context.constants[i] = (const float *)current_data;
1243          csctx->cs.current.jit_context.num_constants[i] =
1244             DIV_ROUND_UP(csctx->constants[i].current.buffer_size,
1245                          lp_get_constant_buffer_stride(llvmpipe->pipe.screen));
1246       } else {
1247          static const float fake_const_buf[4];
1248          csctx->cs.current.jit_context.constants[i] = fake_const_buf;
1249          csctx->cs.current.jit_context.num_constants[i] = 0;
1250       }
1251    }
1252 }
1253 
1254 static void
update_csctx_ssbo(struct llvmpipe_context * llvmpipe)1255 update_csctx_ssbo(struct llvmpipe_context *llvmpipe)
1256 {
1257    struct lp_cs_context *csctx = llvmpipe->csctx;
1258    int i;
1259    for (i = 0; i < ARRAY_SIZE(csctx->ssbos); ++i) {
1260       struct pipe_resource *buffer = csctx->ssbos[i].current.buffer;
1261       const ubyte *current_data = NULL;
1262 
1263       /* resource buffer */
1264       if (buffer)
1265          current_data = (ubyte *) llvmpipe_resource_data(buffer);
1266       if (current_data) {
1267          current_data += csctx->ssbos[i].current.buffer_offset;
1268 
1269          csctx->cs.current.jit_context.ssbos[i] = (const uint32_t *)current_data;
1270          csctx->cs.current.jit_context.num_ssbos[i] = csctx->ssbos[i].current.buffer_size;
1271       } else {
1272          csctx->cs.current.jit_context.ssbos[i] = NULL;
1273          csctx->cs.current.jit_context.num_ssbos[i] = 0;
1274       }
1275    }
1276 }
1277 
/**
 * Validate all derived compute state before a dispatch.
 *
 * Propagates dirty constant buffers, SSBOs, sampler views, samplers
 * and images into the CS context, stores the kernel input pointer,
 * and (re)selects the shader variant when any variant-affecting state
 * changed.  Clears all CS dirty flags on exit.
 */
static void
llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input)
{
   if (llvmpipe->cs_dirty & LP_CSNEW_CONSTANTS) {
      lp_csctx_set_cs_constants(llvmpipe->csctx,
                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_COMPUTE]),
                                llvmpipe->constants[PIPE_SHADER_COMPUTE]);
      update_csctx_consts(llvmpipe);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SSBOS) {
      lp_csctx_set_cs_ssbos(llvmpipe->csctx,
                            ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_COMPUTE]),
                            llvmpipe->ssbos[PIPE_SHADER_COMPUTE]);
      update_csctx_ssbo(llvmpipe);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER_VIEW)
      lp_csctx_set_sampler_views(llvmpipe->csctx,
                                 llvmpipe->num_sampler_views[PIPE_SHADER_COMPUTE],
                                 llvmpipe->sampler_views[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER)
      lp_csctx_set_sampler_state(llvmpipe->csctx,
                                 llvmpipe->num_samplers[PIPE_SHADER_COMPUTE],
                                 llvmpipe->samplers[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_IMAGES)
      lp_csctx_set_cs_images(llvmpipe->csctx,
                              ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]),
                              llvmpipe->images[PIPE_SHADER_COMPUTE]);

   struct lp_cs_context *csctx = llvmpipe->csctx;
   csctx->cs.current.jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
   /* 'input' carries the kernel arguments for this dispatch (may be NULL). */
   if (input) {
      csctx->input = input;
      csctx->cs.current.jit_context.kernel_args = input;
   }

   /* Variant selection runs last so it sees the freshly bound
    * sampler/view/image state above. */
   if (llvmpipe->cs_dirty & (LP_CSNEW_CS |
                             LP_CSNEW_IMAGES |
                             LP_CSNEW_SAMPLER_VIEW |
                             LP_CSNEW_SAMPLER))
      llvmpipe_update_cs(llvmpipe);


   llvmpipe->cs_dirty = 0;
}
1326 
1327 static void
cs_exec_fn(void * init_data,int iter_idx,struct lp_cs_local_mem * lmem)1328 cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
1329 {
1330    struct lp_cs_job_info *job_info = init_data;
1331    struct lp_jit_cs_thread_data thread_data;
1332 
1333    memset(&thread_data, 0, sizeof(thread_data));
1334 
1335    if (lmem->local_size < job_info->req_local_mem) {
1336       lmem->local_mem_ptr = REALLOC(lmem->local_mem_ptr, lmem->local_size,
1337                                     job_info->req_local_mem);
1338       lmem->local_size = job_info->req_local_mem;
1339    }
1340    if (job_info->zero_initialize_shared_memory)
1341       memset(lmem->local_mem_ptr, 0, job_info->req_local_mem);
1342    thread_data.shared = lmem->local_mem_ptr;
1343 
1344    unsigned grid_z = iter_idx / (job_info->grid_size[0] * job_info->grid_size[1]);
1345    unsigned grid_y = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1]))) / job_info->grid_size[0];
1346    unsigned grid_x = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1])) - (grid_y * job_info->grid_size[0]));
1347 
1348    grid_z += job_info->grid_base[2];
1349    grid_y += job_info->grid_base[1];
1350    grid_x += job_info->grid_base[0];
1351    struct lp_compute_shader_variant *variant = job_info->current->variant;
1352    variant->jit_function(&job_info->current->jit_context,
1353                          job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
1354                          grid_x, grid_y, grid_z,
1355                          job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2], job_info->work_dim,
1356                          &thread_data);
1357 }
1358 
1359 static void
fill_grid_size(struct pipe_context * pipe,const struct pipe_grid_info * info,uint32_t grid_size[3])1360 fill_grid_size(struct pipe_context *pipe,
1361                const struct pipe_grid_info *info,
1362                uint32_t grid_size[3])
1363 {
1364    struct pipe_transfer *transfer;
1365    uint32_t *params;
1366    if (!info->indirect) {
1367       grid_size[0] = info->grid[0];
1368       grid_size[1] = info->grid[1];
1369       grid_size[2] = info->grid[2];
1370       return;
1371    }
1372    params = pipe_buffer_map_range(pipe, info->indirect,
1373                                   info->indirect_offset,
1374                                   3 * sizeof(uint32_t),
1375                                   PIPE_MAP_READ,
1376                                   &transfer);
1377 
1378    if (!transfer)
1379       return;
1380 
1381    grid_size[0] = params[0];
1382    grid_size[1] = params[1];
1383    grid_size[2] = params[2];
1384    pipe_buffer_unmap(pipe, transfer);
1385 }
1386 
llvmpipe_launch_grid(struct pipe_context * pipe,const struct pipe_grid_info * info)1387 static void llvmpipe_launch_grid(struct pipe_context *pipe,
1388                                  const struct pipe_grid_info *info)
1389 {
1390    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1391    struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
1392    struct lp_cs_job_info job_info;
1393 
1394    if (!llvmpipe_check_render_cond(llvmpipe))
1395       return;
1396 
1397    memset(&job_info, 0, sizeof(job_info));
1398 
1399    llvmpipe_cs_update_derived(llvmpipe, info->input);
1400 
1401    fill_grid_size(pipe, info, job_info.grid_size);
1402 
1403    job_info.grid_base[0] = info->grid_base[0];
1404    job_info.grid_base[1] = info->grid_base[1];
1405    job_info.grid_base[2] = info->grid_base[2];
1406    job_info.block_size[0] = info->block[0];
1407    job_info.block_size[1] = info->block[1];
1408    job_info.block_size[2] = info->block[2];
1409    job_info.work_dim = info->work_dim;
1410    job_info.req_local_mem = llvmpipe->cs->req_local_mem;
1411    job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
1412    job_info.current = &llvmpipe->csctx->cs.current;
1413 
1414    int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
1415    if (num_tasks) {
1416       struct lp_cs_tpool_task *task;
1417       mtx_lock(&screen->cs_mutex);
1418       task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
1419       mtx_unlock(&screen->cs_mutex);
1420 
1421       lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
1422    }
1423    if (!llvmpipe->queries_disabled)
1424       llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
1425 }
1426 
/**
 * pipe_context::set_compute_resources — intentionally a no-op for
 * llvmpipe; the original body was empty as well.
 */
static void
llvmpipe_set_compute_resources(struct pipe_context *pipe,
                               unsigned start, unsigned count,
                               struct pipe_surface **resources)
{
}
1435 
1436 static void
llvmpipe_set_global_binding(struct pipe_context * pipe,unsigned first,unsigned count,struct pipe_resource ** resources,uint32_t ** handles)1437 llvmpipe_set_global_binding(struct pipe_context *pipe,
1438                             unsigned first, unsigned count,
1439                             struct pipe_resource **resources,
1440                             uint32_t **handles)
1441 {
1442    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1443    struct lp_compute_shader *cs = llvmpipe->cs;
1444    unsigned i;
1445 
1446    if (first + count > cs->max_global_buffers) {
1447       unsigned old_max = cs->max_global_buffers;
1448       cs->max_global_buffers = first + count;
1449       cs->global_buffers = realloc(cs->global_buffers,
1450                                    cs->max_global_buffers * sizeof(cs->global_buffers[0]));
1451       if (!cs->global_buffers) {
1452          return;
1453       }
1454 
1455       memset(&cs->global_buffers[old_max], 0, (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0]));
1456    }
1457 
1458    if (!resources) {
1459       for (i = 0; i < count; i++)
1460          pipe_resource_reference(&cs->global_buffers[first + i], NULL);
1461       return;
1462    }
1463 
1464    for (i = 0; i < count; i++) {
1465       uintptr_t va;
1466       uint32_t offset;
1467       pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
1468       struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]);
1469       offset = *handles[i];
1470       va = (uintptr_t)((char *)lp_res->data + offset);
1471       memcpy(handles[i], &va, sizeof(va));
1472    }
1473 }
1474 
1475 void
llvmpipe_init_compute_funcs(struct llvmpipe_context * llvmpipe)1476 llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
1477 {
1478    llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
1479    llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
1480    llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
1481    llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources;
1482    llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding;
1483    llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
1484 }
1485 
1486 void
lp_csctx_destroy(struct lp_cs_context * csctx)1487 lp_csctx_destroy(struct lp_cs_context *csctx)
1488 {
1489    unsigned i;
1490    for (i = 0; i < ARRAY_SIZE(csctx->cs.current_tex); i++) {
1491       struct pipe_resource **res_ptr = &csctx->cs.current_tex[i];
1492       if (*res_ptr)
1493          llvmpipe_resource_unmap(*res_ptr, 0, 0);
1494       pipe_resource_reference(res_ptr, NULL);
1495    }
1496    for (i = 0; i < ARRAY_SIZE(csctx->constants); i++) {
1497       pipe_resource_reference(&csctx->constants[i].current.buffer, NULL);
1498    }
1499    for (i = 0; i < ARRAY_SIZE(csctx->ssbos); i++) {
1500       pipe_resource_reference(&csctx->ssbos[i].current.buffer, NULL);
1501    }
1502    for (i = 0; i < ARRAY_SIZE(csctx->images); i++) {
1503       pipe_resource_reference(&csctx->images[i].current.resource, NULL);
1504    }
1505    FREE(csctx);
1506 }
1507 
lp_csctx_create(struct pipe_context * pipe)1508 struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe)
1509 {
1510    struct lp_cs_context *csctx;
1511 
1512    csctx = CALLOC_STRUCT(lp_cs_context);
1513    if (!csctx)
1514       return NULL;
1515 
1516    csctx->pipe = pipe;
1517    return csctx;
1518 }
1519