• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "util/u_memory.h"
27 #include "util/os_time.h"
28 #include "util/u_dump.h"
29 #include "util/u_string.h"
30 #include "gallivm/lp_bld_const.h"
31 #include "gallivm/lp_bld_debug.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_flow.h"
34 #include "gallivm/lp_bld_pack.h"
35 #include "gallivm/lp_bld_gather.h"
36 #include "gallivm/lp_bld_coro.h"
37 #include "gallivm/lp_bld_nir.h"
38 #include "gallivm/lp_bld_jit_sample.h"
39 #include "lp_state_cs.h"
40 #include "lp_context.h"
41 #include "lp_setup_context.h"
42 #include "lp_debug.h"
43 #include "lp_state.h"
44 #include "lp_perf.h"
45 #include "lp_screen.h"
46 #include "lp_memory.h"
47 #include "lp_query.h"
48 #include "lp_cs_tpool.h"
49 #include "frontend/sw_winsys.h"
50 #include "nir/nir_to_tgsi_info.h"
51 #include "nir/tgsi_to_nir.h"
52 #include "util/mesa-sha1.h"
53 #include "nir_serialize.h"
54 
55 #include "draw/draw_context.h"
56 #include "draw/draw_llvm.h"
57 #include "draw/draw_mesh_prim.h"
58 
59 /** Fragment shader number (for debugging) */
60 static unsigned cs_no = 0;
61 static unsigned task_no = 0;
62 static unsigned mesh_no = 0;
63 
64 struct lp_cs_job_info {
65    unsigned grid_size[3];
66    unsigned iter_size[3];
67    unsigned grid_base[3];
68    unsigned block_size[3];
69    unsigned req_local_mem;
70    unsigned work_dim;
71    unsigned draw_id;
72    bool zero_initialize_shared_memory;
73    bool use_iters;
74    struct lp_cs_exec *current;
75    struct vertex_header *io;
76    size_t io_stride;
77    void *payload;
78    size_t payload_stride;
79 };
80 
81 enum {
82    CS_ARG_CONTEXT,
83    CS_ARG_RESOURCES,
84    CS_ARG_BLOCK_X_SIZE,
85    CS_ARG_BLOCK_Y_SIZE,
86    CS_ARG_BLOCK_Z_SIZE,
87    CS_ARG_GRID_X,
88    CS_ARG_GRID_Y,
89    CS_ARG_GRID_Z,
90    CS_ARG_GRID_SIZE_X,
91    CS_ARG_GRID_SIZE_Y,
92    CS_ARG_GRID_SIZE_Z,
93    CS_ARG_WORK_DIM,
94    CS_ARG_DRAW_ID,
95    CS_ARG_VERTEX_DATA,
96    CS_ARG_PER_THREAD_DATA,
97    CS_ARG_OUTER_COUNT,
98    CS_ARG_CORO_SUBGROUP_COUNT = CS_ARG_OUTER_COUNT,
99    CS_ARG_CORO_PARTIALS,
100    CS_ARG_CORO_BLOCK_X_SIZE,
101    CS_ARG_CORO_BLOCK_Y_SIZE,
102    CS_ARG_CORO_BLOCK_Z_SIZE,
103    CS_ARG_CORO_IDX,
104    CS_ARG_CORO_MEM,
105    CS_ARG_CORO_OUTPUTS,
106    CS_ARG_MAX,
107 };
108 
109 struct lp_mesh_llvm_iface {
110    struct lp_build_mesh_iface base;
111 
112    LLVMValueRef vertex_count;
113    LLVMValueRef prim_count;
114    LLVMValueRef outputs;
115 };
116 
117 static inline const struct lp_mesh_llvm_iface *
lp_mesh_llvm_iface(const struct lp_build_mesh_iface * iface)118 lp_mesh_llvm_iface(const struct lp_build_mesh_iface *iface)
119 {
120    return (const struct lp_mesh_llvm_iface *)iface;
121 }
122 
123 
124 static LLVMTypeRef
create_mesh_jit_output_type_deref(struct gallivm_state * gallivm)125 create_mesh_jit_output_type_deref(struct gallivm_state *gallivm)
126 {
127    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
128    LLVMTypeRef output_array;
129 
130    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
131    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_OUTPUTS); /* num attrs per vertex */
132    return output_array;
133 }
134 
135 static void
lp_mesh_llvm_emit_store_output(const struct lp_build_mesh_iface * mesh_iface,struct lp_build_context * bld,unsigned name,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)136 lp_mesh_llvm_emit_store_output(const struct lp_build_mesh_iface *mesh_iface,
137                                 struct lp_build_context *bld,
138                                 unsigned name,
139                                 bool is_vindex_indirect,
140                                 LLVMValueRef vertex_index,
141                                 bool is_aindex_indirect,
142                                 LLVMValueRef attrib_index,
143                                 bool is_sindex_indirect,
144                                 LLVMValueRef swizzle_index,
145                                 LLVMValueRef value,
146                                 LLVMValueRef mask_vec)
147 {
148    const struct lp_mesh_llvm_iface *mesh = lp_mesh_llvm_iface(mesh_iface);
149    struct gallivm_state *gallivm = bld->gallivm;
150    LLVMBuilderRef builder = gallivm->builder;
151    LLVMValueRef indices[3];
152    LLVMValueRef res;
153    struct lp_type type = bld->type;
154    LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
155 
156    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
157       for (int i = 0; i < type.length; ++i) {
158          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
159          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
160          LLVMValueRef attr_chan_index = attrib_index;
161          LLVMValueRef swiz_chan_index = swizzle_index;
162          LLVMValueRef channel_vec;
163 
164          if (is_vindex_indirect) {
165             vert_chan_index = LLVMBuildExtractElement(builder,
166                                                       vertex_index, idx, "");
167          }
168          if (is_aindex_indirect) {
169             attr_chan_index = LLVMBuildExtractElement(builder,
170                                                       attrib_index, idx, "");
171          }
172 
173          if (is_sindex_indirect) {
174             swiz_chan_index = LLVMBuildExtractElement(builder,
175                                                       swizzle_index, idx, "");
176          }
177 
178          indices[0] = vert_chan_index;
179          indices[1] = attr_chan_index;
180          indices[2] = swiz_chan_index;
181 
182          channel_vec = LLVMBuildGEP2(builder, output_type, mesh->outputs, indices, 3, "");
183 
184          res = LLVMBuildExtractElement(builder, value, idx, "");
185 
186          struct lp_build_if_state ifthen;
187          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
188          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
189          lp_build_if(&ifthen, gallivm, cond);
190          LLVMBuildStore(builder, res, channel_vec);
191          lp_build_endif(&ifthen);
192       }
193    } else {
194       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
195       indices[1] = attrib_index;
196       indices[2] = swizzle_index;
197 
198       res = LLVMBuildGEP2(builder, output_type, mesh->outputs, indices, 3, "");
199       for (unsigned i = 0; i < type.length; ++i) {
200          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
201          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
202 
203          struct lp_build_if_state ifthen;
204          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
205          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
206          lp_build_if(&ifthen, gallivm, cond);
207          LLVMBuildStore(builder, val, res);
208          lp_build_endif(&ifthen);
209       }
210    }
211 }
212 
213 static void
lp_mesh_emit_vertex_and_primitive_count(const struct lp_build_mesh_iface * mesh_iface,struct lp_build_context * bld,LLVMValueRef vertices_count,LLVMValueRef primitives_count)214 lp_mesh_emit_vertex_and_primitive_count(const struct lp_build_mesh_iface *mesh_iface,
215                                         struct lp_build_context *bld,
216                                         LLVMValueRef vertices_count,
217                                         LLVMValueRef primitives_count)
218 {
219    const struct lp_mesh_llvm_iface *mesh = lp_mesh_llvm_iface(mesh_iface);
220    struct gallivm_state *gallivm = bld->gallivm;
221 
222    LLVMBuildStore(gallivm->builder, vertices_count, mesh->vertex_count);
223    LLVMBuildStore(gallivm->builder, primitives_count, mesh->prim_count);
224 }
225 
226 static void
mesh_convert_to_aos(struct gallivm_state * gallivm,nir_shader * nir,bool vert_only,LLVMTypeRef io_type,LLVMValueRef io,LLVMValueRef outputs,LLVMValueRef clipmask,LLVMValueRef vertex_index,struct lp_type soa_type,int primid_slot,bool need_edgeflag)227 mesh_convert_to_aos(struct gallivm_state *gallivm,
228                     nir_shader *nir,
229                     bool vert_only,
230                     LLVMTypeRef io_type,
231                     LLVMValueRef io,
232                     LLVMValueRef outputs,
233                     LLVMValueRef clipmask,
234                     LLVMValueRef vertex_index,
235                     struct lp_type soa_type,
236                     int primid_slot,
237                     bool need_edgeflag)
238 {
239    LLVMBuilderRef builder = gallivm->builder;
240    LLVMValueRef inds[3];
241    LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
242 #if DEBUG_STORE
243    lp_build_printf(gallivm, "   # storing begin\n");
244 #endif
245    int first_per_prim_attrib = -1;
246    nir_foreach_shader_out_variable(var, nir) {
247       if (var->data.per_primitive) {
248          first_per_prim_attrib = var->data.driver_location;
249          break;
250       }
251    }
252    nir_foreach_shader_out_variable(var, nir) {
253 
254       if (vert_only && var->data.per_primitive)
255          continue;
256       if (!vert_only && !var->data.per_primitive)
257          continue;
258       int attrib = var->data.driver_location;
259       int slots = glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
260 
261       for (unsigned s = 0; s < slots; s++) {
262          LLVMValueRef soa[TGSI_NUM_CHANNELS];
263          LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
264          for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
265             inds[0] = vertex_index;
266             inds[1] = lp_build_const_int32(gallivm, attrib);
267             inds[2] = lp_build_const_int32(gallivm, chan);
268 
269             LLVMValueRef res = LLVMBuildGEP2(builder, output_type, outputs, inds, 3, "");
270             LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_elem_type(gallivm, soa_type) : lp_build_elem_type(gallivm, soa_type);
271             LLVMValueRef out = LLVMBuildLoad2(builder, single_type, res, "");
272             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
273 #if DEBUG_STORE
274             lp_build_printf(gallivm, "output %d : %d ",
275                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
276                                          attrib, 0),
277                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
278                                          chan, 0));
279             lp_build_print_value(gallivm, "val = ", out);
280             {
281                LLVMValueRef iv =
282                   LLVMBuildBitCast(builder, out, lp_build_int_elem_type(gallivm, soa_type), "");
283 
284                lp_build_print_value(gallivm, "  ival = ", iv);
285             }
286 #endif
287             soa[chan] = out;
288          }
289          LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
290          aos[0] = LLVMGetUndef(LLVMVectorType(float_type, 4));
291          for (unsigned i = 0; i <  4; i++)
292             aos[0] = LLVMBuildInsertElement(builder, aos[0], soa[i], lp_build_const_int32(gallivm, i), "");
293          int aos_attrib = attrib;
294          if (var->data.per_primitive)
295             aos_attrib -= first_per_prim_attrib;
296          draw_store_aos_array(gallivm,
297                               soa_type,
298                               io_type,
299                               io,
300                               NULL,
301                               aos,
302                               aos_attrib,
303                               clipmask,
304                               need_edgeflag, var->data.per_primitive);
305          attrib++;
306       }
307    }
308 #if DEBUG_STORE
309    lp_build_printf(gallivm, "   # storing end\n");
310 #endif
311 }
312 
313 static void
generate_compute(struct llvmpipe_context * lp,struct lp_compute_shader * shader,struct lp_compute_shader_variant * variant)314 generate_compute(struct llvmpipe_context *lp,
315                  struct lp_compute_shader *shader,
316                  struct lp_compute_shader_variant *variant)
317 {
318    struct gallivm_state *gallivm = variant->gallivm;
319    struct nir_shader *nir = shader->base.ir.nir;
320    const struct lp_compute_shader_variant_key *key = &variant->key;
321    char func_name[64], func_name_coro[64];
322    LLVMTypeRef arg_types[CS_ARG_MAX];
323    LLVMTypeRef func_type, coro_func_type;
324    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
325    LLVMValueRef context_ptr, resources_ptr;
326    LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
327    LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
328    LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
329    LLVMValueRef work_dim_arg, draw_id_arg, thread_data_ptr, io_ptr;
330    LLVMBasicBlockRef block;
331    LLVMBuilderRef builder;
332    struct lp_build_sampler_soa *sampler;
333    struct lp_build_image_soa *image;
334    LLVMValueRef function, coro;
335    struct lp_type cs_type;
336    struct lp_mesh_llvm_iface mesh_iface;
337    bool is_mesh = nir->info.stage == MESA_SHADER_MESH;
338    unsigned i;
339 
340    LLVMValueRef output_array = NULL;
341 
342    /*
343     * This function has two parts
344     * a) setup the coroutine execution environment loop.
345     * b) build the compute shader llvm for use inside the coroutine.
346     */
347    assert(lp_native_vector_width / 32 >= 4);
348 
349    memset(&cs_type, 0, sizeof cs_type);
350    cs_type.floating = true;      /* floating point values */
351    cs_type.sign = true;          /* values are signed */
352    cs_type.norm = false;         /* values are not limited to [0,1] or [-1,1] */
353    cs_type.width = 32;           /* 32-bit float */
354    cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
355    snprintf(func_name, sizeof(func_name), "cs_variant");
356 
357    snprintf(func_name_coro, sizeof(func_name), "cs_co_variant");
358 
359    arg_types[CS_ARG_CONTEXT] = variant->jit_cs_context_ptr_type;       /* context */
360    arg_types[CS_ARG_RESOURCES]=  variant->jit_resources_ptr_type;
361    arg_types[CS_ARG_BLOCK_X_SIZE] = int32_type;                        /* block_x_size */
362    arg_types[CS_ARG_BLOCK_Y_SIZE] = int32_type;                        /* block_y_size */
363    arg_types[CS_ARG_BLOCK_Z_SIZE] = int32_type;                        /* block_z_size */
364    arg_types[CS_ARG_GRID_X] = int32_type;                              /* grid_x */
365    arg_types[CS_ARG_GRID_Y] = int32_type;                              /* grid_y */
366    arg_types[CS_ARG_GRID_Z] = int32_type;                              /* grid_z */
367    arg_types[CS_ARG_GRID_SIZE_X] = int32_type;                         /* grid_size_x */
368    arg_types[CS_ARG_GRID_SIZE_Y] = int32_type;                         /* grid_size_y */
369    arg_types[CS_ARG_GRID_SIZE_Z] = int32_type;                         /* grid_size_z */
370    arg_types[CS_ARG_WORK_DIM] = int32_type;                            /* work dim */
371    arg_types[CS_ARG_DRAW_ID] = int32_type;                             /* draw id */
372    if (variant->jit_vertex_header_ptr_type)
373       arg_types[CS_ARG_VERTEX_DATA] = variant->jit_vertex_header_ptr_type; /* mesh shaders only */
374    else
375       arg_types[CS_ARG_VERTEX_DATA] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* mesh shaders only */
376    arg_types[CS_ARG_PER_THREAD_DATA] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
377    arg_types[CS_ARG_CORO_SUBGROUP_COUNT] = int32_type;                 /* coro only - subgroup count */
378    arg_types[CS_ARG_CORO_PARTIALS] = int32_type;                       /* coro only - partials */
379    arg_types[CS_ARG_CORO_BLOCK_X_SIZE] = int32_type;                   /* coro block_x_size */
380    arg_types[CS_ARG_CORO_BLOCK_Y_SIZE] = int32_type;                   /* coro block_y_size */
381    arg_types[CS_ARG_CORO_BLOCK_Z_SIZE] = int32_type;                   /* coro block_z_size */
382    arg_types[CS_ARG_CORO_IDX] = int32_type;                            /* coro idx */
383    arg_types[CS_ARG_CORO_MEM] = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 0);
384    arg_types[CS_ARG_CORO_OUTPUTS] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* mesh shaders only */
385 
386    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
387                                 arg_types, CS_ARG_OUTER_COUNT, 0);
388 
389    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
390                                      arg_types, CS_ARG_MAX - (!is_mesh), 0);
391 
392    function = LLVMAddFunction(gallivm->module, func_name, func_type);
393    LLVMSetFunctionCallConv(function, LLVMCCallConv);
394 
395    coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
396    LLVMSetFunctionCallConv(coro, LLVMCCallConv);
397    lp_build_coro_add_presplit(coro);
398 
399    variant->function = function;
400    variant->function_name = MALLOC(strlen(func_name)+1);
401    strcpy(variant->function_name, func_name);
402 
403 
404    for (i = 0; i < CS_ARG_MAX - !is_mesh; ++i) {
405       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
406          lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
407          if (i < CS_ARG_OUTER_COUNT)
408             lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
409       }
410    }
411 
412    if (variant->gallivm->cache->data_size) {
413       gallivm_stub_func(gallivm, function);
414       gallivm_stub_func(gallivm, coro);
415       return;
416    }
417 
418    context_ptr  = LLVMGetParam(function, CS_ARG_CONTEXT);
419    resources_ptr  = LLVMGetParam(function, CS_ARG_RESOURCES);
420    block_x_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_X_SIZE);
421    block_y_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_Y_SIZE);
422    block_z_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_Z_SIZE);
423    grid_x_arg = LLVMGetParam(function, CS_ARG_GRID_X);
424    grid_y_arg = LLVMGetParam(function, CS_ARG_GRID_Y);
425    grid_z_arg = LLVMGetParam(function, CS_ARG_GRID_Z);
426    grid_size_x_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_X);
427    grid_size_y_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_Y);
428    grid_size_z_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_Z);
429    work_dim_arg = LLVMGetParam(function, CS_ARG_WORK_DIM);
430    draw_id_arg = LLVMGetParam(function, CS_ARG_DRAW_ID);
431    io_ptr = LLVMGetParam(function, CS_ARG_VERTEX_DATA);
432    thread_data_ptr = LLVMGetParam(function, CS_ARG_PER_THREAD_DATA);
433 
434    lp_build_name(context_ptr, "context");
435    lp_build_name(resources_ptr, "resources");
436    lp_build_name(block_x_size_arg, "x_size");
437    lp_build_name(block_y_size_arg, "y_size");
438    lp_build_name(block_z_size_arg, "z_size");
439    lp_build_name(grid_x_arg, "grid_x");
440    lp_build_name(grid_y_arg, "grid_y");
441    lp_build_name(grid_z_arg, "grid_z");
442    lp_build_name(grid_size_x_arg, "grid_size_x");
443    lp_build_name(grid_size_y_arg, "grid_size_y");
444    lp_build_name(grid_size_z_arg, "grid_size_z");
445    lp_build_name(work_dim_arg, "work_dim");
446    lp_build_name(draw_id_arg, "draw_id");
447    lp_build_name(thread_data_ptr, "thread_data");
448    lp_build_name(io_ptr, "vertex_io");
449 
450    lp_build_nir_prepasses(nir);
451    struct hash_table *fns = _mesa_pointer_hash_table_create(NULL);
452 
453    sampler = lp_llvm_sampler_soa_create(lp_cs_variant_key_samplers(key),
454                                         MAX2(key->nr_samplers,
455                                              key->nr_sampler_views));
456    image = lp_bld_llvm_image_soa_create(lp_cs_variant_key_images(key), key->nr_images);
457 
458    if (exec_list_length(&nir->functions) > 1) {
459       LLVMTypeRef call_context_type = lp_build_cs_func_call_context(gallivm, cs_type.length,
460                                                                     variant->jit_cs_context_type,
461                                                                     variant->jit_resources_type);
462       nir_foreach_function(func, nir) {
463          if (func->is_entrypoint)
464             continue;
465 
466          LLVMTypeRef args[32];
467          int num_args;
468 
469          num_args = func->num_params + LP_RESV_FUNC_ARGS;
470 
471          args[0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), cs_type.length); /* mask */
472          args[1] = LLVMPointerType(call_context_type, 0);
473          for (int i = 0; i < func->num_params; i++) {
474             args[i + LP_RESV_FUNC_ARGS] = LLVMVectorType(LLVMIntTypeInContext(gallivm->context, func->params[i].bit_size), cs_type.length);
475             if (func->params[i].num_components > 1)
476                args[i + LP_RESV_FUNC_ARGS] = LLVMArrayType(args[i + LP_RESV_FUNC_ARGS], func->params[i].num_components);
477          }
478 
479          LLVMTypeRef func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
480                                                   args, num_args, 0);
481          LLVMValueRef lfunc = LLVMAddFunction(gallivm->module, func->name, func_type);
482          LLVMSetFunctionCallConv(lfunc, LLVMCCallConv);
483 
484          struct lp_build_fn *new_fn = ralloc(fns, struct lp_build_fn);
485          new_fn->fn_type = func_type;
486          new_fn->fn = lfunc;
487          _mesa_hash_table_insert(fns, func, new_fn);
488       }
489 
490       nir_foreach_function(func, nir) {
491          if (func->is_entrypoint)
492             continue;
493 
494          struct hash_entry *entry = _mesa_hash_table_search(fns, func);
495          assert(entry);
496          struct lp_build_fn *new_fn = entry->data;
497          LLVMValueRef lfunc = new_fn->fn;
498          block = LLVMAppendBasicBlockInContext(gallivm->context, lfunc, "entry");
499 
500          builder = gallivm->builder;
501          LLVMPositionBuilderAtEnd(builder, block);
502          LLVMValueRef mask_param = LLVMGetParam(lfunc, 0);
503          LLVMValueRef call_context_ptr = LLVMGetParam(lfunc, 1);
504          LLVMValueRef call_context = LLVMBuildLoad2(builder, call_context_type, call_context_ptr, "");
505          struct lp_build_mask_context mask;
506          struct lp_bld_tgsi_system_values system_values;
507 
508          memset(&system_values, 0, sizeof(system_values));
509 
510          lp_build_mask_begin(&mask, gallivm, cs_type, mask_param);
511          lp_build_mask_check(&mask);
512 
513          struct lp_build_tgsi_params params;
514          memset(&params, 0, sizeof(params));
515          params.type = cs_type;
516          params.mask = &mask;
517          params.fns = fns;
518          params.current_func = lfunc;
519          params.context_type = variant->jit_cs_context_type;
520          params.resources_type = variant->jit_resources_type;
521          params.call_context_ptr = call_context_ptr;
522          params.context_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_CONTEXT, "");
523          params.resources_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_RESOURCES, "");
524          params.shared_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_SHARED, "");
525          params.scratch_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_SCRATCH, "");
526          system_values.work_dim = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
527          system_values.thread_id[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
528          system_values.thread_id[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
529          system_values.thread_id[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
530          system_values.block_id[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
531          system_values.block_id[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
532          system_values.block_id[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
533          system_values.grid_size[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
534          system_values.grid_size[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
535          system_values.grid_size[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
536          system_values.block_size[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
537          system_values.block_size[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
538          system_values.block_size[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
539 
540          params.system_values = &system_values;
541 
542          params.consts_ptr = lp_jit_resources_constants(gallivm,
543                                                         variant->jit_resources_type,
544                                                         params.resources_ptr);
545          params.sampler = sampler;
546          params.ssbo_ptr = lp_jit_resources_ssbos(gallivm,
547                                                   variant->jit_resources_type,
548                                                   params.resources_ptr);
549          params.image = image;
550 
551          lp_build_nir_soa_func(gallivm, shader->base.ir.nir,
552                                func->impl,
553                                &params,
554                                NULL);
555 
556          lp_build_mask_end(&mask);
557 
558          LLVMBuildRetVoid(builder);
559          gallivm_verify_function(gallivm, lfunc);
560       }
561    }
562 
563    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
564    builder = gallivm->builder;
565    assert(builder);
566    LLVMPositionBuilderAtEnd(builder, block);
567 
568    if (is_mesh) {
569       LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
570       output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs");
571    }
572 
573    struct lp_build_loop_state loop_state[2];
574 
575    LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);
576 
577    LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
578    invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");
579 
580    LLVMValueRef partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");
581 
582    LLVMValueRef num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
583    num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");
584 
585    /* build a ptr in memory to store all the frames in later. */
586    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
587    LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
588    LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);
589 
590    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls");
591 
592    unsigned end_coroutine = INT_MAX;
593 
594    /*
595     * This is the main coroutine execution loop. It iterates over the dimensions
596     * and calls the coroutine main entrypoint on the first pass, but in subsequent
597     * passes it checks if the coroutine has completed and resumes it if not.
598     */
599    lp_build_loop_begin(&loop_state[1], gallivm,
600                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
601    lp_build_loop_begin(&loop_state[0], gallivm,
602                        lp_build_const_int32(gallivm, 0)); /* subgroup loop */
603    {
604       LLVMValueRef args[CS_ARG_MAX];
605       args[CS_ARG_CONTEXT] = context_ptr;
606       args[CS_ARG_RESOURCES] = resources_ptr;
607       args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type);
608       args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type);
609       args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type);
610       args[CS_ARG_GRID_X] = grid_x_arg;
611       args[CS_ARG_GRID_Y] = grid_y_arg;
612       args[CS_ARG_GRID_Z] = grid_z_arg;
613       args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg;
614       args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg;
615       args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg;
616       args[CS_ARG_WORK_DIM] = work_dim_arg;
617       args[CS_ARG_DRAW_ID] = draw_id_arg;
618       args[CS_ARG_VERTEX_DATA] = io_ptr;
619       args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr;
620       args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop;
621       args[CS_ARG_CORO_PARTIALS] = partials;
622       args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg;
623       args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg;
624       args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg;
625 
626       args[CS_ARG_CORO_IDX] = loop_state[0].counter;
627 
628       args[CS_ARG_CORO_MEM] = coro_mem;
629 
630       if (is_mesh)
631          args[CS_ARG_CORO_OUTPUTS] = output_array;
632 
633       LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
634 
635       LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl");
636 
637       struct lp_build_if_state ifstate;
638       LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter,
639                                        lp_build_const_int32(gallivm, 0), "");
640       /* first time here - call the coroutine function entry point */
641       lp_build_if(&ifstate, gallivm, cmp);
642       LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, "");
643       LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
644       lp_build_else(&ifstate);
645       /* subsequent calls for this invocation - check if done. */
646       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
647       struct lp_build_if_state ifstate2;
648       lp_build_if(&ifstate2, gallivm, coro_done);
649       /* if done destroy and force loop exit */
650       lp_build_coro_destroy(gallivm, coro_hdl);
651       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
652       lp_build_else(&ifstate2);
653       /* otherwise resume the coroutine */
654       lp_build_coro_resume(gallivm, coro_hdl);
655       lp_build_endif(&ifstate2);
656       lp_build_endif(&ifstate);
657       lp_build_loop_force_reload_counter(&loop_state[1]);
658    }
659    lp_build_loop_end_cond(&loop_state[0],
660                           num_subgroup_loop,
661                           NULL,  LLVMIntUGE);
662    lp_build_loop_end_cond(&loop_state[1],
663                           lp_build_const_int32(gallivm, end_coroutine),
664                           NULL, LLVMIntEQ);
665 
666    LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, "");
667    LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
668    LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0);
669    LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");
670 
671    LLVMBuildRetVoid(builder);
672 
673    /* This is stage (b) - generate the compute shader code inside the coroutine. */
674    context_ptr  = LLVMGetParam(coro, CS_ARG_CONTEXT);
675    resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES);
676    grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X);
677    grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y);
678    grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z);
679    grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X);
680    grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y);
681    grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z);
682    work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM);
683    draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID);
684    io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA);
685    thread_data_ptr  = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA);
686    num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT);
687    partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS);
688    block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE);
689    block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE);
690    block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE);
691    LLVMValueRef subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX);
692    coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM);
693    if (is_mesh)
694       output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS);
695    block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
696    LLVMPositionBuilderAtEnd(builder, block);
697    {
698       LLVMValueRef consts_ptr;
699       LLVMValueRef ssbo_ptr;
700       LLVMValueRef shared_ptr;
701       LLVMValueRef payload_ptr;
702       LLVMValueRef kernel_args_ptr;
703       struct lp_build_mask_context mask;
704       struct lp_bld_tgsi_system_values system_values;
705 
706       memset(&system_values, 0, sizeof(system_values));
707       consts_ptr = lp_jit_resources_constants(gallivm, variant->jit_resources_type, resources_ptr);
708       ssbo_ptr = lp_jit_resources_ssbos(gallivm, variant->jit_resources_type, resources_ptr);
709       kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm,
710                                                       variant->jit_cs_context_type,
711                                                       context_ptr);
712 
713       shared_ptr = lp_jit_cs_thread_data_shared(gallivm,
714                                                 variant->jit_cs_thread_data_type,
715                                                 thread_data_ptr);
716       payload_ptr = lp_jit_cs_thread_data_payload(gallivm,
717                                                   variant->jit_cs_thread_data_type,
718                                                   thread_data_ptr);
719 
720       /* these are coroutine entrypoint necessities */
721       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
722       LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop);
723       LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context);
724       LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, "");
725       alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, "");
726       LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
727       LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");
728 
729       struct lp_build_context bld;
730       lp_build_context_init(&bld, gallivm, lp_uint_type(cs_type));
731 
732       LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, subgroup_id, vec_length, "");
733       LLVMValueRef invocation_indices[LP_MAX_VECTOR_LENGTH];
734       for (i = 0; i < cs_type.length; i++)
735          invocation_indices[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
736       LLVMValueRef invocation_index = lp_build_gather_values(gallivm, invocation_indices, cs_type.length);
737 
738       LLVMValueRef block_x_size_vec = lp_build_broadcast_scalar(&bld, block_x_size_arg);
739       LLVMValueRef block_y_size_vec = lp_build_broadcast_scalar(&bld, block_y_size_arg);
740 
741       if (nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
742          /* x = (invocation_index / 4 * 2 + invocation_index % 2) % block_width */
743          LLVMValueRef quad_x = LLVMBuildAnd(builder, invocation_index, lp_build_const_int_vec(gallivm, bld.type, ~3u), "");
744          quad_x = LLVMBuildUDiv(builder, quad_x, lp_build_const_int_vec(gallivm, bld.type, 2), "");
745          LLVMValueRef quad_sub_x = LLVMBuildURem(builder, invocation_index, lp_build_const_int_vec(gallivm, bld.type, 2), "");
746          system_values.thread_id[0] = LLVMBuildAdd(builder, quad_x, quad_sub_x, "");
747          system_values.thread_id[0] = LLVMBuildURem(builder, system_values.thread_id[0], block_x_size_vec, "");
748          /* y = (invocation_index / block_width / 2 * 2 + (invocation_index / 2) % 2) % block_height */
749          LLVMValueRef quad_y = LLVMBuildUDiv(builder, invocation_index, block_x_size_vec, "");
750          quad_y = LLVMBuildAnd(builder, quad_y, lp_build_const_int_vec(gallivm, bld.type, ~1u), "");
751          LLVMValueRef quad_sub_y = LLVMBuildUDiv(builder, invocation_index, lp_build_const_int_vec(gallivm, bld.type, 2), "");
752          quad_sub_y = LLVMBuildURem(builder, quad_sub_y, lp_build_const_int_vec(gallivm, bld.type, 2), "");
753          system_values.thread_id[1] = LLVMBuildAdd(builder, quad_y, quad_sub_y, "");
754          system_values.thread_id[1] = LLVMBuildURem(builder, system_values.thread_id[1], block_y_size_vec, "");
755       } else {
756          system_values.thread_id[0] = LLVMBuildURem(gallivm->builder, invocation_index, block_x_size_vec, "");
757          system_values.thread_id[1] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
758          system_values.thread_id[1] = LLVMBuildURem(gallivm->builder, system_values.thread_id[1], block_y_size_vec, "");
759       }
760       system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
761       system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, system_values.thread_id[2], block_y_size_vec, "");
762 
763       system_values.block_id[0] = grid_x_arg;
764       system_values.block_id[1] = grid_y_arg;
765       system_values.block_id[2] = grid_z_arg;
766 
767       system_values.grid_size[0] = grid_size_x_arg;
768       system_values.grid_size[1] = grid_size_y_arg;
769       system_values.grid_size[2] = grid_size_z_arg;
770 
771       system_values.work_dim = work_dim_arg;
772       system_values.draw_id = draw_id_arg;
773 
774       system_values.subgroup_id = subgroup_id;
775       system_values.num_subgroups = num_subgroup_loop;
776 
777       system_values.block_size[0] = block_x_size_arg;
778       system_values.block_size[1] = block_y_size_arg;
779       system_values.block_size[2] = block_z_size_arg;
780 
781       LLVMValueRef last_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id, LLVMBuildSub(gallivm->builder, num_subgroup_loop, lp_build_const_int32(gallivm, 1), ""), "");
782       LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_loop, has_partials, "");
783       struct lp_build_if_state if_state;
784       LLVMTypeRef mask_type = LLVMVectorType(int32_type, cs_type.length);
785       LLVMValueRef mask_val = lp_build_alloca(gallivm, mask_type, "mask");
786       LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0);
787       LLVMBuildStore(gallivm->builder, full_mask_val, mask_val);
788 
789       lp_build_if(&if_state, gallivm, use_partial_mask);
790       struct lp_build_loop_state mask_loop_state;
791       lp_build_loop_begin(&mask_loop_state, gallivm, partials);
792       LLVMValueRef tmask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
793       tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, "");
794       LLVMBuildStore(gallivm->builder, tmask_val, mask_val);
795       lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE);
796       lp_build_endif(&if_state);
797 
798       mask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
799       lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);
800 
801       struct lp_build_coro_suspend_info coro_info;
802 
803       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
804       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");
805 
806       coro_info.suspend = sus_block;
807       coro_info.cleanup = clean_block;
808 
809       if (is_mesh) {
810          LLVMValueRef vertex_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "vertex_count");
811          LLVMValueRef primitive_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "prim_count");
812          mesh_iface.base.emit_store_output = lp_mesh_llvm_emit_store_output;
813          mesh_iface.base.emit_vertex_and_primitive_count = lp_mesh_emit_vertex_and_primitive_count;
814          mesh_iface.vertex_count = vertex_count;
815          mesh_iface.prim_count = primitive_count;
816          mesh_iface.outputs = output_array;
817       }
818 
819       struct lp_build_tgsi_params params;
820       memset(&params, 0, sizeof(params));
821 
822       params.type = cs_type;
823       params.mask = &mask;
824       params.consts_ptr = consts_ptr;
825       params.system_values = &system_values;
826       params.context_type = variant->jit_cs_context_type;
827       params.context_ptr = context_ptr;
828       params.resources_type = variant->jit_resources_type;
829       params.resources_ptr = resources_ptr;
830       params.sampler = sampler;
831       params.ssbo_ptr = ssbo_ptr;
832       params.image = image;
833       params.shared_ptr = shared_ptr;
834       params.payload_ptr = payload_ptr;
835       params.coro = &coro_info;
836       params.kernel_args = kernel_args_ptr;
837       params.mesh_iface = &mesh_iface.base;
838 
839       params.current_func = NULL;
840       params.fns = fns;
841       lp_build_nir_soa_func(gallivm, nir,
842                             nir_shader_get_entrypoint(nir),
843                             &params, NULL);
844 
845       if (is_mesh) {
846          LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
847          LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
848                                                         lp_int_type(cs_type), 0);
849 
850          struct lp_build_if_state iter0state;
851          LLVMValueRef is_iter0 = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id,
852                                                lp_build_const_int32(gallivm, 0), "");
853          LLVMValueRef vertex_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.vertex_count, "");
854          LLVMValueRef prim_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.prim_count, "");
855 
856          LLVMValueRef vert_count_ptr, prim_count_ptr;
857          LLVMValueRef indices = lp_build_const_int32(gallivm, 1);
858          vert_count_ptr = LLVMBuildGEP2(gallivm->builder, i32t, io_ptr, &indices, 1, "");
859          indices = lp_build_const_int32(gallivm, 2);
860          prim_count_ptr = LLVMBuildGEP2(gallivm->builder, i32t, io_ptr, &indices, 1, "");
861 
862          lp_build_if(&iter0state, gallivm, is_iter0);
863          LLVMBuildStore(gallivm->builder, vertex_count, vert_count_ptr);
864          LLVMBuildStore(gallivm->builder, prim_count, prim_count_ptr);
865          lp_build_endif(&iter0state);
866 
867          LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
868 
869          lp_build_coro_suspend_switch(gallivm, params.coro, resume, false);
870          LLVMPositionBuilderAtEnd(gallivm->builder, resume);
871 
872          vertex_count = LLVMBuildLoad2(gallivm->builder, i32t, vert_count_ptr, "");
873          prim_count = LLVMBuildLoad2(gallivm->builder, i32t, prim_count_ptr, "");
874 
875          int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs);
876          int out_count = util_bitcount64(nir->info.outputs_written);
877          int per_vert_count = out_count - per_prim_count;
878          int vsize = (sizeof(struct vertex_header) + per_vert_count * 4 * sizeof(float)) * 8;
879          int psize = (per_prim_count * 4 * sizeof(float)) * 8;
880          struct lp_build_loop_state vertex_loop_state;
881 
882          lp_build_loop_begin(&vertex_loop_state, gallivm,
883                              lp_build_const_int32(gallivm, 0));
884          LLVMValueRef io;
885          io = LLVMBuildPtrToInt(gallivm->builder, io_ptr, LLVMInt64TypeInContext(gallivm->context),  "");
886          io = LLVMBuildAdd(builder, io, LLVMBuildZExt(builder, LLVMBuildMul(builder, vertex_loop_state.counter, lp_build_const_int32(gallivm, vsize), ""), LLVMInt64TypeInContext(gallivm->context), ""), "");
887          io = LLVMBuildIntToPtr(gallivm->builder, io, LLVMPointerType(LLVMVoidTypeInContext(gallivm->context), 0), "");
888          mesh_convert_to_aos(gallivm, shader->base.ir.nir, true, variant->jit_vertex_header_type,
889                              io, output_array, clipmask,
890                              vertex_loop_state.counter, lp_elem_type(cs_type), -1, false);
891          lp_build_loop_end_cond(&vertex_loop_state,
892                                 vertex_count,
893                                 NULL,  LLVMIntUGE);
894 
895          struct lp_build_loop_state prim_loop_state;
896          lp_build_loop_begin(&prim_loop_state, gallivm,
897                              lp_build_const_int32(gallivm, 0));
898          io = LLVMBuildPtrToInt(gallivm->builder, io_ptr, LLVMInt64TypeInContext(gallivm->context),  "");
899          LLVMValueRef prim_offset = LLVMBuildMul(builder, prim_loop_state.counter, lp_build_const_int32(gallivm, psize), "");
900          prim_offset = LLVMBuildAdd(builder, prim_offset, lp_build_const_int32(gallivm, vsize * (nir->info.mesh.max_vertices_out + 8)), "");
901          io = LLVMBuildAdd(builder, io, LLVMBuildZExt(builder, prim_offset, LLVMInt64TypeInContext(gallivm->context), ""), "");
902          io = LLVMBuildIntToPtr(gallivm->builder, io, LLVMPointerType(LLVMVoidTypeInContext(gallivm->context), 0), "");
903          mesh_convert_to_aos(gallivm, shader->base.ir.nir, false, variant->jit_prim_type,
904                              io, output_array, clipmask,
905                              prim_loop_state.counter, lp_elem_type(cs_type), -1, false);
906          lp_build_loop_end_cond(&prim_loop_state,
907                                 prim_count,
908                                 NULL,  LLVMIntUGE);
909       }
910 
911       mask_val = lp_build_mask_end(&mask);
912 
913       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
914       LLVMPositionBuilderAtEnd(builder, clean_block);
915 
916       LLVMBuildBr(builder, sus_block);
917       LLVMPositionBuilderAtEnd(builder, sus_block);
918 
919       lp_build_coro_end(gallivm, coro_hdl);
920       LLVMBuildRet(builder, coro_hdl);
921    }
922 
923    lp_bld_llvm_sampler_soa_destroy(sampler);
924    lp_bld_llvm_image_soa_destroy(image);
925    _mesa_hash_table_destroy(fns, NULL);
926 
927    gallivm_verify_function(gallivm, coro);
928    gallivm_verify_function(gallivm, function);
929 }
930 
931 
932 static void *
llvmpipe_create_compute_state(struct pipe_context * pipe,const struct pipe_compute_state * templ)933 llvmpipe_create_compute_state(struct pipe_context *pipe,
934                               const struct pipe_compute_state *templ)
935 {
936    struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
937    struct nir_shader *nir = NULL;
938    if (!shader)
939       return NULL;
940 
941    shader->no = cs_no++;
942 
943    shader->base.type = PIPE_SHADER_IR_NIR;
944 
945    if (templ->ir_type == PIPE_SHADER_IR_TGSI) {
946       shader->base.ir.nir = tgsi_to_nir(templ->prog, pipe->screen, false);
947    } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
948       shader->base.ir.nir = (struct nir_shader *)templ->prog;
949    }
950 
951    nir = (struct nir_shader *)shader->base.ir.nir;
952    shader->req_local_mem += nir->info.shared_size;
953    shader->zero_initialize_shared_memory = nir->info.zero_initialize_shared_memory;
954 
955    llvmpipe_register_shader(pipe, &shader->base);
956 
957    list_inithead(&shader->variants.list);
958 
959    int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
960    int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
961    int nr_images = BITSET_LAST_BIT(nir->info.images_used);
962    shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
963 
964    return shader;
965 }
966 
967 
968 static void
llvmpipe_bind_compute_state(struct pipe_context * pipe,void * cs)969 llvmpipe_bind_compute_state(struct pipe_context *pipe,
970                             void *cs)
971 {
972    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
973 
974    if (llvmpipe->cs == cs)
975       return;
976 
977    llvmpipe->cs = (struct lp_compute_shader *)cs;
978    llvmpipe->cs_dirty |= LP_CSNEW_CS;
979 }
980 
981 static void
llvmpipe_get_compute_state_info(struct pipe_context * pipe,void * cs,struct pipe_compute_state_object_info * info)982 llvmpipe_get_compute_state_info(struct pipe_context *pipe, void *cs,
983                                 struct pipe_compute_state_object_info *info)
984 {
985    struct lp_compute_shader* shader = cs;
986    struct nir_shader* nir = shader->base.ir.nir;
987 
988    info->max_threads = 1024;
989    info->simd_sizes = lp_native_vector_width / 32;
990    info->preferred_simd_size = info->simd_sizes;
991    // TODO: this is a bad estimate, but not much we can do without actually compiling the shaders
992    info->private_memory = nir->scratch_size;
993 }
994 
995 
996 /**
997  * Remove shader variant from two lists: the shader's variant list
998  * and the context's variant list.
999  */
1000 static void
llvmpipe_remove_cs_shader_variant(struct llvmpipe_context * lp,struct lp_compute_shader_variant * variant)1001 llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp,
1002                                   struct lp_compute_shader_variant *variant)
1003 {
1004    if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
1005       debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u "
1006                    "v total cached %u inst %u total inst %u\n",
1007                    variant->shader->no, variant->no,
1008                    variant->shader->variants_created,
1009                    variant->shader->variants_cached,
1010                    lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs);
1011    }
1012 
1013    gallivm_destroy(variant->gallivm);
1014 
1015    /* remove from shader's list */
1016    list_del(&variant->list_item_local.list);
1017    variant->shader->variants_cached--;
1018 
1019    /* remove from context's list */
1020    list_del(&variant->list_item_global.list);
1021    lp->nr_cs_variants--;
1022    lp->nr_cs_instrs -= variant->nr_instrs;
1023 
1024    if(variant->function_name)
1025       FREE(variant->function_name);
1026    FREE(variant);
1027 }
1028 
1029 
1030 static void
llvmpipe_delete_compute_state(struct pipe_context * pipe,void * cs)1031 llvmpipe_delete_compute_state(struct pipe_context *pipe,
1032                               void *cs)
1033 {
1034    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1035    struct lp_compute_shader *shader = cs;
1036    struct lp_cs_variant_list_item *li, *next;
1037 
1038    if (llvmpipe->cs == cs)
1039       llvmpipe->cs = NULL;
1040    for (unsigned i = 0; i < shader->max_global_buffers; i++)
1041       pipe_resource_reference(&shader->global_buffers[i], NULL);
1042    FREE(shader->global_buffers);
1043 
1044    /* Delete all the variants */
1045    LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
1046       llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
1047    }
1048    ralloc_free(shader->base.ir.nir);
1049    FREE(shader);
1050 }
1051 
1052 
1053 static struct lp_compute_shader_variant_key *
make_variant_key(struct llvmpipe_context * lp,struct lp_compute_shader * shader,enum pipe_shader_type sh_type,char * store)1054 make_variant_key(struct llvmpipe_context *lp,
1055                  struct lp_compute_shader *shader,
1056                  enum pipe_shader_type sh_type,
1057                  char *store)
1058 {
1059    struct lp_compute_shader_variant_key *key =
1060       (struct lp_compute_shader_variant_key *)store;
1061    memset(key, 0, sizeof(*key));
1062 
1063    struct nir_shader *nir = (struct nir_shader *)shader->base.ir.nir;
1064    /* This value will be the same for all the variants of a given shader:
1065     */
1066    key->nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
1067    key->nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
1068    struct lp_sampler_static_state *cs_sampler;
1069 
1070    cs_sampler = lp_cs_variant_key_samplers(key);
1071 
1072    memset(cs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *cs_sampler);
1073    for (unsigned i = 0; i < key->nr_samplers; ++i) {
1074       if (BITSET_TEST(nir->info.samplers_used, i)) {
1075          lp_sampler_static_sampler_state(&cs_sampler[i].sampler_state,
1076                                          lp->samplers[sh_type][i]);
1077       }
1078    }
1079 
1080    /*
1081     * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
1082     * are dx10-style? Can't really have mixed opcodes, at least not
1083     * if we want to skip the holes here (without rescanning tgsi).
1084     */
1085    if (!BITSET_IS_EMPTY(nir->info.textures_used)) {
1086       for (unsigned i = 0; i < key->nr_sampler_views; ++i) {
1087          /*
1088           * Note sview may exceed what's representable by file_mask.
1089           * This will still work, the only downside is that not actually
1090           * used views may be included in the shader key.
1091           */
1092          if (BITSET_TEST(nir->info.textures_used, i)) {
1093             lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
1094                                             lp->sampler_views[sh_type][i]);
1095          }
1096       }
1097    } else {
1098       key->nr_sampler_views = key->nr_samplers;
1099       for (unsigned i = 0; i < key->nr_sampler_views; ++i) {
1100          if (BITSET_TEST(nir->info.samplers_used, i)) {
1101             lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
1102                                             lp->sampler_views[sh_type][i]);
1103          }
1104       }
1105    }
1106 
1107    struct lp_image_static_state *lp_image;
1108    lp_image = lp_cs_variant_key_images(key);
1109    key->nr_images = BITSET_LAST_BIT(nir->info.images_used);
1110 
1111    if (key->nr_images)
1112       memset(lp_image, 0,
1113              key->nr_images * sizeof *lp_image);
1114    for (unsigned i = 0; i < key->nr_images; ++i) {
1115       if (BITSET_TEST(nir->info.images_used, i)) {
1116          lp_sampler_static_texture_state_image(&lp_image[i].image_state,
1117                                                &lp->images[sh_type][i]);
1118       }
1119    }
1120    return key;
1121 }
1122 
1123 
1124 static void
dump_cs_variant_key(const struct lp_compute_shader_variant_key * key)1125 dump_cs_variant_key(const struct lp_compute_shader_variant_key *key)
1126 {
1127    int i;
1128    debug_printf("cs variant %p:\n", (void *) key);
1129 
1130    for (i = 0; i < key->nr_samplers; ++i) {
1131       const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
1132       const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state;
1133       debug_printf("sampler[%u] = \n", i);
1134       debug_printf("  .wrap = %s %s %s\n",
1135                    util_str_tex_wrap(sampler->wrap_s, true),
1136                    util_str_tex_wrap(sampler->wrap_t, true),
1137                    util_str_tex_wrap(sampler->wrap_r, true));
1138       debug_printf("  .min_img_filter = %s\n",
1139                    util_str_tex_filter(sampler->min_img_filter, true));
1140       debug_printf("  .min_mip_filter = %s\n",
1141                    util_str_tex_mipfilter(sampler->min_mip_filter, true));
1142       debug_printf("  .mag_img_filter = %s\n",
1143                    util_str_tex_filter(sampler->mag_img_filter, true));
1144       if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
1145          debug_printf("  .compare_func = %s\n", util_str_func(sampler->compare_func, true));
1146       debug_printf("  .normalized_coords = %u\n", sampler->normalized_coords);
1147       debug_printf("  .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
1148       debug_printf("  .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
1149       debug_printf("  .apply_min_lod = %u\n", sampler->apply_min_lod);
1150       debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
1151       debug_printf("  .aniso = %u\n", sampler->aniso);
1152    }
1153    for (i = 0; i < key->nr_sampler_views; ++i) {
1154       const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
1155       const struct lp_static_texture_state *texture = &samplers[i].texture_state;
1156       debug_printf("texture[%u] = \n", i);
1157       debug_printf("  .format = %s\n",
1158                    util_format_name(texture->format));
1159       debug_printf("  .target = %s\n",
1160                    util_str_tex_target(texture->target, true));
1161       debug_printf("  .level_zero_only = %u\n",
1162                    texture->level_zero_only);
1163       debug_printf("  .pot = %u %u %u\n",
1164                    texture->pot_width,
1165                    texture->pot_height,
1166                    texture->pot_depth);
1167    }
1168    struct lp_image_static_state *images = lp_cs_variant_key_images(key);
1169    for (i = 0; i < key->nr_images; ++i) {
1170       const struct lp_static_texture_state *image = &images[i].image_state;
1171       debug_printf("image[%u] = \n", i);
1172       debug_printf("  .format = %s\n",
1173                    util_format_name(image->format));
1174       debug_printf("  .target = %s\n",
1175                    util_str_tex_target(image->target, true));
1176       debug_printf("  .level_zero_only = %u\n",
1177                    image->level_zero_only);
1178       debug_printf("  .pot = %u %u %u\n",
1179                    image->pot_width,
1180                    image->pot_height,
1181                    image->pot_depth);
1182    }
1183 }
1184 
1185 
1186 static void
lp_debug_cs_variant(const struct lp_compute_shader_variant * variant)1187 lp_debug_cs_variant(const struct lp_compute_shader_variant *variant)
1188 {
1189    debug_printf("llvmpipe: Compute shader #%u variant #%u:\n",
1190                 variant->shader->no, variant->no);
1191    nir_print_shader(variant->shader->base.ir.nir, stderr);
1192    dump_cs_variant_key(&variant->key);
1193    debug_printf("\n");
1194 }
1195 
1196 
1197 static void
lp_cs_get_ir_cache_key(struct lp_compute_shader_variant * variant,unsigned char ir_sha1_cache_key[20])1198 lp_cs_get_ir_cache_key(struct lp_compute_shader_variant *variant,
1199                        unsigned char ir_sha1_cache_key[20])
1200 {
1201    struct blob blob = { 0 };
1202    unsigned ir_size;
1203    void *ir_binary;
1204 
1205    blob_init(&blob);
1206    nir_serialize(&blob, variant->shader->base.ir.nir, true);
1207    ir_binary = blob.data;
1208    ir_size = blob.size;
1209 
1210    struct mesa_sha1 ctx;
1211    _mesa_sha1_init(&ctx);
1212    _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
1213    _mesa_sha1_update(&ctx, ir_binary, ir_size);
1214    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
1215 
1216    blob_finish(&blob);
1217 }
1218 
1219 
1220 static struct lp_compute_shader_variant *
generate_variant(struct llvmpipe_context * lp,struct lp_compute_shader * shader,enum pipe_shader_type sh_type,const struct lp_compute_shader_variant_key * key)1221 generate_variant(struct llvmpipe_context *lp,
1222                  struct lp_compute_shader *shader,
1223                  enum pipe_shader_type sh_type,
1224                  const struct lp_compute_shader_variant_key *key)
1225 {
1226    struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
1227 
1228    struct lp_compute_shader_variant *variant =
1229       MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
1230    if (!variant)
1231       return NULL;
1232 
1233    memset(variant, 0, sizeof(*variant));
1234 
1235    char module_name[64];
1236    const char *shname = sh_type == PIPE_SHADER_MESH ? "ms" :
1237       (sh_type == PIPE_SHADER_TASK ? "ts" : "cs");
1238    snprintf(module_name, sizeof(module_name), "%s%u_variant%u",
1239             shname, shader->no, shader->variants_created);
1240 
1241    variant->shader = shader;
1242    memcpy(&variant->key, key, shader->variant_key_size);
1243 
1244    unsigned char ir_sha1_cache_key[20];
1245    struct lp_cached_code cached = { 0 };
1246    bool needs_caching = false;
1247 
1248    lp_cs_get_ir_cache_key(variant, ir_sha1_cache_key);
1249 
1250    lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
1251    if (!cached.data_size)
1252       needs_caching = true;
1253 
1254    variant->gallivm = gallivm_create(module_name, &lp->context, &cached);
1255    if (!variant->gallivm) {
1256       FREE(variant);
1257       return NULL;
1258    }
1259 
1260    variant->list_item_global.base = variant;
1261    variant->list_item_local.base = variant;
1262    variant->no = shader->variants_created++;
1263 
1264    if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
1265       lp_debug_cs_variant(variant);
1266    }
1267 
1268    lp_jit_init_cs_types(variant);
1269 
1270    if (sh_type == PIPE_SHADER_MESH) {
1271       struct nir_shader *nir = shader->base.ir.nir;
1272       int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs);
1273       int out_count = util_bitcount64(nir->info.outputs_written);
1274       int per_vert_count = out_count - per_prim_count;
1275       variant->jit_vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, per_vert_count);
1276       variant->jit_vertex_header_ptr_type = LLVMPointerType(variant->jit_vertex_header_type, 0);
1277       variant->jit_prim_type = LLVMArrayType(LLVMArrayType(LLVMFloatTypeInContext(variant->gallivm->context), 4), per_prim_count);
1278    }
1279 
1280    generate_compute(lp, shader, variant);
1281 
1282 #if GALLIVM_USE_ORCJIT
1283 /* module has been moved into ORCJIT after gallivm_compile_module */
1284    variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
1285 
1286    gallivm_compile_module(variant->gallivm);
1287 #else
1288    gallivm_compile_module(variant->gallivm);
1289 
1290    variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
1291 #endif
1292 
1293    variant->jit_function = (lp_jit_cs_func)
1294       gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
1295 
1296    if (needs_caching) {
1297       lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
1298    }
1299    gallivm_free_ir(variant->gallivm);
1300    return variant;
1301 }
1302 
1303 
1304 static void
lp_cs_ctx_set_cs_variant(struct lp_cs_context * csctx,struct lp_compute_shader_variant * variant)1305 lp_cs_ctx_set_cs_variant(struct lp_cs_context *csctx,
1306                          struct lp_compute_shader_variant *variant)
1307 {
1308    csctx->cs.current.variant = variant;
1309 }
1310 
1311 
1312 static struct lp_compute_shader_variant *
llvmpipe_update_cs_variant(struct llvmpipe_context * lp,enum pipe_shader_type sh_type,struct lp_compute_shader * shader)1313 llvmpipe_update_cs_variant(struct llvmpipe_context *lp,
1314                            enum pipe_shader_type sh_type,
1315                            struct lp_compute_shader *shader)
1316 {
1317    char store[LP_CS_MAX_VARIANT_KEY_SIZE];
1318    struct lp_compute_shader_variant_key *key =
1319       make_variant_key(lp, shader, sh_type, store);
1320    struct lp_compute_shader_variant *variant = NULL;
1321    struct lp_cs_variant_list_item *li;
1322 
1323    /* Search the variants for one which matches the key */
1324    LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
1325       if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
1326          variant = li->base;
1327          break;
1328       }
1329    }
1330 
1331    if (variant) {
1332       /* Move this variant to the head of the list to implement LRU
1333        * deletion of shader's when we have too many.
1334        */
1335       list_move_to(&variant->list_item_global.list,
1336                    &lp->cs_variants_list.list);
1337    } else {
1338       /* variant not found, create it now */
1339 
1340       if (LP_DEBUG & DEBUG_CS) {
1341          debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
1342                       lp->nr_cs_variants,
1343                       lp->nr_cs_instrs,
1344                       lp->nr_cs_variants
1345                       ? lp->nr_cs_instrs / lp->nr_cs_variants : 0);
1346       }
1347 
1348       /* First, check if we've exceeded the max number of shader variants.
1349        * If so, free 6.25% of them (the least recently used ones).
1350        */
1351       unsigned variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS
1352          ? LP_MAX_SHADER_VARIANTS / 16 : 0;
1353 
1354       if (variants_to_cull ||
1355           lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
1356          if (gallivm_debug & GALLIVM_DEBUG_PERF) {
1357             debug_printf("Evicting CS: %u cs variants,\t%u total variants,"
1358                          "\t%u instrs,\t%u instrs/variant\n",
1359                          shader->variants_cached,
1360                          lp->nr_cs_variants, lp->nr_cs_instrs,
1361                          lp->nr_cs_instrs / lp->nr_cs_variants);
1362          }
1363 
1364          /*
1365           * We need to re-check lp->nr_cs_variants because an arbitrarily large
1366           * number of shader variants (potentially all of them) could be
1367           * pending for destruction on flush.
1368           */
1369          for (unsigned i = 0;
1370               i < variants_to_cull ||
1371                  lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
1372             struct lp_cs_variant_list_item *item;
1373             if (list_is_empty(&lp->cs_variants_list.list)) {
1374                break;
1375             }
1376             item = list_last_entry(&lp->cs_variants_list.list,
1377                                    struct lp_cs_variant_list_item, list);
1378             assert(item);
1379             assert(item->base);
1380             llvmpipe_remove_cs_shader_variant(lp, item->base);
1381          }
1382       }
1383 
1384       /*
1385        * Generate the new variant.
1386        */
1387       int64_t t0, t1, dt;
1388       t0 = os_time_get();
1389       variant = generate_variant(lp, shader, sh_type, key);
1390       t1 = os_time_get();
1391       dt = t1 - t0;
1392       LP_COUNT_ADD(llvm_compile_time, dt);
1393       LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */
1394 
1395       /* Put the new variant into the list */
1396       if (variant) {
1397          list_add(&variant->list_item_local.list, &shader->variants.list);
1398          list_add(&variant->list_item_global.list, &lp->cs_variants_list.list);
1399          lp->nr_cs_variants++;
1400          lp->nr_cs_instrs += variant->nr_instrs;
1401          shader->variants_cached++;
1402       }
1403    }
1404    return variant;
1405 }
1406 
1407 static void
llvmpipe_update_cs(struct llvmpipe_context * lp)1408 llvmpipe_update_cs(struct llvmpipe_context *lp)
1409 {
1410    struct lp_compute_shader_variant *variant;
1411    variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_COMPUTE, lp->cs);
1412    /* Bind this variant */
1413    lp_cs_ctx_set_cs_variant(lp->csctx, variant);
1414 }
1415 
1416 
1417 /**
1418  * Called during state validation when LP_CSNEW_SAMPLER_VIEW is set.
1419  */
1420 static void
lp_csctx_set_sampler_views(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_view ** views)1421 lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
1422                            unsigned num,
1423                            struct pipe_sampler_view **views)
1424 {
1425    LP_DBG(DEBUG_SETUP, "%s\n", __func__);
1426 
1427    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
1428 
1429    const unsigned max_tex_num = MAX2(num, csctx->cs.current_tex_num);
1430 
1431    for (unsigned i = 0; i < max_tex_num; i++) {
1432       struct pipe_sampler_view *view = i < num ? views[i] : NULL;
1433 
1434       /* We are going to overwrite/unref the current texture further below. If
1435        * set, make sure to unmap its resource to avoid leaking previous
1436        * mapping.  */
1437       if (csctx->cs.current_tex[i])
1438          llvmpipe_resource_unmap(csctx->cs.current_tex[i], 0, 0);
1439 
1440       if (view) {
1441          struct pipe_resource *res = view->texture;
1442          struct lp_jit_texture *jit_tex;
1443          jit_tex = &csctx->cs.current.jit_resources.textures[i];
1444 
1445          /* We're referencing the texture's internal data, so save a
1446           * reference to it.
1447           */
1448          pipe_resource_reference(&csctx->cs.current_tex[i], res);
1449 
1450          lp_jit_texture_from_pipe(jit_tex, view);
1451       } else {
1452          pipe_resource_reference(&csctx->cs.current_tex[i], NULL);
1453       }
1454    }
1455    csctx->cs.current_tex_num = num;
1456 }
1457 
1458 
1459 /**
1460  * Called during state validation when LP_NEW_SAMPLER is set.
1461  */
1462 static void
lp_csctx_set_sampler_state(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_state ** samplers)1463 lp_csctx_set_sampler_state(struct lp_cs_context *csctx,
1464                            unsigned num,
1465                            struct pipe_sampler_state **samplers)
1466 {
1467    LP_DBG(DEBUG_SETUP, "%s\n", __func__);
1468 
1469    assert(num <= PIPE_MAX_SAMPLERS);
1470 
1471    for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
1472       const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;
1473 
1474       if (sampler) {
1475          struct lp_jit_sampler *jit_sam;
1476          jit_sam = &csctx->cs.current.jit_resources.samplers[i];
1477 
1478          jit_sam->min_lod = sampler->min_lod;
1479          jit_sam->max_lod = sampler->max_lod;
1480          jit_sam->lod_bias = sampler->lod_bias;
1481          COPY_4V(jit_sam->border_color, sampler->border_color.f);
1482       }
1483    }
1484 }
1485 
1486 
1487 static void
lp_csctx_set_cs_constants(struct lp_cs_context * csctx,unsigned num,struct pipe_constant_buffer * buffers)1488 lp_csctx_set_cs_constants(struct lp_cs_context *csctx,
1489                           unsigned num,
1490                           struct pipe_constant_buffer *buffers)
1491 {
1492    unsigned i;
1493 
1494    LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *) buffers);
1495 
1496    assert(num <= ARRAY_SIZE(csctx->constants));
1497 
1498    for (i = 0; i < num; ++i) {
1499       util_copy_constant_buffer(&csctx->constants[i].current, &buffers[i], false);
1500    }
1501    for (; i < ARRAY_SIZE(csctx->constants); i++) {
1502       util_copy_constant_buffer(&csctx->constants[i].current, NULL, false);
1503    }
1504 }
1505 
1506 
1507 static void
lp_csctx_set_cs_ssbos(struct lp_cs_context * csctx,unsigned num,struct pipe_shader_buffer * buffers)1508 lp_csctx_set_cs_ssbos(struct lp_cs_context *csctx,
1509                        unsigned num,
1510                        struct pipe_shader_buffer *buffers)
1511 {
1512    int i;
1513    LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *)buffers);
1514 
1515    assert (num <= ARRAY_SIZE(csctx->ssbos));
1516 
1517    for (i = 0; i < num; ++i) {
1518       util_copy_shader_buffer(&csctx->ssbos[i].current, &buffers[i]);
1519    }
1520    for (; i < ARRAY_SIZE(csctx->ssbos); i++) {
1521       util_copy_shader_buffer(&csctx->ssbos[i].current, NULL);
1522    }
1523 }
1524 
1525 
1526 static void
lp_csctx_set_cs_images(struct lp_cs_context * csctx,unsigned num,struct pipe_image_view * images)1527 lp_csctx_set_cs_images(struct lp_cs_context *csctx,
1528                        unsigned num,
1529                        struct pipe_image_view *images)
1530 {
1531    unsigned i;
1532 
1533    LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *) images);
1534 
1535    assert(num <= ARRAY_SIZE(csctx->images));
1536 
1537    for (i = 0; i < num; ++i) {
1538       struct pipe_image_view *image = &images[i];
1539       util_copy_image_view(&csctx->images[i].current, &images[i]);
1540 
1541       struct pipe_resource *res = image->resource;
1542       struct llvmpipe_resource *lp_res = llvmpipe_resource(res);
1543       struct lp_jit_image *jit_image;
1544 
1545       jit_image = &csctx->cs.current.jit_resources.images[i];
1546       if (!lp_res)
1547          continue;
1548 
1549       lp_jit_image_from_pipe(jit_image, image);
1550    }
1551    for (; i < ARRAY_SIZE(csctx->images); i++) {
1552       util_copy_image_view(&csctx->images[i].current, NULL);
1553    }
1554 }
1555 
1556 
1557 static void
update_csctx_consts(struct llvmpipe_context * llvmpipe,struct lp_cs_context * csctx)1558 update_csctx_consts(struct llvmpipe_context *llvmpipe,
1559                     struct lp_cs_context *csctx)
1560 {
1561    for (int i = 0; i < ARRAY_SIZE(csctx->constants); ++i) {
1562       lp_jit_buffer_from_pipe_const(&csctx->cs.current.jit_resources.constants[i],
1563                                     &csctx->constants[i].current, llvmpipe->pipe.screen);
1564    }
1565 }
1566 
1567 
1568 static void
update_csctx_ssbo(struct llvmpipe_context * llvmpipe,struct lp_cs_context * csctx)1569 update_csctx_ssbo(struct llvmpipe_context *llvmpipe,
1570                   struct lp_cs_context *csctx)
1571 {
1572    for (int i = 0; i < ARRAY_SIZE(csctx->ssbos); ++i) {
1573       struct pipe_resource *buffer = csctx->ssbos[i].current.buffer;
1574       const uint8_t *current_data = NULL;
1575 
1576       /* resource buffer */
1577       if (buffer)
1578          current_data = (uint8_t *) llvmpipe_resource_data(buffer);
1579       if (current_data) {
1580          current_data += csctx->ssbos[i].current.buffer_offset;
1581 
1582          csctx->cs.current.jit_resources.ssbos[i].u = (const uint32_t *)current_data;
1583          csctx->cs.current.jit_resources.ssbos[i].num_elements = csctx->ssbos[i].current.buffer_size;
1584       } else {
1585          csctx->cs.current.jit_resources.ssbos[i].u = NULL;
1586          csctx->cs.current.jit_resources.ssbos[i].num_elements = 0;
1587       }
1588    }
1589 }
1590 
1591 
1592 static void
llvmpipe_cs_update_derived(struct llvmpipe_context * llvmpipe,const void * input)1593 llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, const void *input)
1594 {
1595    if (llvmpipe->cs_dirty & LP_CSNEW_CONSTANTS) {
1596       lp_csctx_set_cs_constants(llvmpipe->csctx,
1597                                 ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_COMPUTE]),
1598                                 llvmpipe->constants[PIPE_SHADER_COMPUTE]);
1599       update_csctx_consts(llvmpipe, llvmpipe->csctx);
1600    }
1601 
1602    if (llvmpipe->cs_dirty & LP_CSNEW_SSBOS) {
1603       lp_csctx_set_cs_ssbos(llvmpipe->csctx,
1604                             ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_COMPUTE]),
1605                             llvmpipe->ssbos[PIPE_SHADER_COMPUTE]);
1606       update_csctx_ssbo(llvmpipe, llvmpipe->csctx);
1607    }
1608 
1609    if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER_VIEW)
1610       lp_csctx_set_sampler_views(llvmpipe->csctx,
1611                                  llvmpipe->num_sampler_views[PIPE_SHADER_COMPUTE],
1612                                  llvmpipe->sampler_views[PIPE_SHADER_COMPUTE]);
1613 
1614    if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER)
1615       lp_csctx_set_sampler_state(llvmpipe->csctx,
1616                                  llvmpipe->num_samplers[PIPE_SHADER_COMPUTE],
1617                                  llvmpipe->samplers[PIPE_SHADER_COMPUTE]);
1618 
1619    if (llvmpipe->cs_dirty & LP_CSNEW_IMAGES)
1620       lp_csctx_set_cs_images(llvmpipe->csctx,
1621                               ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]),
1622                               llvmpipe->images[PIPE_SHADER_COMPUTE]);
1623 
1624    struct lp_cs_context *csctx = llvmpipe->csctx;
1625    if (input) {
1626       csctx->input = input;
1627       csctx->cs.current.jit_context.kernel_args = input;
1628    }
1629 
1630    if (llvmpipe->cs_dirty & (LP_CSNEW_CS |
1631                              LP_CSNEW_IMAGES |
1632                              LP_CSNEW_SAMPLER_VIEW |
1633                              LP_CSNEW_SAMPLER))
1634       llvmpipe_update_cs(llvmpipe);
1635 
1636 
1637    llvmpipe->cs_dirty = 0;
1638 }
1639 
1640 
1641 static void
cs_exec_fn(void * init_data,int iter_idx,struct lp_cs_local_mem * lmem)1642 cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
1643 {
1644    struct lp_cs_job_info *job_info = init_data;
1645    struct lp_jit_cs_thread_data thread_data;
1646 
1647    memset(&thread_data, 0, sizeof(thread_data));
1648 
1649    if (lmem->local_size < job_info->req_local_mem) {
1650       lmem->local_mem_ptr = REALLOC(lmem->local_mem_ptr, lmem->local_size,
1651                                     job_info->req_local_mem);
1652       lmem->local_size = job_info->req_local_mem;
1653    }
1654    if (job_info->zero_initialize_shared_memory)
1655       memset(lmem->local_mem_ptr, 0, job_info->req_local_mem);
1656    thread_data.shared = lmem->local_mem_ptr;
1657 
1658    thread_data.payload = job_info->payload;
1659 
1660    unsigned grid_z, grid_y, grid_x;
1661 
1662    if (job_info->use_iters) {
1663       grid_z = iter_idx / (job_info->iter_size[0] * job_info->iter_size[1]);
1664       grid_y = (iter_idx - (grid_z * (job_info->iter_size[0] * job_info->iter_size[1]))) / job_info->iter_size[0];
1665       grid_x = (iter_idx - (grid_z * (job_info->iter_size[0] * job_info->iter_size[1])) - (grid_y * job_info->iter_size[0]));
1666    } else {
1667       grid_z = iter_idx / (job_info->grid_size[0] * job_info->grid_size[1]);
1668       grid_y = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1]))) / job_info->grid_size[0];
1669       grid_x = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1])) - (grid_y * job_info->grid_size[0]));
1670    }
1671 
1672    grid_z += job_info->grid_base[2];
1673    grid_y += job_info->grid_base[1];
1674    grid_x += job_info->grid_base[0];
1675    struct lp_compute_shader_variant *variant = job_info->current->variant;
1676 
1677    void *io_ptr = NULL;
1678    if (job_info->io) {
1679       size_t io_offset = job_info->io_stride * iter_idx;
1680       io_ptr = (char *)job_info->io + io_offset;
1681    }
1682    if (thread_data.payload) {
1683       size_t payload_offset = job_info->payload_stride * iter_idx;
1684       thread_data.payload = (char *)thread_data.payload + payload_offset;
1685    }
1686    variant->jit_function(&job_info->current->jit_context,
1687                          &job_info->current->jit_resources,
1688                          job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
1689                          grid_x, grid_y, grid_z,
1690                          job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2],
1691                          job_info->work_dim, job_info->draw_id,
1692                          io_ptr,
1693                          &thread_data);
1694 }
1695 
1696 
1697 static void
fill_grid_size(struct pipe_context * pipe,int idx,const struct pipe_grid_info * info,uint32_t grid_size[3])1698 fill_grid_size(struct pipe_context *pipe,
1699                int idx,
1700                const struct pipe_grid_info *info,
1701                uint32_t grid_size[3])
1702 {
1703    struct pipe_transfer *transfer;
1704    uint32_t *params;
1705    if (!info->indirect) {
1706       grid_size[0] = info->grid[0];
1707       grid_size[1] = info->grid[1];
1708       grid_size[2] = info->grid[2];
1709       return;
1710    }
1711    params = pipe_buffer_map_range(pipe, info->indirect,
1712                                   (info->indirect_stride * idx) + info->indirect_offset,
1713                                   3 * sizeof(uint32_t),
1714                                   PIPE_MAP_READ,
1715                                   &transfer);
1716 
1717    if (!transfer)
1718       return;
1719 
1720    grid_size[0] = params[0];
1721    grid_size[1] = params[1];
1722    grid_size[2] = params[2];
1723    pipe_buffer_unmap(pipe, transfer);
1724 }
1725 
1726 
1727 static void
llvmpipe_launch_grid(struct pipe_context * pipe,const struct pipe_grid_info * info)1728 llvmpipe_launch_grid(struct pipe_context *pipe,
1729                      const struct pipe_grid_info *info)
1730 {
1731    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1732    struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
1733    struct lp_cs_job_info job_info;
1734 
1735    if (!llvmpipe_check_render_cond(llvmpipe))
1736       return;
1737 
1738    memset(&job_info, 0, sizeof(job_info));
1739 
1740    llvmpipe_cs_update_derived(llvmpipe, info->input);
1741 
1742    fill_grid_size(pipe, 0, info, job_info.grid_size);
1743 
1744    job_info.grid_base[0] = info->grid_base[0];
1745    job_info.grid_base[1] = info->grid_base[1];
1746    job_info.grid_base[2] = info->grid_base[2];
1747    job_info.block_size[0] = info->block[0];
1748    job_info.block_size[1] = info->block[1];
1749    job_info.block_size[2] = info->block[2];
1750    job_info.work_dim = info->work_dim;
1751    job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem;
1752    job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
1753    job_info.current = &llvmpipe->csctx->cs.current;
1754 
1755    int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
1756    if (num_tasks) {
1757       struct lp_cs_tpool_task *task;
1758       mtx_lock(&screen->cs_mutex);
1759       task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
1760       mtx_unlock(&screen->cs_mutex);
1761 
1762       lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
1763    }
1764    if (!llvmpipe->queries_disabled)
1765       llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
1766 }
1767 
1768 
1769 static void
llvmpipe_set_compute_resources(struct pipe_context * pipe,unsigned start,unsigned count,struct pipe_surface ** resources)1770 llvmpipe_set_compute_resources(struct pipe_context *pipe,
1771                                unsigned start, unsigned count,
1772                                struct pipe_surface **resources)
1773 {
1774 }
1775 
1776 
1777 static void
llvmpipe_set_global_binding(struct pipe_context * pipe,unsigned first,unsigned count,struct pipe_resource ** resources,uint32_t ** handles)1778 llvmpipe_set_global_binding(struct pipe_context *pipe,
1779                             unsigned first, unsigned count,
1780                             struct pipe_resource **resources,
1781                             uint32_t **handles)
1782 {
1783    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1784    struct lp_compute_shader *cs = llvmpipe->cs;
1785 
1786    if (first + count > cs->max_global_buffers) {
1787       unsigned old_max = cs->max_global_buffers;
1788       cs->max_global_buffers = first + count;
1789       cs->global_buffers = realloc(cs->global_buffers,
1790                                    cs->max_global_buffers * sizeof(cs->global_buffers[0]));
1791       if (!cs->global_buffers) {
1792          return;
1793       }
1794 
1795       memset(&cs->global_buffers[old_max], 0, (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0]));
1796    }
1797 
1798    if (!resources) {
1799       for (unsigned i = 0; i < count; i++)
1800          pipe_resource_reference(&cs->global_buffers[first + i], NULL);
1801       return;
1802    }
1803 
1804    for (unsigned i = 0; i < count; i++) {
1805       uintptr_t va;
1806       uint32_t offset;
1807       pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
1808       struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]);
1809       offset = *handles[i];
1810       va = (uintptr_t)((char *)lp_res->data + offset);
1811       memcpy(handles[i], &va, sizeof(va));
1812    }
1813 }
1814 
1815 
1816 void
llvmpipe_init_compute_funcs(struct llvmpipe_context * llvmpipe)1817 llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
1818 {
1819    llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
1820    llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
1821    llvmpipe->pipe.get_compute_state_info = llvmpipe_get_compute_state_info;
1822    llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
1823    llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources;
1824    llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding;
1825    llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
1826 }
1827 
1828 
1829 void
lp_csctx_destroy(struct lp_cs_context * csctx)1830 lp_csctx_destroy(struct lp_cs_context *csctx)
1831 {
1832    unsigned i;
1833    for (i = 0; i < ARRAY_SIZE(csctx->cs.current_tex); i++) {
1834       struct pipe_resource **res_ptr = &csctx->cs.current_tex[i];
1835       if (*res_ptr)
1836          llvmpipe_resource_unmap(*res_ptr, 0, 0);
1837       pipe_resource_reference(res_ptr, NULL);
1838    }
1839    for (i = 0; i < ARRAY_SIZE(csctx->constants); i++) {
1840       pipe_resource_reference(&csctx->constants[i].current.buffer, NULL);
1841    }
1842    for (i = 0; i < ARRAY_SIZE(csctx->ssbos); i++) {
1843       pipe_resource_reference(&csctx->ssbos[i].current.buffer, NULL);
1844    }
1845    for (i = 0; i < ARRAY_SIZE(csctx->images); i++) {
1846       pipe_resource_reference(&csctx->images[i].current.resource, NULL);
1847    }
1848    FREE(csctx);
1849 }
1850 
1851 
1852 struct lp_cs_context *
lp_csctx_create(struct pipe_context * pipe)1853 lp_csctx_create(struct pipe_context *pipe)
1854 {
1855    struct lp_cs_context *csctx = CALLOC_STRUCT(lp_cs_context);
1856    if (!csctx)
1857       return NULL;
1858 
1859    csctx->pipe = pipe;
1860    return csctx;
1861 }
1862 
1863 void
llvmpipe_update_task_shader(struct llvmpipe_context * lp)1864 llvmpipe_update_task_shader(struct llvmpipe_context *lp)
1865 {
1866    if (!lp->tss)
1867       return;
1868    struct lp_compute_shader_variant *variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_TASK, lp->tss);
1869    lp_cs_ctx_set_cs_variant(lp->task_ctx, variant);
1870 }
1871 
1872 static void *
llvmpipe_create_ts_state(struct pipe_context * pipe,const struct pipe_shader_state * templ)1873 llvmpipe_create_ts_state(struct pipe_context *pipe,
1874                            const struct pipe_shader_state *templ)
1875 {
1876    struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
1877    if (!shader)
1878       return NULL;
1879 
1880    llvmpipe_register_shader(pipe, templ);
1881 
1882    shader->no = task_no++;
1883    shader->base.type = templ->type;
1884 
1885    shader->base.ir.nir = templ->ir.nir;
1886    shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
1887    list_inithead(&shader->variants.list);
1888 
1889    struct nir_shader *nir = shader->base.ir.nir;
1890    int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
1891    int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
1892    int nr_images = BITSET_LAST_BIT(nir->info.images_used);
1893    shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
1894    return shader;
1895 }
1896 
1897 
1898 static void
llvmpipe_bind_ts_state(struct pipe_context * pipe,void * _task)1899 llvmpipe_bind_ts_state(struct pipe_context *pipe, void *_task)
1900 {
1901    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1902 
1903    if (llvmpipe->tss == _task)
1904       return;
1905 
1906    llvmpipe->tss = (struct lp_compute_shader *)_task;
1907    llvmpipe->dirty |= LP_NEW_TASK;
1908 }
1909 
1910 static void
llvmpipe_delete_ts_state(struct pipe_context * pipe,void * _task)1911 llvmpipe_delete_ts_state(struct pipe_context *pipe, void *_task)
1912 {
1913    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1914    struct lp_compute_shader *shader = _task;
1915    struct lp_cs_variant_list_item *li, *next;
1916 
1917    /* Delete all the variants */
1918    LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
1919       llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
1920    }
1921    ralloc_free(shader->base.ir.nir);
1922    FREE(shader);
1923 }
1924 
1925 void
llvmpipe_init_task_funcs(struct llvmpipe_context * llvmpipe)1926 llvmpipe_init_task_funcs(struct llvmpipe_context *llvmpipe)
1927 {
1928    llvmpipe->pipe.create_ts_state = llvmpipe_create_ts_state;
1929    llvmpipe->pipe.bind_ts_state   = llvmpipe_bind_ts_state;
1930    llvmpipe->pipe.delete_ts_state = llvmpipe_delete_ts_state;
1931 }
1932 
1933 void
llvmpipe_update_mesh_shader(struct llvmpipe_context * lp)1934 llvmpipe_update_mesh_shader(struct llvmpipe_context *lp)
1935 {
1936    if (!lp->mhs)
1937       return;
1938    struct lp_compute_shader_variant *variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_MESH, lp->mhs);
1939    lp_cs_ctx_set_cs_variant(lp->mesh_ctx, variant);
1940 }
1941 
1942 static void *
llvmpipe_create_ms_state(struct pipe_context * pipe,const struct pipe_shader_state * templ)1943 llvmpipe_create_ms_state(struct pipe_context *pipe,
1944                            const struct pipe_shader_state *templ)
1945 {
1946    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1947    struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
1948    if (!shader)
1949       return NULL;
1950 
1951    llvmpipe_register_shader(pipe, templ);
1952 
1953    shader->no = mesh_no++;
1954    shader->base.type = templ->type;
1955 
1956    shader->base.ir.nir = templ->ir.nir;
1957    shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
1958    list_inithead(&shader->variants.list);
1959 
1960    shader->draw_mesh_data = draw_create_mesh_shader(llvmpipe->draw, templ);
1961    if (shader->draw_mesh_data == NULL) {
1962       FREE(shader);
1963       return NULL;
1964    }
1965 
1966    struct nir_shader *nir = shader->base.ir.nir;
1967    int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
1968    int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
1969    int nr_images = BITSET_LAST_BIT(nir->info.images_used);
1970    shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
1971    return shader;
1972 }
1973 
1974 
1975 static void
llvmpipe_bind_ms_state(struct pipe_context * pipe,void * _mesh)1976 llvmpipe_bind_ms_state(struct pipe_context *pipe, void *_mesh)
1977 {
1978    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1979 
1980    if (llvmpipe->mhs == _mesh)
1981       return;
1982 
1983    llvmpipe->mhs = (struct lp_compute_shader *)_mesh;
1984 
1985    draw_bind_mesh_shader(llvmpipe->draw, _mesh ? llvmpipe->mhs->draw_mesh_data : NULL);
1986    llvmpipe->dirty |= LP_NEW_MESH;
1987 }
1988 
1989 
1990 static void
llvmpipe_delete_ms_state(struct pipe_context * pipe,void * _mesh)1991 llvmpipe_delete_ms_state(struct pipe_context *pipe, void *_mesh)
1992 {
1993    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
1994    struct lp_compute_shader *shader = _mesh;
1995    struct lp_cs_variant_list_item *li, *next;
1996 
1997    /* Delete all the variants */
1998    LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
1999       llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
2000    }
2001 
2002    draw_delete_mesh_shader(llvmpipe->draw, shader->draw_mesh_data);
2003    ralloc_free(shader->base.ir.nir);
2004 
2005    FREE(shader);
2006 }
2007 
2008 static void
lp_mesh_call_draw(struct llvmpipe_context * lp,enum mesa_prim prim,int prim_out_idx,int cull_prim_idx,int task_idx,void * vbuf,size_t task_out_size,int vsize,int psize,int per_prim_count,size_t prim_offset)2009 lp_mesh_call_draw(struct llvmpipe_context *lp,
2010                   enum mesa_prim prim,
2011                   int prim_out_idx,
2012                   int cull_prim_idx,
2013                   int task_idx,
2014                   void *vbuf, size_t task_out_size,
2015                   int vsize, int psize, int per_prim_count,
2016                   size_t prim_offset)
2017 {
2018    unsigned prim_len = mesa_vertices_per_prim(prim);
2019    uint32_t *ptr = (uint32_t *)((char *)vbuf + task_out_size * task_idx);
2020    uint32_t vertex_count = ptr[1];
2021    uint32_t prim_count = ptr[2];
2022 
2023    if (!vertex_count || !prim_count)
2024       return;
2025 
2026    struct draw_vertex_info vinfo;
2027    vinfo.verts = (struct vertex_header *)ptr;
2028    vinfo.vertex_size = vsize / 8;
2029    vinfo.stride = vsize;
2030    vinfo.count = vertex_count;
2031 
2032    unsigned elts_size = prim_len * prim_count;
2033    unsigned short *elts = calloc(sizeof(uint16_t), elts_size);
2034    uint32_t *prim_lengths = calloc(prim_count, sizeof(uint32_t));
2035    int elts_idx = 0;
2036    char *prim_ptr = (char *)ptr + prim_offset;
2037    for (unsigned p = 0; p < prim_count; p++) {
2038       uint32_t *prim_idxs = (uint32_t *)(prim_ptr + p * psize + prim_out_idx * 4 * sizeof(float));
2039       for (unsigned elt = 0; elt < prim_len; elt++){
2040          elts[elts_idx++] = prim_idxs[elt];
2041       }
2042       prim_lengths[p] = prim_len;
2043    }
2044 
2045    struct draw_prim_info prim_info = { 0 };
2046    prim_info.prim = prim;
2047    prim_info.linear = false;
2048    prim_info.elts = elts;
2049    prim_info.count = prim_count;
2050    prim_info.primitive_count = prim_count;
2051    prim_info.primitive_lengths = prim_lengths;
2052 
2053    struct draw_vertex_info vert_out = { 0 };
2054    struct draw_prim_info prim_out = { 0 };
2055    draw_mesh_prim_run(lp->draw,
2056                       per_prim_count,
2057                       prim_ptr,
2058                       cull_prim_idx,
2059                       &prim_info,
2060                       &vinfo,
2061                       &prim_out,
2062                       &vert_out);
2063    free(elts);
2064    free(prim_lengths);
2065 
2066    draw_collect_primitives_generated(lp->draw,
2067                                      lp->active_primgen_queries &&
2068                                      !lp->queries_disabled);
2069    draw_mesh(lp->draw, &vert_out, &prim_out);
2070 
2071    free(vert_out.verts);
2072    free(prim_out.primitive_lengths);
2073 }
2074 
2075 static void
llvmpipe_draw_mesh_tasks(struct pipe_context * pipe,unsigned drawid_offset,const struct pipe_grid_info * info)2076 llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
2077                          unsigned drawid_offset,
2078                          const struct pipe_grid_info *info)
2079 {
2080    struct llvmpipe_context *lp = llvmpipe_context(pipe);
2081    struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
2082    struct lp_cs_job_info job_info;
2083 
2084    if (!llvmpipe_check_render_cond(lp))
2085       return;
2086 
2087    memset(&job_info, 0, sizeof(job_info));
2088    if (lp->dirty)
2089       llvmpipe_update_derived(lp);
2090 
2091    unsigned draw_count = info->draw_count;
2092    if (info->indirect && info->indirect_draw_count) {
2093       struct pipe_transfer *dc_transfer;
2094       uint32_t *dc_param = pipe_buffer_map_range(pipe,
2095                                                  info->indirect_draw_count,
2096                                                  info->indirect_draw_count_offset,
2097                                                  4, PIPE_MAP_READ, &dc_transfer);
2098       if (!dc_transfer) {
2099          debug_printf("%s: failed to map indirect draw count buffer\n", __func__);
2100          return;
2101       }
2102       if (dc_param[0] < draw_count)
2103          draw_count = dc_param[0];
2104       pipe_buffer_unmap(pipe, dc_transfer);
2105    }
2106 
2107    struct nir_shader *mhs_shader = lp->mhs->base.ir.nir;
2108    int prim_out_idx = -1;
2109    int first_per_prim_idx = -1;
2110    int cull_prim_idx = -1;
2111    nir_foreach_shader_out_variable(var, mhs_shader) {
2112       if (var->data.per_primitive) {
2113          first_per_prim_idx = var->data.driver_location;
2114          break;
2115       }
2116    }
2117    nir_foreach_shader_out_variable(var, mhs_shader) {
2118       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
2119          prim_out_idx = var->data.driver_location;
2120          break;
2121       }
2122    }
2123    nir_foreach_shader_out_variable(var, mhs_shader) {
2124       if (var->data.location == VARYING_SLOT_CULL_PRIMITIVE) {
2125          cull_prim_idx = var->data.driver_location - first_per_prim_idx;
2126          break;
2127       }
2128    }
2129    int per_prim_count = util_bitcount64(mhs_shader->info.per_primitive_outputs);
2130    int out_count = util_bitcount64(mhs_shader->info.outputs_written);
2131    int per_vert_count = out_count - per_prim_count;
2132    int vsize = (sizeof(struct vertex_header) + per_vert_count * 4 * sizeof(float)) * 8;
2133    int psize = (per_prim_count * 4 * sizeof(float)) * 8;
2134    size_t prim_offset = vsize * (mhs_shader->info.mesh.max_vertices_out + 8);
2135    size_t task_out_size = prim_offset + psize * (mhs_shader->info.mesh.max_primitives_out + 8);
2136 
2137    for (unsigned dr = 0; dr < draw_count; dr++) {
2138       fill_grid_size(pipe, dr, info, job_info.grid_size);
2139 
2140       job_info.grid_base[0] = info->grid_base[0];
2141       job_info.grid_base[1] = info->grid_base[1];
2142       job_info.grid_base[2] = info->grid_base[2];
2143       job_info.block_size[0] = info->block[0];
2144       job_info.block_size[1] = info->block[1];
2145       job_info.block_size[2] = info->block[2];
2146 
2147       void *payload = NULL;
2148       size_t payload_stride = 0;
2149       int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
2150       int num_mesh_invocs = 1;
2151       if (lp->tss) {
2152          struct nir_shader *tsk_shader = lp->tss->base.ir.nir;
2153          payload_stride = tsk_shader->info.task_payload_size + 3 * sizeof(uint32_t);
2154 
2155          payload = calloc(num_tasks, payload_stride);
2156 
2157          job_info.use_iters = false;
2158          job_info.payload = payload;
2159          job_info.payload_stride = payload_stride;
2160          job_info.work_dim = info->work_dim;
2161          job_info.draw_id = dr + drawid_offset;
2162          job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem;
2163          job_info.current = &lp->task_ctx->cs.current;
2164 
2165          if (num_tasks) {
2166             struct lp_cs_tpool_task *task;
2167             mtx_lock(&screen->cs_mutex);
2168             task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
2169             mtx_unlock(&screen->cs_mutex);
2170 
2171             lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
2172          }
2173          if (!lp->queries_disabled)
2174             lp->pipeline_statistics.ts_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
2175          num_mesh_invocs = num_tasks;
2176       }
2177 
2178       for (unsigned i = 0; i < num_mesh_invocs; i++) {
2179          if (payload) {
2180             void *this_payload = (char *)payload + (payload_stride * i);
2181             uint32_t *payload_grid = (uint32_t *)this_payload;
2182             assert(lp->tss);
2183             job_info.grid_size[0] = payload_grid[0];
2184             job_info.grid_size[1] = payload_grid[1];
2185             job_info.grid_size[2] = payload_grid[2];
2186             job_info.payload = this_payload;
2187             job_info.block_size[0] = mhs_shader->info.workgroup_size[0];
2188             job_info.block_size[1] = mhs_shader->info.workgroup_size[1];
2189             job_info.block_size[2] = mhs_shader->info.workgroup_size[2];
2190          }
2191 
2192          job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem;
2193          job_info.current = &lp->mesh_ctx->cs.current;
2194          job_info.payload_stride = 0;
2195          job_info.draw_id = dr + drawid_offset;
2196          job_info.io_stride = task_out_size;
2197 
2198          uint32_t job_strides[3] = { job_info.grid_size[0], job_info.grid_size[1], job_info.grid_size[2] };
2199          uint32_t total_grid[3] = { job_info.grid_size[0], job_info.grid_size[1], job_info.grid_size[2] };
2200          const unsigned int max_tasks = 4096;
2201          /* limit how large memory allocation can get for vbuf */
2202          for (unsigned g = 0; g < 3; g++) {
2203             if (job_strides[g] > max_tasks) {
2204                job_strides[g] = max_tasks;
2205             }
2206          }
2207 
2208          for (unsigned grid_z = 0; grid_z < total_grid[2]; grid_z += job_strides[2]) {
2209             int this_z = MIN2(total_grid[2] - grid_z, max_tasks);
2210             job_info.grid_base[2] = grid_z;
2211             for (unsigned grid_y = 0; grid_y < total_grid[1]; grid_y += job_strides[1]) {
2212                int this_y = MIN2(total_grid[1] - grid_y, max_tasks);
2213                job_info.grid_base[1] = grid_y;
2214                for (unsigned grid_x = 0; grid_x < total_grid[0]; grid_x += job_strides[0]) {
2215                   int this_x = MIN2(total_grid[0] - grid_x, max_tasks);
2216                   job_info.grid_base[0] = grid_x;
2217                   num_tasks = this_x * this_y * this_z;
2218 
2219                   job_info.iter_size[0] = this_x;
2220                   job_info.iter_size[1] = this_y;
2221                   job_info.iter_size[2] = this_z;
2222                   job_info.use_iters = true;
2223 
2224                   void *vbuf = CALLOC(num_tasks, task_out_size);
2225                   if (!vbuf)
2226                      return;
2227 
2228                   job_info.io = vbuf;
2229                   if (num_tasks) {
2230                      struct lp_cs_tpool_task *task;
2231                      mtx_lock(&screen->cs_mutex);
2232                      task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
2233                      mtx_unlock(&screen->cs_mutex);
2234 
2235                      lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
2236                   }
2237                   if (!lp->queries_disabled)
2238                      lp->pipeline_statistics.ms_invocations += num_tasks * job_info.block_size[0] * job_info.block_size[1] * job_info.block_size[2];
2239 
2240                   for (unsigned t = 0; t < num_tasks; t++)
2241                      lp_mesh_call_draw(lp,
2242                                        mhs_shader->info.mesh.primitive_type,
2243                                        prim_out_idx - first_per_prim_idx,
2244                                        cull_prim_idx, t, vbuf, task_out_size,
2245                                        vsize, psize, per_prim_count, prim_offset);
2246                   free(vbuf);
2247                }
2248             }
2249          }
2250       }
2251       free(payload);
2252    }
2253    draw_flush(lp->draw);
2254 }
2255 
2256 void
llvmpipe_init_mesh_funcs(struct llvmpipe_context * llvmpipe)2257 llvmpipe_init_mesh_funcs(struct llvmpipe_context *llvmpipe)
2258 {
2259    llvmpipe->pipe.create_ms_state = llvmpipe_create_ms_state;
2260    llvmpipe->pipe.bind_ms_state   = llvmpipe_bind_ms_state;
2261    llvmpipe->pipe.delete_ms_state = llvmpipe_delete_ms_state;
2262 
2263    llvmpipe->pipe.draw_mesh_tasks = llvmpipe_draw_mesh_tasks;
2264 }
2265 
2266 void
llvmpipe_task_update_derived(struct llvmpipe_context * llvmpipe)2267 llvmpipe_task_update_derived(struct llvmpipe_context *llvmpipe)
2268 {
2269    if (llvmpipe->dirty & LP_NEW_TASK_CONSTANTS) {
2270       lp_csctx_set_cs_constants(llvmpipe->task_ctx,
2271                                 ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_TASK]),
2272                                 llvmpipe->constants[PIPE_SHADER_TASK]);
2273       update_csctx_consts(llvmpipe, llvmpipe->task_ctx);
2274    }
2275 
2276    if (llvmpipe->dirty & LP_NEW_TASK_SSBOS) {
2277       lp_csctx_set_cs_ssbos(llvmpipe->task_ctx,
2278                             ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_TASK]),
2279                             llvmpipe->ssbos[PIPE_SHADER_TASK]);
2280       update_csctx_ssbo(llvmpipe, llvmpipe->task_ctx);
2281    }
2282 
2283    if (llvmpipe->dirty & LP_NEW_TASK_SAMPLER_VIEW)
2284       lp_csctx_set_sampler_views(llvmpipe->task_ctx,
2285                                  llvmpipe->num_sampler_views[PIPE_SHADER_TASK],
2286                                  llvmpipe->sampler_views[PIPE_SHADER_TASK]);
2287 
2288    if (llvmpipe->dirty & LP_NEW_TASK_SAMPLER)
2289       lp_csctx_set_sampler_state(llvmpipe->task_ctx,
2290                                  llvmpipe->num_samplers[PIPE_SHADER_TASK],
2291                                  llvmpipe->samplers[PIPE_SHADER_TASK]);
2292 
2293    if (llvmpipe->dirty & LP_NEW_TASK_IMAGES)
2294       lp_csctx_set_cs_images(llvmpipe->task_ctx,
2295                               ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_TASK]),
2296                               llvmpipe->images[PIPE_SHADER_TASK]);
2297 }
2298 
2299 void
llvmpipe_mesh_update_derived(struct llvmpipe_context * llvmpipe)2300 llvmpipe_mesh_update_derived(struct llvmpipe_context *llvmpipe)
2301 {
2302    if (llvmpipe->dirty & LP_NEW_MESH_CONSTANTS) {
2303       lp_csctx_set_cs_constants(llvmpipe->mesh_ctx,
2304                                 ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_MESH]),
2305                                 llvmpipe->constants[PIPE_SHADER_MESH]);
2306       update_csctx_consts(llvmpipe, llvmpipe->mesh_ctx);
2307    }
2308 
2309    if (llvmpipe->dirty & LP_NEW_MESH_SSBOS) {
2310       lp_csctx_set_cs_ssbos(llvmpipe->mesh_ctx,
2311                             ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_MESH]),
2312                             llvmpipe->ssbos[PIPE_SHADER_MESH]);
2313       update_csctx_ssbo(llvmpipe, llvmpipe->mesh_ctx);
2314    }
2315 
2316    if (llvmpipe->dirty & LP_NEW_MESH_SAMPLER_VIEW)
2317       lp_csctx_set_sampler_views(llvmpipe->mesh_ctx,
2318                                  llvmpipe->num_sampler_views[PIPE_SHADER_MESH],
2319                                  llvmpipe->sampler_views[PIPE_SHADER_MESH]);
2320 
2321    if (llvmpipe->dirty & LP_NEW_MESH_SAMPLER)
2322       lp_csctx_set_sampler_state(llvmpipe->mesh_ctx,
2323                                  llvmpipe->num_samplers[PIPE_SHADER_MESH],
2324                                  llvmpipe->samplers[PIPE_SHADER_MESH]);
2325 
2326    if (llvmpipe->dirty & LP_NEW_MESH_IMAGES)
2327       lp_csctx_set_cs_images(llvmpipe->mesh_ctx,
2328                               ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_MESH]),
2329                               llvmpipe->images[PIPE_SHADER_MESH]);
2330 }
2331