• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "draw_llvm.h"
29 
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33 
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "gallivm/lp_bld_jit_sample.h"
56 #include "tgsi/tgsi_exec.h"
57 #include "tgsi/tgsi_dump.h"
58 
59 #include "util/u_math.h"
60 #include "util/u_pointer.h"
61 #include "util/u_string.h"
62 #include "nir_serialize.h"
63 #include "util/mesa-sha1.h"
64 #define DEBUG_STORE 0
65 
66 
67 static void
68 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
69 
70 
71 struct draw_gs_llvm_iface {
72    struct lp_build_gs_iface base;
73 
74    struct draw_gs_llvm_variant *variant;
75    LLVMValueRef input;
76 };
77 
78 
/**
 * Downcast a generic GS interface pointer to the draw-specific wrapper.
 * Relies on 'base' being the first member of struct draw_gs_llvm_iface.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *)iface;

   return gs_iface;
}
84 
85 
86 struct draw_tcs_llvm_iface {
87    struct lp_build_tcs_iface base;
88 
89    struct draw_tcs_llvm_variant *variant;
90    LLVMValueRef input;
91    LLVMValueRef output;
92 };
93 
94 
/**
 * Downcast a generic TCS interface pointer to the draw-specific wrapper.
 * Relies on 'base' being the first member of struct draw_tcs_llvm_iface.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *)iface;

   return tcs_iface;
}
100 
101 
102 struct draw_tes_llvm_iface {
103    struct lp_build_tes_iface base;
104 
105    struct draw_tes_llvm_variant *variant;
106    LLVMValueRef input;
107 };
108 
109 
/**
 * Downcast a generic TES interface pointer to the draw-specific wrapper.
 * Relies on 'base' being the first member of struct draw_tes_llvm_iface.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *)iface;

   return tes_iface;
}
115 
116 
117 /**
118  * Create LLVM type for draw_vertex_buffer.
119  */
120 static LLVMTypeRef
create_jit_dvbuffer_type(struct gallivm_state * gallivm,const char * struct_name)121 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
122                          const char *struct_name)
123 {
124    LLVMTargetDataRef target = gallivm->target;
125    LLVMTypeRef dvbuffer_type;
126    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
127    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
128 
129    elem_types[DRAW_JIT_DVBUFFER_MAP] =
130       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
131    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
132 
133    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
134                                            ARRAY_SIZE(elem_types), 0);
135 
136    (void) target; /* silence unused var warning for non-debug build */
137    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
138                           target, dvbuffer_type,
139                           DRAW_JIT_DVBUFFER_MAP);
140    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
141                           target, dvbuffer_type,
142                           DRAW_JIT_DVBUFFER_SIZE);
143 
144    return dvbuffer_type;
145 }
146 
147 /**
148  * Create LLVM type for struct draw_jit_context
149  */
150 static LLVMTypeRef
create_vs_jit_context_type(struct gallivm_state * gallivm,const char * struct_name)151 create_vs_jit_context_type(struct gallivm_state *gallivm, const char *struct_name)
152 {
153    LLVMTargetDataRef target = gallivm->target;
154    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
155    LLVMTypeRef elem_types[DRAW_VS_JIT_CTX_NUM_FIELDS];
156 
157    elem_types[DRAW_VS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0);
158    elem_types[DRAW_VS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0);
159 
160    LLVMTypeRef context_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0);
161 
162    (void) target; /* silence unused var warning for non-debug build */
163    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, planes,
164                           target, context_type, DRAW_VS_JIT_CTX_PLANES);
165    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, viewports,
166                           target, context_type, DRAW_VS_JIT_CTX_VIEWPORT);
167    LP_CHECK_STRUCT_SIZE(struct draw_vs_jit_context,
168                         target, context_type);
169 
170    return context_type;
171 }
172 
173 
174 /**
175  * Create LLVM type for struct draw_gs_jit_context
176  */
177 static LLVMTypeRef
create_gs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,const char * struct_name)178 create_gs_jit_context_type(struct gallivm_state *gallivm,
179                            unsigned vector_length,
180                            const char *struct_name)
181 {
182    LLVMTargetDataRef target = gallivm->target;
183    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
184    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
185    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
186    LLVMTypeRef context_type;
187 
188    elem_types[DRAW_GS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
189                                                                       DRAW_TOTAL_CLIP_PLANES), 0);
190    elem_types[DRAW_GS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0); /* viewports */
191 
192 
193    elem_types[DRAW_GS_JIT_CTX_PRIM_LENGTHS] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
194    elem_types[DRAW_GS_JIT_CTX_EMITTED_VERTICES] = LLVMPointerType(LLVMVectorType(int_type,
195                                                                                  vector_length), 0);
196    elem_types[DRAW_GS_JIT_CTX_EMITTED_PRIMS] = LLVMPointerType(LLVMVectorType(int_type,
197                                                                               vector_length), 0);
198 
199    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
200                                           ARRAY_SIZE(elem_types), 0);
201 
202    (void) target; /* silence unused var warning for non-debug build */
203    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
204                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
205    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
206                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
207    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
208                           target, context_type,
209                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
210    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
211                           target, context_type,
212                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
213    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
214                           target, context_type,
215                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
216    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
217                         target, context_type);
218    return context_type;
219 }
220 
221 
222 static LLVMTypeRef
create_gs_jit_input_type_deref(struct gallivm_state * gallivm)223 create_gs_jit_input_type_deref(struct gallivm_state *gallivm)
224 {
225    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
226    LLVMTypeRef input_array;
227 
228    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
229    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
230    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
231    return input_array;
232 }
233 
234 
235 static LLVMTypeRef
create_gs_jit_input_type(struct gallivm_state * gallivm)236 create_gs_jit_input_type(struct gallivm_state *gallivm)
237 {
238    return LLVMPointerType(create_gs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
239 }
240 
241 
242 /**
243  * Create LLVM type for struct pipe_vertex_buffer
244  */
245 static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state * gallivm,const char * struct_name)246 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
247                               const char *struct_name)
248 {
249    LLVMTargetDataRef target = gallivm->target;
250    LLVMTypeRef elem_types[3];
251    LLVMTypeRef vb_type;
252 
253    elem_types[0] = LLVMInt8TypeInContext(gallivm->context);
254    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
255    elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
256 
257    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
258                                      ARRAY_SIZE(elem_types), 0);
259 
260    (void) target; /* silence unused var warning for non-debug build */
261    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
262                           target, vb_type, 0);
263    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
264                           target, vb_type, 1);
265    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
266                           target, vb_type, 2);
267 
268    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
269 
270    return vb_type;
271 }
272 
273 
274 static LLVMTypeRef
create_tcs_jit_input_type_deref(struct gallivm_state * gallivm)275 create_tcs_jit_input_type_deref(struct gallivm_state *gallivm)
276 {
277    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
278    LLVMTypeRef input_array;
279 
280    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
281    input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
282    return input_array;
283 }
284 
285 
286 static LLVMTypeRef
create_tcs_jit_input_type(struct gallivm_state * gallivm)287 create_tcs_jit_input_type(struct gallivm_state *gallivm)
288 {
289    return LLVMPointerType(create_tcs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
290 }
291 
292 
293 static LLVMTypeRef
create_tcs_jit_output_type_deref(struct gallivm_state * gallivm)294 create_tcs_jit_output_type_deref(struct gallivm_state *gallivm)
295 {
296    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
297    LLVMTypeRef output_array;
298 
299    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
300    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
301    return output_array;
302 }
303 
304 
305 static LLVMTypeRef
create_tcs_jit_output_type(struct gallivm_state * gallivm)306 create_tcs_jit_output_type(struct gallivm_state *gallivm)
307 {
308    return LLVMPointerType(create_tcs_jit_output_type_deref(gallivm), 0); /* num vertices per prim */
309 }
310 
311 
312 static LLVMTypeRef
create_tes_jit_input_deref_type(struct gallivm_state * gallivm)313 create_tes_jit_input_deref_type(struct gallivm_state *gallivm)
314 {
315    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
316    LLVMTypeRef input_array;
317 
318    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
319    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
320 
321    return input_array;
322 }
323 
324 
325 /**
326  * Create LLVM types for various structures.
327  */
328 static void
create_vs_jit_types(struct draw_llvm_variant * variant)329 create_vs_jit_types(struct draw_llvm_variant *variant)
330 {
331    struct gallivm_state *gallivm = variant->gallivm;
332 
333    variant->context_type = create_vs_jit_context_type(gallivm, "draw_vs_jit_context");
334    variant->context_ptr_type = LLVMPointerType(variant->context_type, 0);
335 
336    variant->resources_type = lp_build_jit_resources_type(gallivm);
337    variant->resources_ptr_type = LLVMPointerType(variant->resources_type, 0);
338 
339    variant->buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
340    variant->buffer_ptr_type = LLVMPointerType(variant->buffer_type, 0);
341 
342    variant->vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
343    variant->vb_ptr_type = LLVMPointerType(variant->vb_type, 0);
344 }
345 
346 
347 static LLVMTypeRef
get_context_ptr_type(struct draw_llvm_variant * variant)348 get_context_ptr_type(struct draw_llvm_variant *variant)
349 {
350    if (!variant->context_ptr_type)
351       create_vs_jit_types(variant);
352    return variant->context_ptr_type;
353 }
354 
355 
356 static LLVMTypeRef
get_buffer_ptr_type(struct draw_llvm_variant * variant)357 get_buffer_ptr_type(struct draw_llvm_variant *variant)
358 {
359    if (!variant->buffer_ptr_type)
360       create_vs_jit_types(variant);
361    return variant->buffer_ptr_type;
362 }
363 
364 
365 static LLVMTypeRef
get_vb_ptr_type(struct draw_llvm_variant * variant)366 get_vb_ptr_type(struct draw_llvm_variant *variant)
367 {
368    if (!variant->vb_ptr_type)
369       create_vs_jit_types(variant);
370    return variant->vb_ptr_type;
371 }
372 
373 static LLVMTypeRef
get_vertex_header_ptr_type(struct draw_llvm_variant * variant)374 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
375 {
376    assert(variant->vertex_header_ptr_type);
377    return variant->vertex_header_ptr_type;
378 }
379 
380 
381 /**
382  * Create per-context LLVM info.
383  */
384 struct draw_llvm *
draw_llvm_create(struct draw_context * draw,LLVMContextRef context)385 draw_llvm_create(struct draw_context *draw, LLVMContextRef context)
386 {
387    struct draw_llvm *llvm;
388 
389    if (!lp_build_init())
390       return NULL;
391 
392    llvm = CALLOC_STRUCT(draw_llvm);
393    if (!llvm)
394       return NULL;
395 
396    llvm->draw = draw;
397 
398    llvm->context = context;
399    if (!llvm->context) {
400       llvm->context = LLVMContextCreate();
401 
402 #if LLVM_VERSION_MAJOR == 15
403       LLVMContextSetOpaquePointers(llvm->context, false);
404 #endif
405 
406       llvm->context_owned = true;
407    }
408    if (!llvm->context)
409       goto fail;
410 
411    llvm->nr_variants = 0;
412    list_inithead(&llvm->vs_variants_list.list);
413 
414    llvm->nr_gs_variants = 0;
415    list_inithead(&llvm->gs_variants_list.list);
416 
417    llvm->nr_tcs_variants = 0;
418    list_inithead(&llvm->tcs_variants_list.list);
419 
420    llvm->nr_tes_variants = 0;
421    list_inithead(&llvm->tes_variants_list.list);
422 
423    return llvm;
424 
425 fail:
426    draw_llvm_destroy(llvm);
427    return NULL;
428 }
429 
430 
431 /**
432  * Free per-context LLVM info.
433  */
434 void
draw_llvm_destroy(struct draw_llvm * llvm)435 draw_llvm_destroy(struct draw_llvm *llvm)
436 {
437    if (llvm->context_owned)
438       LLVMContextDispose(llvm->context);
439    llvm->context = NULL;
440 
441    /* XXX free other draw_llvm data? */
442    FREE(llvm);
443 }
444 
445 
446 static void
draw_get_ir_cache_key(struct nir_shader * nir,const void * key,size_t key_size,uint32_t val_32bit,unsigned char ir_sha1_cache_key[20])447 draw_get_ir_cache_key(struct nir_shader *nir,
448                       const void *key, size_t key_size,
449                       uint32_t val_32bit,
450                       unsigned char ir_sha1_cache_key[20])
451 {
452    struct blob blob = { 0 };
453    unsigned ir_size;
454    void *ir_binary;
455 
456    blob_init(&blob);
457    nir_serialize(&blob, nir, true);
458    ir_binary = blob.data;
459    ir_size = blob.size;
460 
461    struct mesa_sha1 ctx;
462    _mesa_sha1_init(&ctx);
463    _mesa_sha1_update(&ctx, key, key_size);
464    _mesa_sha1_update(&ctx, ir_binary, ir_size);
465    _mesa_sha1_update(&ctx, &val_32bit, 4);
466    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
467 
468    blob_finish(&blob);
469 }
470 
471 
472 /**
473  * Create LLVM-generated code for a vertex shader.
474  */
475 struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm * llvm,unsigned num_inputs,const struct draw_llvm_variant_key * key)476 draw_llvm_create_variant(struct draw_llvm *llvm,
477                          unsigned num_inputs,
478                          const struct draw_llvm_variant_key *key)
479 {
480    struct draw_llvm_variant *variant;
481    struct llvm_vertex_shader *shader =
482       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
483    char module_name[64];
484    unsigned char ir_sha1_cache_key[20];
485    struct lp_cached_code cached = { 0 };
486    bool needs_caching = false;
487    variant = MALLOC(sizeof *variant +
488                     shader->variant_key_size -
489                     sizeof variant->key);
490    if (!variant)
491       return NULL;
492 
493    variant->llvm = llvm;
494    variant->shader = shader;
495    memcpy(&variant->key, key, shader->variant_key_size);
496 
497    snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
498             variant->shader->variants_cached);
499 
500    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
501       draw_get_ir_cache_key(shader->base.state.ir.nir,
502                             key,
503                             shader->variant_key_size,
504                             num_inputs,
505                             ir_sha1_cache_key);
506 
507       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
508                                          &cached,
509                                          ir_sha1_cache_key);
510       if (!cached.data_size)
511          needs_caching = true;
512    }
513    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
514 
515    create_vs_jit_types(variant);
516 
517    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
518       if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
519          tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
520       else
521          nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
522       draw_llvm_dump_variant_key(&variant->key);
523    }
524 
525    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_inputs);
526    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
527 
528    draw_llvm_generate(llvm, variant);
529 
530    gallivm_compile_module(variant->gallivm);
531 
532    variant->jit_func = (draw_jit_vert_func)
533          gallivm_jit_function(variant->gallivm, variant->function);
534 
535    if (needs_caching)
536       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
537                                            &cached,
538                                            ir_sha1_cache_key);
539    gallivm_free_ir(variant->gallivm);
540 
541    variant->list_item_global.base = variant;
542    variant->list_item_local.base = variant;
543    /*variant->no = */shader->variants_created++;
544    variant->list_item_global.base = variant;
545 
546    return variant;
547 }
548 
549 
550 static void
do_clamp_vertex_color(struct gallivm_state * gallivm,struct lp_type type,const struct tgsi_shader_info * info,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS])551 do_clamp_vertex_color(struct gallivm_state *gallivm,
552                       struct lp_type type,
553                       const struct tgsi_shader_info *info,
554                       LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
555 {
556    LLVMBuilderRef builder = gallivm->builder;
557    LLVMValueRef out;
558    unsigned chan, attrib;
559    struct lp_build_context bld;
560    lp_build_context_init(&bld, gallivm, type);
561 
562    for (attrib = 0; attrib < info->num_outputs; ++attrib) {
563       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
564          if (outputs[attrib][chan]) {
565             switch (info->output_semantic_name[attrib]) {
566             case TGSI_SEMANTIC_COLOR:
567             case TGSI_SEMANTIC_BCOLOR:
568                out = LLVMBuildLoad2(builder, LLVMTypeOf(bld.zero), outputs[attrib][chan], "");
569                out = lp_build_clamp(&bld, out, bld.zero, bld.one);
570                LLVMBuildStore(builder, out, outputs[attrib][chan]);
571                break;
572             }
573          }
574       }
575    }
576 }
577 
578 
579 static void
generate_vs(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],const struct lp_bld_tgsi_system_values * system_values,LLVMValueRef context_ptr,LLVMValueRef resources_ptr,const struct lp_build_sampler_soa * draw_sampler,const struct lp_build_image_soa * draw_image,bool clamp_vertex_color,struct lp_build_mask_context * bld_mask)580 generate_vs(struct draw_llvm_variant *variant,
581             LLVMBuilderRef builder,
582             struct lp_type vs_type,
583             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
584             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
585             const struct lp_bld_tgsi_system_values *system_values,
586             LLVMValueRef context_ptr,
587             LLVMValueRef resources_ptr,
588             const struct lp_build_sampler_soa *draw_sampler,
589             const struct lp_build_image_soa *draw_image,
590             bool clamp_vertex_color,
591             struct lp_build_mask_context *bld_mask)
592 {
593    struct draw_llvm *llvm = variant->llvm;
594    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
595    LLVMValueRef consts_ptr =
596       lp_jit_resources_constants(variant->gallivm, variant->resources_type, resources_ptr);
597    LLVMValueRef ssbos_ptr =
598       lp_jit_resources_ssbos(variant->gallivm, variant->resources_type, resources_ptr);
599    struct draw_llvm_variant_key *key = &variant->key;
600 
601    struct lp_build_tgsi_params params;
602    memset(&params, 0, sizeof(params));
603 
604    params.type = vs_type;
605    params.mask = bld_mask;
606    params.consts_ptr = consts_ptr;
607    params.system_values = system_values;
608    params.inputs = inputs;
609    params.num_inputs = key->nr_vertex_elements;
610    params.context_type = variant->context_type;
611    params.context_ptr = context_ptr;
612    params.resources_type = variant->resources_type;
613    params.resources_ptr = resources_ptr;
614    params.sampler = draw_sampler;
615    params.info = &llvm->draw->vs.vertex_shader->info;
616    params.ssbo_ptr = ssbos_ptr;
617    params.image = draw_image;
618    params.aniso_filter_table = lp_jit_resources_aniso_filter_table(variant->gallivm,
619                                                                    variant->resources_type,
620                                                                    resources_ptr);
621 
622    if (llvm->draw->vs.vertex_shader->state.ir.nir &&
623        llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR) {
624       lp_build_nir_soa(variant->gallivm,
625                        llvm->draw->vs.vertex_shader->state.ir.nir,
626                        &params,
627                        outputs);
628    } else {
629       lp_build_tgsi_soa(variant->gallivm,
630                         tokens,
631                         &params,
632                         outputs);
633    }
634 
635    if (clamp_vertex_color) {
636       const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
637       do_clamp_vertex_color(variant->gallivm,
638                             vs_type, info,
639                             outputs);
640    }
641 }
642 
643 
644 static void
fetch_instanced(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef index)645 fetch_instanced(struct gallivm_state *gallivm,
646                 const struct util_format_description *format_desc,
647                 struct lp_type vs_type,
648                 LLVMValueRef vb_stride,
649                 LLVMValueRef map_ptr,
650                 LLVMValueRef buffer_size_adj,
651                 LLVMValueRef *inputs,
652                 LLVMValueRef index)
653 {
654    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
655    LLVMTypeRef aosf_t, aosi_t;
656    LLVMValueRef zero = LLVMConstNull(i32_t);
657    LLVMBuilderRef builder = gallivm->builder;
658    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
659    unsigned i;
660 
661    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
662    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
663 
664    /* This mul can overflow. Wraparound is ok. */
665    stride = LLVMBuildMul(builder, vb_stride, index, "");
666 
667    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
668                                      stride, buffer_size_adj,
669                                      "buffer_overflowed");
670 
671    if (0) {
672       lp_build_print_value(gallivm, "   instance index = ", index);
673       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
674    }
675 
676    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
677    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
678    stride = LLVMBuildAnd(builder, stride, index_valid, "");
679 
680    aos = lp_build_fetch_rgba_aos(gallivm,
681                                  format_desc,
682                                  lp_float32_vec4_type(),
683                                  false,
684                                  map_ptr,
685                                  stride, zero, zero,
686                                  NULL);
687 
688    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
689    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
690    aos = LLVMBuildAnd(builder, aos, index_valid, "");
691    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
692 
693    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
694       LLVMValueRef index = lp_build_const_int32(gallivm, i);
695       inputs[i] = lp_build_extract_broadcast(gallivm,
696                                              lp_float32_vec4_type(),
697                                              vs_type, aos, index);
698    }
699 }
700 
701 
702 static void
fetch_vector(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef indices)703 fetch_vector(struct gallivm_state *gallivm,
704              const struct util_format_description *format_desc,
705              struct lp_type vs_type,
706              LLVMValueRef vb_stride,
707              LLVMValueRef map_ptr,
708              LLVMValueRef buffer_size_adj,
709              LLVMValueRef *inputs,
710              LLVMValueRef indices)
711 {
712    LLVMBuilderRef builder = gallivm->builder;
713    struct lp_build_context blduivec;
714    struct lp_type fetch_type = vs_type;
715    LLVMValueRef offset, valid_mask;
716    unsigned i;
717 
718    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
719 
720    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
721    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
722 
723    /* This mul can overflow. Wraparound is ok. */
724    offset = lp_build_mul(&blduivec, vb_stride, indices);
725 
726    valid_mask = lp_build_compare(gallivm, blduivec.type,
727                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
728 
729    /* not valid elements use offset 0 */
730    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
731 
732    if (0) {
733       lp_build_print_value(gallivm, "   indices = ", indices);
734       lp_build_print_value(gallivm, "   offsets = ", offset);
735       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
736    }
737 
738    /*
739     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
740     * This should always produce better code.
741     */
742 
743    /* The type handling is annoying here... */
744    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
745        format_desc->channel[0].pure_integer) {
746       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
747          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
748       } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
749          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
750       }
751    }
752 
753    lp_build_fetch_rgba_soa(gallivm, format_desc,
754                            fetch_type, false, map_ptr, offset,
755                            blduivec.zero, blduivec.zero,
756                            NULL, inputs);
757 
758    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
759       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
760                                    lp_build_vec_type(gallivm, vs_type), "");
761    }
762 
763    /* out-of-bound fetches return all zeros */
764    for (i = 0; i < format_desc->nr_channels; i++) {
765       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
766       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
767       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
768                                    lp_build_vec_type(gallivm, vs_type), "");
769    }
770 }
771 
772 
773 static void
store_aos(struct gallivm_state * gallivm,bool is_per_prim,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef index,LLVMValueRef value)774 store_aos(struct gallivm_state *gallivm,
775           bool is_per_prim,
776           LLVMTypeRef io_type,
777           LLVMValueRef io_ptr,
778           LLVMValueRef index,
779           LLVMValueRef value)
780 {
781    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
782    LLVMBuilderRef builder = gallivm->builder;
783    LLVMValueRef data_ptr;
784    LLVMTypeRef data_type;
785    LLVMValueRef indices[3];
786 
787    indices[0] = lp_build_const_int32(gallivm, 0);
788    indices[1] = index;
789    indices[2] = lp_build_const_int32(gallivm, 0);
790 
791    if (!is_per_prim) {
792       data_ptr = lp_jit_vertex_header_data(gallivm, io_type, io_ptr);
793       data_type = LLVMStructGetTypeAtIndex(io_type, LP_JIT_VERTEX_HEADER_DATA);
794    } else {
795       data_ptr = io_ptr;
796       data_type = io_type;
797    }
798 
799    data_ptr = LLVMBuildGEP2(builder, data_type, data_ptr, indices, 3, "");
800    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
801 
802 #if DEBUG_STORE
803    if (is_per_prim)
804       lp_build_printf(gallivm, "    ---- %p storing prim attribute %d (io = %p)\n", data_ptr, index, io_ptr);
805    else
806       lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
807 #endif
808 
809    /* Unaligned store due to the vertex header */
810    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
811 }
812 
813 
814 /**
815  * Adjust the mask to architecture endianess. The mask will the store in struct:
816  *
817  * struct vertex_header {
818  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
819  *    unsigned edgeflag:1;
820  *    unsigned pad:1;
821  *    unsigned vertex_id:16;
822  *    [...]
823  * }
824  *
825  * On little-endian machine nothing needs to done, however on bit-endian machine
826  * the mask's fields need to be adjusted with the algorithm:
827  *
828  * uint32_t reverse (uint32_t x)
829  * {
830  *   return (x >> 16) |              // vertex_id
831  *          ((x & 0x3fff) << 18) |   // clipmask
832  *          ((x & 0x4000) << 3) |    // edgeflag
833  *          ((x & 0x8000) << 1);     // pad
834  * }
835  */
836 static LLVMValueRef
adjust_mask(struct gallivm_state * gallivm,LLVMValueRef mask)837 adjust_mask(struct gallivm_state *gallivm,
838             LLVMValueRef mask)
839 {
840 #if UTIL_ARCH_BIG_ENDIAN
841    LLVMBuilderRef builder = gallivm->builder;
842    LLVMValueRef vertex_id;
843    LLVMValueRef clipmask;
844    LLVMValueRef pad;
845    LLVMValueRef edgeflag;
846 
847    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
848    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
849    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
850    if (0) {
851       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
852       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
853    }
854    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
855    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
856 
857    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
858    if (0) {
859       mask = LLVMBuildOr(builder, mask, pad, "");
860    }
861    mask = LLVMBuildOr(builder, mask, edgeflag, "");
862 #endif
863    return mask;
864 }
865 
866 
867 void
draw_store_aos_array(struct gallivm_state * gallivm,struct lp_type soa_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef * indices,LLVMValueRef * aos,int attrib,LLVMValueRef clipmask,bool need_edgeflag,bool is_per_prim)868 draw_store_aos_array(struct gallivm_state *gallivm,
869                      struct lp_type soa_type,
870                      LLVMTypeRef io_type,
871                      LLVMValueRef io_ptr,
872                      LLVMValueRef *indices,
873                      LLVMValueRef* aos,
874                      int attrib,
875                      LLVMValueRef clipmask,
876                      bool need_edgeflag, bool is_per_prim)
877 {
878    LLVMBuilderRef builder = gallivm->builder;
879    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
880    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
881    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
882    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
883    int vector_length = soa_type.length;
884 
885    assert(TGSI_NUM_CHANNELS == 4);
886 
887    for (int i = 0; i < vector_length; i++) {
888       linear_inds[i] = lp_build_const_int32(gallivm, i);
889       if (indices) {
890          inds[i] = indices[i];
891       } else {
892          inds[i] = linear_inds[i];
893       }
894       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
895    }
896 
897    if (attrib == 0 && !is_per_prim) {
898       /* store vertex header for each of the n vertices */
899       LLVMValueRef val, cliptmp;
900       int vertex_id_pad_edgeflag;
901 
902       /* If this assertion fails, it means we need to update the bit twidding
903        * code here.  See struct vertex_header in draw_private.h.
904        */
905       assert(DRAW_TOTAL_CLIP_PLANES==14);
906       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
907       if (!need_edgeflag) {
908          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
909       } else {
910          vertex_id_pad_edgeflag = (0xffff << 16);
911       }
912       if (vector_length == 1)
913          val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
914       else
915          val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
916                                       vertex_id_pad_edgeflag);
917 
918       /* OR with the clipmask */
919       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
920       for (unsigned i = 0; i < vector_length; i++) {
921          LLVMValueRef id_ptr = lp_jit_vertex_header_id(gallivm, io_type, io_ptrs[i]);
922          if (vector_length > 1)
923             val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
924          else
925             val = cliptmp;
926          val = adjust_mask(gallivm, val);
927 #if DEBUG_STORE
928          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
929                          io_ptrs[i], inds[i], val);
930 #endif
931          LLVMBuildStore(builder, val, id_ptr);
932       }
933    }
934 
935    /* store for each of the n vertices */
936    for (int i = 0; i < vector_length; i++) {
937       store_aos(gallivm, is_per_prim, io_type, io_ptrs[i], attr_index, aos[i]);
938    }
939 }
940 
941 
942 static void
convert_to_aos(struct gallivm_state * gallivm,LLVMTypeRef io_type,LLVMValueRef io,LLVMValueRef * indices,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef clipmask,int num_outputs,struct lp_type soa_type,int primid_slot,bool need_edgeflag)943 convert_to_aos(struct gallivm_state *gallivm,
944                LLVMTypeRef io_type,
945                LLVMValueRef io,
946                LLVMValueRef *indices,
947                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
948                LLVMValueRef clipmask,
949                int num_outputs,
950                struct lp_type soa_type,
951                int primid_slot,
952                bool need_edgeflag)
953 {
954    LLVMBuilderRef builder = gallivm->builder;
955 
956 #if DEBUG_STORE
957    lp_build_printf(gallivm, "   # storing begin\n");
958 #endif
959    for (unsigned attrib = 0; attrib < num_outputs; ++attrib) {
960       LLVMValueRef soa[TGSI_NUM_CHANNELS];
961       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
962       for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
963          if (outputs[attrib][chan]) {
964             LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_vec_type(gallivm, soa_type) : lp_build_vec_type(gallivm, soa_type);
965             LLVMValueRef out = LLVMBuildLoad2(builder, single_type, outputs[attrib][chan], "");
966             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
967 #if DEBUG_STORE
968             lp_build_printf(gallivm, "output %d : %d ",
969                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
970                                          attrib, 0),
971                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
972                                          chan, 0));
973             lp_build_print_value(gallivm, "val = ", out);
974             {
975                LLVMValueRef iv =
976                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
977 
978                lp_build_print_value(gallivm, "  ival = ", iv);
979             }
980 #endif
981             soa[chan] = out;
982          } else {
983             soa[chan] = 0;
984          }
985       }
986 
987 
988       if (soa_type.length == TGSI_NUM_CHANNELS) {
989          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
990       } else {
991          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
992 
993          for (unsigned i = 0; i < soa_type.length; ++i) {
994             aos[i] = lp_build_extract_range(gallivm,
995                                             soa[i % TGSI_NUM_CHANNELS],
996                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
997                                             TGSI_NUM_CHANNELS);
998          }
999       }
1000 
1001       draw_store_aos_array(gallivm,
1002                            soa_type,
1003                            io_type,
1004                            io,
1005                            indices,
1006                            aos,
1007                            attrib,
1008                            clipmask,
1009                            need_edgeflag, false);
1010    }
1011 #if DEBUG_STORE
1012    lp_build_printf(gallivm, "   # storing end\n");
1013 #endif
1014 }
1015 
1016 
1017 /**
1018  * Stores original vertex positions in clip coordinates
1019  */
1020 static void
store_clip(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],int idx)1021 store_clip(struct gallivm_state *gallivm,
1022            const struct lp_type vs_type,
1023            LLVMTypeRef io_type,
1024            LLVMValueRef io_ptr,
1025            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1026            int idx)
1027 {
1028    LLVMBuilderRef builder = gallivm->builder;
1029    LLVMValueRef soa[4];
1030    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1031    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1032    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1033    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1034    LLVMTypeRef clip_ptr_type =
1035       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1036                                      4), 0);
1037 
1038    for (int i = 0; i < vs_type.length; i++) {
1039       inds[i] = lp_build_const_int32(gallivm, i);
1040       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1041    }
1042 
1043    LLVMTypeRef single_type = lp_build_vec_type(gallivm, vs_type);
1044    soa[0] = LLVMBuildLoad2(builder, single_type, outputs[idx][0], ""); /*x0 x1 .. xn*/
1045    soa[1] = LLVMBuildLoad2(builder, single_type, outputs[idx][1], ""); /*y0 y1 .. yn*/
1046    soa[2] = LLVMBuildLoad2(builder, single_type, outputs[idx][2], ""); /*z0 z1 .. zn*/
1047    soa[3] = LLVMBuildLoad2(builder, single_type, outputs[idx][3], ""); /*w0 w1 .. wn*/
1048 
1049    for (int i = 0; i < vs_type.length; i++) {
1050       clip_ptrs[i] = lp_jit_vertex_header_clip_pos(gallivm, io_type, io_ptrs[i]);
1051    }
1052 
1053    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1054    for (int i = 0; i < vs_type.length; ++i) {
1055       aos[i] = lp_build_extract_range(gallivm,
1056                                       soa[i % TGSI_NUM_CHANNELS],
1057                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1058                                       TGSI_NUM_CHANNELS);
1059    }
1060 
1061    for (int j = 0; j < vs_type.length; j++) {
1062       LLVMValueRef clip_ptr;
1063 
1064       clip_ptr = LLVMBuildPointerCast(builder, clip_ptrs[j], clip_ptr_type, "");
1065 
1066       /* Unaligned store */
1067       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1068    }
1069 }
1070 
1071 
1072 /**
1073  * Transforms the outputs for viewport mapping
1074  */
1075 static void
generate_viewport(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr)1076 generate_viewport(struct draw_llvm_variant *variant,
1077                   LLVMBuilderRef builder,
1078                   struct lp_type vs_type,
1079                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1080                   LLVMValueRef context_ptr)
1081 {
1082    struct gallivm_state *gallivm = variant->gallivm;
1083    struct lp_type f32_type = vs_type;
1084    const unsigned pos = variant->llvm->draw->vs.position_output;
1085    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1086    LLVMValueRef out3 = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][3], ""); /*w0 w1 .. wn*/
1087    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1088    LLVMValueRef vp_ptr = draw_vs_jit_context_viewports(variant, context_ptr);
1089 
1090    /* We treat pipe_viewport_state as a float array */
1091    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1092    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1093 
1094    /* for 1/w convention*/
1095    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1096    LLVMBuildStore(builder, out3, outputs[pos][3]);
1097 
1098    LLVMTypeRef elem_type = lp_build_elem_type(gallivm, vs_type);
1099 
1100    /* Viewport Mapping */
1101    for (unsigned i = 0; i < 3; i++) {
1102       LLVMValueRef out = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][i], ""); /*x0 x1 .. xn*/
1103       LLVMValueRef scale;
1104       LLVMValueRef trans;
1105       LLVMValueRef scale_i;
1106       LLVMValueRef trans_i;
1107       LLVMValueRef index;
1108 
1109       index = lp_build_const_int32(gallivm, i + scale_index_offset);
1110       scale_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1111 
1112       index = lp_build_const_int32(gallivm, i + trans_index_offset);
1113       trans_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1114 
1115       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1116                                  LLVMBuildLoad2(builder, elem_type, scale_i, "scale"));
1117       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1118                                  LLVMBuildLoad2(builder, elem_type, trans_i, "trans"));
1119 
1120       /* divide by w */
1121       out = LLVMBuildFMul(builder, out, out3, "");
1122       /* mult by scale, add translation */
1123       out = lp_build_fmuladd(builder, out, scale, trans);
1124 
1125       /* store transformed outputs */
1126       LLVMBuildStore(builder, out, outputs[pos][i]);
1127    }
1128 
1129 }
1130 
1131 
1132 /**
1133  * Returns clipmask as nxi32 bitmask for the n vertices
1134  */
1135 static LLVMValueRef
generate_clipmask(struct draw_llvm * llvm,struct gallivm_state * gallivm,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],struct draw_llvm_variant_key * key,LLVMTypeRef context_type,LLVMValueRef context_ptr,bool * have_clipdist)1136 generate_clipmask(struct draw_llvm *llvm,
1137                   struct gallivm_state *gallivm,
1138                   struct lp_type vs_type,
1139                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1140                   struct draw_llvm_variant_key *key,
1141                   LLVMTypeRef context_type,
1142                   LLVMValueRef context_ptr,
1143                   bool *have_clipdist)
1144 {
1145    LLVMBuilderRef builder = gallivm->builder;
1146    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1147    LLVMValueRef test, temp;
1148    LLVMValueRef zero, shift;
1149    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1150    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1151    LLVMValueRef plane1, planes, plane_ptr;
1152    struct lp_type f32_type = vs_type;
1153    struct lp_type i32_type = lp_int_type(vs_type);
1154    const unsigned pos = llvm->draw->vs.position_output;
1155    const unsigned cv = llvm->draw->vs.clipvertex_output;
1156    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1157    bool have_cd = false;
1158    bool clip_user = key->clip_user;
1159    unsigned ucp_enable = key->ucp_enable;
1160    unsigned cd[2];
1161 
1162    cd[0] = llvm->draw->vs.ccdistance_output[0];
1163    cd[1] = llvm->draw->vs.ccdistance_output[1];
1164 
1165    if (cd[0] != pos || cd[1] != pos)
1166       have_cd = true;
1167 
1168    if (num_written_clipdistance && !clip_user) {
1169       clip_user = true;
1170       ucp_enable = (1 << num_written_clipdistance) - 1;
1171    }
1172 
1173    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1174    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1175    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1176    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1177 
1178    LLVMTypeRef vec_type = LLVMTypeOf(zero);
1179 
1180    /*
1181     * load clipvertex and position from correct locations.
1182     * if they are the same just load them once.
1183     */
1184    pos_x = LLVMBuildLoad2(builder, vec_type, outputs[pos][0], ""); /*x0 x1 .. xn */
1185    pos_y = LLVMBuildLoad2(builder, vec_type, outputs[pos][1], ""); /*y0 y1 .. yn */
1186    pos_z = LLVMBuildLoad2(builder, vec_type, outputs[pos][2], ""); /*z0 z1 .. zn */
1187    pos_w = LLVMBuildLoad2(builder, vec_type, outputs[pos][3], ""); /*w0 w1 .. wn */
1188 
1189    if (clip_user && cv != pos) {
1190       cv_x = LLVMBuildLoad2(builder, vec_type, outputs[cv][0], ""); /*x0 x1 .. xn */
1191       cv_y = LLVMBuildLoad2(builder, vec_type, outputs[cv][1], ""); /*y0 y1 .. yn */
1192       cv_z = LLVMBuildLoad2(builder, vec_type, outputs[cv][2], ""); /*z0 z1 .. zn */
1193       cv_w = LLVMBuildLoad2(builder, vec_type, outputs[cv][3], ""); /*w0 w1 .. wn */
1194    } else {
1195       cv_x = pos_x;
1196       cv_y = pos_y;
1197       cv_z = pos_z;
1198       cv_w = pos_w;
1199    }
1200 
1201    /*
1202     * Be careful with the comparisons and NaNs (using llvm's unordered
1203     * comparisons here).
1204     */
1205    /* Cliptest, for hardwired planes */
1206    /*
1207     * XXX should take guardband into account (currently not in key).
1208     * Otherwise might run the draw pipeline stages for nothing.
1209     */
1210    if (key->clip_xy) {
1211       /* plane 1 */
1212       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1213       temp = shift;
1214       test = LLVMBuildAnd(builder, test, temp, "");
1215       mask = test;
1216 
1217       /* plane 2 */
1218       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1219       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1220       temp = LLVMBuildShl(builder, temp, shift, "");
1221       test = LLVMBuildAnd(builder, test, temp, "");
1222       mask = LLVMBuildOr(builder, mask, test, "");
1223 
1224       /* plane 3 */
1225       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1226       temp = LLVMBuildShl(builder, temp, shift, "");
1227       test = LLVMBuildAnd(builder, test, temp, "");
1228       mask = LLVMBuildOr(builder, mask, test, "");
1229 
1230       /* plane 4 */
1231       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1232       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1233       temp = LLVMBuildShl(builder, temp, shift, "");
1234       test = LLVMBuildAnd(builder, test, temp, "");
1235       mask = LLVMBuildOr(builder, mask, test, "");
1236    }
1237 
1238    if (key->clip_z) {
1239       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1240       if (key->clip_halfz) {
1241          /* plane 5 */
1242          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1243          test = LLVMBuildAnd(builder, test, temp, "");
1244          mask = LLVMBuildOr(builder, mask, test, "");
1245       } else {
1246          /* plane 5 */
1247          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1248          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1249          test = LLVMBuildAnd(builder, test, temp, "");
1250          mask = LLVMBuildOr(builder, mask, test, "");
1251       }
1252       /* plane 6 */
1253       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1254       temp = LLVMBuildShl(builder, temp, shift, "");
1255       test = LLVMBuildAnd(builder, test, temp, "");
1256       mask = LLVMBuildOr(builder, mask, test, "");
1257    }
1258 
1259    if (clip_user) {
1260       LLVMValueRef planes_ptr = draw_vs_jit_context_planes(gallivm, context_type, context_ptr);
1261       LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1262       LLVMTypeRef planes_type = LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES);
1263       LLVMValueRef indices[3];
1264       LLVMValueRef is_nan_or_inf;
1265 
1266       /* userclip planes */
1267       while (ucp_enable) {
1268          unsigned plane_idx = ffs(ucp_enable)-1;
1269          ucp_enable &= ~(1 << plane_idx);
1270          plane_idx += 6;
1271 
1272          if (have_cd && num_written_clipdistance) {
1273             LLVMValueRef clipdist;
1274             int i;
1275             i = plane_idx - 6;
1276 
1277             *have_clipdist = true;
1278             if (i < 4) {
1279                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[0]][i], "");
1280             } else {
1281                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[1]][i-4], "");
1282             }
1283             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1284             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1285             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1286             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1287             test = LLVMBuildAnd(builder, test, temp, "");
1288             mask = LLVMBuildOr(builder, mask, test, "");
1289          } else {
1290             LLVMTypeRef vs_elem_type = lp_build_elem_type(gallivm, vs_type);
1291             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1292             LLVMValueRef sum = NULL;
1293             indices[0] = lp_build_const_int32(gallivm, 0);
1294             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1295 
1296             for (int i = 0; i < 4; ++i) {
1297                indices[2] = lp_build_const_int32(gallivm, i);
1298                plane_ptr = LLVMBuildGEP2(builder, planes_type, planes_ptr, indices, 3, "");
1299                plane1 = LLVMBuildLoad2(builder, vs_elem_type, plane_ptr,
1300                                        (const char *[]){"plane_x", "plane_y", "plane_z", "plane_w"}[i]);
1301                planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1302                if (i == 0) {
1303                   sum = LLVMBuildFMul(builder, planes, cv_x, "");
1304                } else {
1305                   sum = lp_build_fmuladd(builder, planes,
1306                                          (LLVMValueRef[]){cv_x, cv_y, cv_z, cv_w}[i], sum);
1307                }
1308             }
1309 
1310             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1311             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1312             test = LLVMBuildAnd(builder, test, temp, "");
1313             mask = LLVMBuildOr(builder, mask, test, "");
1314          }
1315       }
1316    }
1317    if (key->need_edgeflags) {
1318       /*
1319        * This isn't really part of clipmask but stored the same in vertex
1320        * header later, so do it here.
1321        */
1322       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1323       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1324       LLVMValueRef edgeflag = LLVMBuildLoad2(builder, vec_type, outputs[edge_attr][0], "");
1325       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1326       temp = lp_build_const_int_vec(gallivm, i32_type,
1327                                     1LL << DRAW_TOTAL_CLIP_PLANES);
1328       test = LLVMBuildAnd(builder, test, temp, "");
1329       mask = LLVMBuildOr(builder, mask, test, "");
1330    }
1331    return mask;
1332 }
1333 
1334 
1335 /**
1336  * Returns boolean if any clipping has occurred
1337  * Used zero/one i8 value to represent boolean
1338  */
1339 static LLVMValueRef
clipmask_booli8(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef clipmask_bool_type,LLVMValueRef clipmask_bool_ptr,bool edgeflag_in_clipmask)1340 clipmask_booli8(struct gallivm_state *gallivm,
1341                 const struct lp_type vs_type,
1342                 LLVMTypeRef clipmask_bool_type,
1343                 LLVMValueRef clipmask_bool_ptr,
1344                 bool edgeflag_in_clipmask)
1345 {
1346    LLVMBuilderRef builder = gallivm->builder;
1347    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1348    LLVMValueRef clipmask_bool = LLVMBuildLoad2(builder, clipmask_bool_type, clipmask_bool_ptr, "");
1349    LLVMValueRef ret;
1350    struct lp_build_context bldivec;
1351 
1352    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1353 
1354    /*
1355     * We need to invert the edgeflag bit from the clipmask here
1356     * (because the result is really if we want to run the pipeline or not
1357     * and we (may) need it if edgeflag was 0).
1358     */
1359    if (edgeflag_in_clipmask) {
1360       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1361                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
1362       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1363    }
1364 
1365    /*
1366     * XXX: probably should mask off bits from the mask which come from
1367     * vertices which were beyond the count (i.e. indices_valid for
1368     * linear fetches, for elts ones we don't have the correct mask
1369     * right now). Otherwise might run the pipeline for nothing,
1370     * though everything should still work.
1371     */
1372    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1373    ret = LLVMBuildZExt(builder, ret, int8_type, "");
1374    return ret;
1375 }
1376 
1377 
1378 static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_gs_iface * gs_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)1379 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1380                          struct lp_build_context * bld,
1381                          bool is_vindex_indirect,
1382                          LLVMValueRef vertex_index,
1383                          bool is_aindex_indirect,
1384                          LLVMValueRef attrib_index,
1385                          LLVMValueRef swizzle_index)
1386 {
1387    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1388    struct gallivm_state *gallivm = bld->gallivm;
1389    LLVMBuilderRef builder = gallivm->builder;
1390    LLVMValueRef indices[3];
1391    LLVMValueRef res;
1392    struct lp_type type = bld->type;
1393 
1394    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1395    LLVMTypeRef channel_vec_type = LLVMVectorType(float_type, TGSI_NUM_CHANNELS);
1396    LLVMTypeRef input_array_type = create_gs_jit_input_type_deref(gallivm);
1397 
1398    if (is_vindex_indirect || is_aindex_indirect) {
1399       res = bld->zero;
1400       for (int i = 0; i < type.length; ++i) {
1401          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1402          LLVMValueRef vert_chan_index = vertex_index;
1403          LLVMValueRef attr_chan_index = attrib_index;
1404          LLVMValueRef channel_vec, value;
1405 
1406          if (is_vindex_indirect) {
1407             vert_chan_index = LLVMBuildExtractElement(builder,
1408                                                       vertex_index, idx, "");
1409          }
1410          if (is_aindex_indirect) {
1411             attr_chan_index = LLVMBuildExtractElement(builder,
1412                                                       attrib_index, idx, "");
1413          }
1414 
1415          indices[0] = vert_chan_index;
1416          indices[1] = attr_chan_index;
1417          indices[2] = swizzle_index;
1418 
1419          channel_vec = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1420          channel_vec = LLVMBuildLoad2(builder, channel_vec_type, channel_vec, "");
1421          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1422 
1423          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1424       }
1425    } else {
1426       indices[0] = vertex_index;
1427       indices[1] = attrib_index;
1428       indices[2] = swizzle_index;
1429 
1430       res = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1431       res = LLVMBuildLoad2(builder, channel_vec_type, res, "");
1432    }
1433 
1434    return res;
1435 }
1436 
1437 
1438 static void
draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef (* outputs)[4],LLVMValueRef emitted_vertices_vec,LLVMValueRef mask_vec,LLVMValueRef stream_id)1439 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1440                          struct lp_build_context * bld,
1441                          LLVMValueRef (*outputs)[4],
1442                          LLVMValueRef emitted_vertices_vec,
1443                          LLVMValueRef mask_vec, LLVMValueRef stream_id)
1444 {
1445    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1446    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1447    struct gallivm_state *gallivm = variant->gallivm;
1448    LLVMBuilderRef builder = gallivm->builder;
1449    struct lp_type gs_type = bld->type;
1450    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1451                                                   lp_int_type(gs_type), 0);
1452    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1453    LLVMValueRef next_prim_offset =
1454       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1455    LLVMValueRef io = variant->io_ptr;
1456    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1457 
1458    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1459    for (unsigned i = 0; i < gs_type.length; ++i) {
1460       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1461       LLVMValueRef currently_emitted =
1462          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1463       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1464       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1465       indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1466                                    lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1467    }
1468 
1469    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1470    LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1471    struct lp_build_if_state if_ctx;
1472    lp_build_if(&if_ctx, gallivm, cnd);
1473    io = lp_build_pointer_get2(builder, variant->vertex_header_ptr_type,
1474                               io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1475 
1476    if (variant->key.clamp_vertex_color) {
1477       do_clamp_vertex_color(gallivm, gs_type,
1478                             gs_info, outputs);
1479    }
1480    convert_to_aos(gallivm, variant->vertex_header_type,
1481                   io, indices,
1482                   outputs, clipmask,
1483                   gs_info->num_outputs, gs_type,
1484                   -1,
1485                   false);
1486    lp_build_endif(&if_ctx);
1487 }
1488 
1489 
1490 static void
draw_gs_llvm_end_primitive(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef total_emitted_vertices_vec_ptr,LLVMValueRef verts_per_prim_vec,LLVMValueRef emitted_prims_vec,LLVMValueRef mask_vec,unsigned stream)1491 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1492                            struct lp_build_context * bld,
1493                            LLVMValueRef total_emitted_vertices_vec_ptr,
1494                            LLVMValueRef verts_per_prim_vec,
1495                            LLVMValueRef emitted_prims_vec,
1496                            LLVMValueRef mask_vec, unsigned stream)
1497 {
1498    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1499    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1500    struct gallivm_state *gallivm = variant->gallivm;
1501    LLVMBuilderRef builder = gallivm->builder;
1502    LLVMValueRef prim_lengts_ptr =
1503       draw_gs_jit_prim_lengths(variant, variant->context_ptr);
1504 
1505    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1506    for (unsigned i = 0; i < bld->type.length; ++i) {
1507       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1508       LLVMValueRef prims_emitted =
1509          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1510       LLVMValueRef store_ptr;
1511       LLVMValueRef num_vertices =
1512          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1513 
1514       LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1515       struct lp_build_if_state ifthen;
1516       lp_build_if(&ifthen, gallivm, this_cond);
1517       prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1518       prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1519       LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
1520       LLVMTypeRef prim_lengths_type = LLVMPointerType(int_type, 0);
1521       store_ptr = LLVMBuildGEP2(builder, prim_lengths_type, prim_lengts_ptr, &prims_emitted, 1, "");
1522       store_ptr = LLVMBuildLoad2(builder, prim_lengths_type, store_ptr, "");
1523       store_ptr = LLVMBuildGEP2(builder, int_type, store_ptr, &ind, 1, "");
1524       LLVMBuildStore(builder, num_vertices, store_ptr);
1525       lp_build_endif(&ifthen);
1526    }
1527 }
1528 
1529 
1530 static void
draw_gs_llvm_epilogue(const struct lp_build_gs_iface * gs_base,LLVMValueRef total_emitted_vertices_vec,LLVMValueRef emitted_prims_vec,unsigned stream)1531 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1532                       LLVMValueRef total_emitted_vertices_vec,
1533                       LLVMValueRef emitted_prims_vec, unsigned stream)
1534 {
1535    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1536    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1537    struct gallivm_state *gallivm = variant->gallivm;
1538    LLVMBuilderRef builder = gallivm->builder;
1539    LLVMValueRef emitted_verts_ptr =
1540       draw_gs_jit_emitted_vertices(variant, variant->context_ptr);
1541    LLVMValueRef emitted_prims_ptr =
1542       draw_gs_jit_emitted_prims(variant, variant->context_ptr);
1543    LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1544 
1545    emitted_verts_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(total_emitted_vertices_vec), emitted_verts_ptr, &stream_val, 1, "");
1546    emitted_prims_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(emitted_prims_vec), emitted_prims_ptr, &stream_val, 1, "");
1547 
1548    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1549    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1550 }
1551 
1552 
1553 static void
draw_llvm_generate(struct draw_llvm * llvm,struct draw_llvm_variant * variant)1554 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1555 {
1556    struct gallivm_state *gallivm = variant->gallivm;
1557    LLVMContextRef context = gallivm->context;
1558    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1559    LLVMTypeRef arg_types[14];
1560    unsigned num_arg_types = ARRAY_SIZE(arg_types);
1561    LLVMTypeRef func_type;
1562    LLVMValueRef context_ptr;
1563    LLVMValueRef resources_ptr;
1564    LLVMBasicBlockRef block;
1565    LLVMBuilderRef builder;
1566    char func_name[64];
1567    struct lp_type vs_type;
1568    LLVMValueRef count, fetch_elts, start;
1569    LLVMValueRef vertex_id_offset;
1570    LLVMValueRef stride, step, io_itr;
1571    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1572    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1573    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1574    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1575    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1576    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1577    LLVMValueRef fake_buf_ptr, fake_buf;
1578 
1579    struct draw_context *draw = llvm->draw;
1580    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1581    unsigned i, j;
1582    struct lp_build_context bld, blduivec;
1583    struct lp_build_loop_state lp_loop;
1584    struct lp_build_if_state if_ctx;
1585    const int vector_length = lp_native_vector_width / 32;
1586    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1587    struct lp_build_sampler_soa *sampler = 0;
1588    struct lp_build_image_soa *image = NULL;
1589    LLVMValueRef ret, clipmask_bool_ptr;
1590    struct draw_llvm_variant_key *key = &variant->key;
1591    /* If geometry shader is present we need to skip both the viewport
1592     * transformation and clipping otherwise the inputs to the geometry
1593     * shader will be incorrect.
1594     * The code can't handle vp transform when vs writes vp index neither
1595     * (though this would be fixable here, but couldn't just broadcast
1596     * the values).
1597     */
1598    const bool bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1599                                 vs_info->writes_viewport_index;
1600    const bool enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1601                                                     key->clip_z ||
1602                                                     key->clip_user ||
1603                                                     key->need_edgeflags);
1604    LLVMValueRef variant_func;
1605    const unsigned pos = draw->vs.position_output;
1606    const unsigned cv = draw->vs.clipvertex_output;
1607    bool have_clipdist = false;
1608    struct lp_bld_tgsi_system_values system_values;
1609 
1610    memset(&system_values, 0, sizeof(system_values));
1611    memset(&outputs, 0, sizeof(outputs));
1612    snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1613 
1614    i = 0;
1615    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1616    arg_types[i++] = variant->resources_ptr_type;       /* context */
1617    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1618    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1619    arg_types[i++] = int32_type;                          /* count */
1620    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
1621    arg_types[i++] = int32_type;                          /* stride */
1622    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
1623    arg_types[i++] = int32_type;                          /* instance_id */
1624    arg_types[i++] = int32_type;                          /* vertex_id_offset */
1625    arg_types[i++] = int32_type;                          /* start_instance */
1626    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
1627    arg_types[i++] = int32_type;                          /* draw_id */
1628    arg_types[i++] = int32_type;                          /* view_id */
1629    assert(i == ARRAY_SIZE(arg_types));
1630 
1631    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1632                                 arg_types, num_arg_types, 0);
1633 
1634    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1635    variant->function = variant_func;
1636 
1637    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1638    for (i = 0; i < num_arg_types; ++i)
1639       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1640          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1641 
1642    if (gallivm->cache && gallivm->cache->data_size)
1643       return;
1644 
1645    context_ptr               = LLVMGetParam(variant_func, 0);
1646    resources_ptr             = LLVMGetParam(variant_func, 1);
1647    io_ptr                    = LLVMGetParam(variant_func, 2);
1648    vbuffers_ptr              = LLVMGetParam(variant_func, 3);
1649    count                     = LLVMGetParam(variant_func, 4);
1650    start                     = LLVMGetParam(variant_func, 5);
1651    /*
1652     * XXX: stride is actually unused. The stride we use is strictly calculated
1653     * from the number of outputs (including the draw_extra outputs).
1654     * Should probably fix some day (we need a new vs just because of extra
1655     * outputs which the generated vs won't touch).
1656     */
1657    stride                    = LLVMGetParam(variant_func, 6);
1658    vb_ptr                    = LLVMGetParam(variant_func, 7);
1659    system_values.instance_id = LLVMGetParam(variant_func, 8);
1660    vertex_id_offset          = LLVMGetParam(variant_func, 9);
1661    system_values.base_instance = LLVMGetParam(variant_func, 10);
1662    fetch_elts                = LLVMGetParam(variant_func, 11);
1663    system_values.draw_id     = LLVMGetParam(variant_func, 12);
1664    system_values.view_index  = LLVMGetParam(variant_func, 13);
1665 
1666    lp_build_name(context_ptr, "context");
1667    lp_build_name(resources_ptr, "resources");
1668    lp_build_name(io_ptr, "io");
1669    lp_build_name(vbuffers_ptr, "vbuffers");
1670    lp_build_name(count, "count");
1671    lp_build_name(start, "start");
1672    lp_build_name(stride, "stride");
1673    lp_build_name(vb_ptr, "vb");
1674    lp_build_name(system_values.instance_id, "instance_id");
1675    lp_build_name(vertex_id_offset, "vertex_id_offset");
1676    lp_build_name(system_values.base_instance, "start_instance");
1677    lp_build_name(fetch_elts, "fetch_elts");
1678    lp_build_name(system_values.draw_id, "draw_id");
1679 
1680    /*
1681     * Function body
1682     */
1683 
1684    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1685    builder = gallivm->builder;
1686    LLVMPositionBuilderAtEnd(builder, block);
1687 
1688    memset(&vs_type, 0, sizeof vs_type);
1689    vs_type.floating = true; /* floating point values */
1690    vs_type.sign = true;     /* values are signed */
1691    vs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
1692    vs_type.width = 32;      /* 32-bit float */
1693    vs_type.length = vector_length;
1694 
1695    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
1696    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1697 
1698    /* hold temporary "bool" clipmask */
1699    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
1700 
1701    fake_buf = lp_build_alloca_undef(gallivm,
1702                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
1703    fake_buf = LLVMBuildBitCast(builder, fake_buf,
1704                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
1705    fake_buf_ptr = LLVMBuildGEP2(builder, LLVMInt8TypeInContext(context), fake_buf, &bld.zero, 1, "");
1706 
1707    /* code generated texture sampling */
1708    sampler = lp_bld_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key),
1709                                           MAX2(key->nr_samplers,
1710                                                key->nr_sampler_views));
1711    image = lp_bld_llvm_image_soa_create(draw_llvm_variant_key_images(key),
1712                                       key->nr_images);
1713 
1714    step = lp_build_const_int32(gallivm, vector_length);
1715 
1716    ind_vec = blduivec.undef;
1717    for (i = 0; i < vs_type.length; i++) {
1718       LLVMValueRef index = lp_build_const_int32(gallivm, i);
1719       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
1720    }
1721 
1722    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
1723                              LLVMConstPointerNull(arg_types[11]), fetch_elts, "");
1724 
1725    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
1726    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
1727    /*
1728     * Only needed for non-indexed path.
1729     */
1730    start_vec = lp_build_broadcast_scalar(&blduivec, start);
1731 
1732    /*
1733     * Pre-calculate everything which is constant per shader invocation.
1734     */
1735    for (j = 0; j < key->nr_vertex_elements; ++j) {
1736       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
1737       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
1738       struct pipe_vertex_element *velem = &key->vertex_element[j];
1739       LLVMValueRef vb_index =
1740          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1741       LLVMValueRef bsize = lp_build_const_int32(gallivm,
1742                                                 util_format_get_blocksize(velem->src_format));
1743       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
1744                                                      velem->src_offset);
1745       LLVMValueRef src_stride = lp_build_const_int32(gallivm,
1746                                                      velem->src_stride);
1747       struct lp_build_if_state if_ctx;
1748 
1749       if (velem->src_format != PIPE_FORMAT_NONE) {
1750          vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, "");
1751          vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, "");
1752          vb_stride[j] = src_stride;
1753          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, vb_info);
1754          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr);
1755          buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr);
1756 
1757          ofbit = NULL;
1758          /*
1759           * We'll set buffer_size_adj to zero if we have of, so it will
1760           * always overflow later automatically without having to keep ofbit.
1761           * Overflows (with normal wraparound) doing the actual offset
1762           * calculation should be ok, just not for the buffer size calc.
1763           * It would also be possible to detect such overflows and return
1764           * zeros if that happens, but this would be more complex.
1765           */
1766          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
1767          tmp = lp_build_sub(&bld, bsize, bld.one);
1768          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
1769                                                      &ofbit);
1770          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
1771                                                      buf_offset, &ofbit);
1772 
1773          /*
1774           * We can't easily set fake vertex buffers outside the generated code.
1775           * Hence, set fake vertex buffers here instead basically, so fetch
1776           * code can always fetch using offset 0, eliminating all control flow
1777           * inside the main loop.
1778           * (Alternatively, could have control flow per vector skipping fetch
1779           * if ofbit is true.)
1780           */
1781          if (velem->instance_divisor) {
1782             /*
1783              * Index is equal to the start instance plus the number of current
1784              * instance divided by the divisor. In this case we compute it as:
1785              * index = start_instance + (instance_id  / divisor).
1786              * Note we could actually do the fetch here, outside the loop -
1787              * it's all constant, hopefully llvm recognizes this.
1788              */
1789             LLVMValueRef current_instance;
1790             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
1791                                              lp_build_const_int32(gallivm,
1792                                                                   velem->instance_divisor),
1793                                              "instance_divisor");
1794             instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
1795                                                        current_instance, &ofbit);
1796          }
1797 
1798          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
1799                                               buffer_size_adj[j], "");
1800 
1801          LLVMTypeRef byte_type = LLVMInt8TypeInContext(context);
1802          LLVMTypeRef byte_ptr_type = LLVMPointerType(byte_type, 0);
1803          temp_ptr = lp_build_alloca_undef(gallivm, byte_ptr_type, "");
1804 
1805          lp_build_if(&if_ctx, gallivm, ofbit);
1806          {
1807             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
1808          }
1809          lp_build_else(&if_ctx);
1810          {
1811             map_ptr[j] = LLVMBuildGEP2(builder, byte_type, map_ptr[j], &buf_offset, 1, "");
1812             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
1813          }
1814          lp_build_endif(&if_ctx);
1815          map_ptr[j] = LLVMBuildLoad2(builder, byte_ptr_type, temp_ptr, "map_ptr");
1816 
1817          if (0) {
1818             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
1819                             lp_build_const_int32(gallivm, j),
1820                             vb_index, vb_stride[j]);
1821             lp_build_printf(gallivm,
1822                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
1823                             vb_buffer_offset, src_offset, buf_offset);
1824             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
1825                             buffer_size, bsize);
1826             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
1827          }
1828       }
1829    }
1830 
1831    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
1832    {
1833       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1834       LLVMValueRef io;
1835       LLVMValueRef clipmask;   /* holds the clipmask value */
1836       LLVMValueRef true_index_array, index_store;
1837       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1838 
1839       io_itr = lp_loop.counter;
1840 
1841       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &io_itr, 1, "");
1842 #if DEBUG_STORE
1843       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
1844                       io_itr, io, lp_loop.counter);
1845 #endif
1846 
1847       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
1848       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
1849 
1850       LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
1851       /*
1852        * Limit indices to fetch_max, otherwise might try to access indices
1853        * beyond index buffer (or rather vsplit elt buffer) size.
1854        * Could probably safely (?) skip this for non-indexed draws and
1855        * simplify things minimally (by removing it could combine the ind_vec
1856        * and start_vec adds). I think the only effect for non-indexed draws will
1857        * be that for the invalid elements they will be all fetched from the
1858        * same location as the last valid one, but noone should really care.
1859        */
1860       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
1861 
1862       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
1863 
1864       lp_build_if(&if_ctx, gallivm, have_elts);
1865       {
1866          /*
1867           * Note: you'd expect some comparison/clamp against fetch_elt_max
1868           * here.
1869           * There used to be one here but it was incorrect: overflow was
1870           * detected if index > fetch_elt_max - but the correct condition
1871           * would be index >= fetch_elt_max (since this is just size of elts
1872           * buffer / element size).
1873           * Using the correct condition however will cause failures - due to
1874           * vsplit/vcache code which rebases indices. So, as an example, if
1875           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
1876           * replace all invalid indices with 0 - which in case of elt_bias
1877           * not being zero will get a different fetch index than the valid
1878           * index 0. So, just rely on vsplit code preventing out-of-bounds
1879           * fetches. This is also why it's safe to do elts fetch even if there
1880           * was no index buffer bound - the real buffer is never seen here, at
1881           * least not if there are index buffer overflows...
1882           */
1883 
1884          /*
1885           * XXX should not have to do this, as scale can be handled
1886           * natively by loads (hits asserts though).
1887           */
1888          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
1889          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
1890                                        LLVMPointerType(LLVMInt8TypeInContext(context),
1891                                                        0), "");
1892          tmp = lp_build_gather(gallivm, vs_type.length,
1893                                32, bld.type, true,
1894                                fetch_elts, tmp, false);
1895          LLVMBuildStore(builder, tmp, index_store);
1896       }
1897       lp_build_else(&if_ctx);
1898       {
1899          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
1900          LLVMBuildStore(builder, tmp, index_store);
1901       }
1902       lp_build_endif(&if_ctx);
1903 
1904       true_index_array = LLVMBuildLoad2(builder, blduivec.vec_type, index_store, "");
1905 
1906       for (j = 0; j < key->nr_vertex_elements; ++j) {
1907          struct pipe_vertex_element *velem = &key->vertex_element[j];
1908          const struct util_format_description *format_desc =
1909             util_format_description(velem->src_format);
1910 
1911          if (format_desc->format == PIPE_FORMAT_NONE) {
1912             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1913                inputs[j][i] = lp_build_zero(gallivm, vs_type);
1914             }
1915          } else if (velem->instance_divisor) {
1916             fetch_instanced(gallivm, format_desc, vs_type,
1917                             vb_stride[j], map_ptr[j],
1918                             buffer_size_adj[j],
1919                             inputs[j], instance_index[j]);
1920          } else {
1921             fetch_vector(gallivm, format_desc, vs_type,
1922                          vb_stride[j], map_ptr[j],
1923                          buffer_size_adj[j],
1924                          inputs[j], true_index_array);
1925          }
1926       }
1927 
1928       struct lp_build_mask_context mask;
1929 
1930       lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
1931       /* In the paths with elts vertex id has to be unaffected by the
1932        * index bias and because indices inside our elements array have
1933        * already had index bias applied we need to subtract it here to
1934        * get back to the original index.
1935        * In the linear paths vertex id has to be unaffected by the
1936        * original start index and because we abuse the 'start' variable
1937        * to either represent the actual start index or the index at which
1938        * the primitive was split (we split rendering into chunks of at
1939        * most 4095-vertices) we need to back out the original start
1940        * index out of our vertex id here.
1941        * for ARB_shader_draw_parameters, base_vertex should be 0 for
1942        * non-indexed draws.
1943        */
1944       LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
1945       system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
1946 
1947       /* first vertex is for Vulkan base vertex support */
1948       LLVMValueRef first_vertex = vertex_id_offset;
1949       system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
1950 
1951       system_values.vertex_id = true_index_array;
1952       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
1953                                                     lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
1954 
1955       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1956       generate_vs(variant,
1957                   builder,
1958                   vs_type,
1959                   outputs,
1960                   ptr_aos,
1961                   &system_values,
1962                   context_ptr,
1963                   resources_ptr,
1964                   sampler,
1965                   image,
1966                   key->clamp_vertex_color,
1967                   &mask);
1968 
1969       lp_build_mask_end(&mask);
1970       if (pos != -1 && cv != -1) {
1971          /* store original positions in clip before further manipulation */
1972          store_clip(gallivm, vs_type, variant->vertex_header_type, io, outputs, pos);
1973 
1974          /* do cliptest */
1975          if (enable_cliptest) {
1976             LLVMValueRef temp = LLVMBuildLoad2(builder, blduivec.vec_type, clipmask_bool_ptr, "");
1977             /* allocate clipmask, assign it integer type */
1978             clipmask = generate_clipmask(llvm,
1979                                          gallivm,
1980                                          vs_type,
1981                                          outputs,
1982                                          key,
1983                                          variant->context_type,
1984                                          context_ptr, &have_clipdist);
1985             temp = LLVMBuildOr(builder, clipmask, temp, "");
1986             /* store temporary clipping boolean value */
1987             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
1988          } else {
1989             clipmask = blduivec.zero;
1990          }
1991 
1992          /* do viewport mapping */
1993          if (!bypass_viewport) {
1994             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
1995          }
1996       } else {
1997          clipmask = blduivec.zero;
1998       }
1999 
2000       /* store clipmask in vertex header,
2001        * original positions in clip
2002        * and transformed positions in data
2003        */
2004       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
2005                      vs_info->num_outputs, vs_type, -1,
2006                      enable_cliptest && key->need_edgeflags);
2007    }
2008    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2009 
2010    lp_bld_llvm_sampler_soa_destroy(sampler);
2011    lp_bld_llvm_image_soa_destroy(image);
2012 
2013    /* return clipping boolean value for function */
2014    ret = clipmask_booli8(gallivm, vs_type, blduivec.vec_type, clipmask_bool_ptr,
2015                          enable_cliptest && key->need_edgeflags);
2016 
2017    LLVMBuildRet(builder, ret);
2018 
2019    gallivm_verify_function(gallivm, variant_func);
2020 }
2021 
2022 
2023 struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2024 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2025 {
2026    struct draw_llvm_variant_key *key;
2027    struct lp_sampler_static_state *draw_sampler;
2028    struct lp_image_static_state *draw_image;
2029 
2030    key = (struct draw_llvm_variant_key *)store;
2031 
2032    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2033 
2034 
2035    /* will have to rig this up properly later */
2036    key->clip_xy = llvm->draw->clip_xy;
2037    key->clip_z = llvm->draw->clip_z;
2038    key->clip_user = llvm->draw->clip_user;
2039    key->bypass_viewport = llvm->draw->bypass_viewport;
2040    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2041    /* XXX assumes edgeflag output not at 0 */
2042    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? true : false);
2043    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2044    key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2045    key->num_outputs = draw_total_vs_outputs(llvm->draw);
2046 
2047    key->clamp_vertex_color = !key->has_gs_or_tes &&
2048       llvm->draw->rasterizer->clamp_vertex_color;
2049 
2050    /* All variants of this shader will have the same value for
2051     * nr_samplers.  Not yet trying to compact away holes in the
2052     * sampler array.
2053     */
2054    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2055    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2056       key->nr_sampler_views =
2057          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2058    } else {
2059       key->nr_sampler_views = key->nr_samplers;
2060    }
2061 
2062    key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2063 
2064    /* Presumably all variants of the shader should have the same
2065     * number of vertex elements - ie the number of shader inputs.
2066     * NOTE: we NEED to store the needed number of needed inputs
2067     * here, not the number of provided elements to match keysize
2068     * (and the offset of sampler state in the key).
2069     * If we have excess number of vertex elements, this is valid,
2070     * but the excess ones don't matter.
2071     * If we don't have enough vertex elements (which looks not really
2072     * valid but we'll handle it gracefully) fill out missing ones with
2073     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2074     */
2075    key->nr_vertex_elements =
2076       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2077 
2078    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2079       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2080                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2081       memset(key->vertex_element, 0,
2082              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2083    }
2084    memcpy(key->vertex_element,
2085           llvm->draw->pt.vertex_element,
2086           sizeof(struct pipe_vertex_element) *
2087              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2088 
2089    draw_sampler = draw_llvm_variant_key_samplers(key);
2090    memset(draw_sampler, 0,
2091           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2092 
2093    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2094       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2095                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2096    }
2097    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2098       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2099                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2100    }
2101 
2102    draw_image = draw_llvm_variant_key_images(key);
2103    memset(draw_image, 0,
2104           key->nr_images * sizeof *draw_image);
2105    for (unsigned i = 0; i < key->nr_images; i++) {
2106       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2107                                             llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2108    }
2109    return key;
2110 }
2111 
2112 
2113 void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key * key)2114 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2115 {
2116    struct lp_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2117    struct lp_image_static_state *image = draw_llvm_variant_key_images(key);
2118    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2119    debug_printf("clip_xy = %u\n", key->clip_xy);
2120    debug_printf("clip_z = %u\n", key->clip_z);
2121    debug_printf("clip_user = %u\n", key->clip_user);
2122    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2123    debug_printf("clip_halfz = %u\n", key->clip_halfz);
2124    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2125    debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2126    debug_printf("ucp_enable = %u\n", key->ucp_enable);
2127 
2128    for (unsigned i = 0 ; i < key->nr_vertex_elements; i++) {
2129       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2130       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2131       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2132       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2133    }
2134 
2135    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2136       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2137    }
2138 
2139    for (unsigned i = 0 ; i < key->nr_images; i++)
2140       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2141 }
2142 
2143 
2144 void
draw_llvm_set_mapped_texture(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned sview_idx,uint32_t width,uint32_t height,uint32_t depth,uint32_t first_level,uint32_t last_level,uint32_t num_samples,uint32_t sample_stride,const void * base_ptr,uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2145 draw_llvm_set_mapped_texture(struct draw_context *draw,
2146                              enum pipe_shader_type shader_stage,
2147                              unsigned sview_idx,
2148                              uint32_t width, uint32_t height, uint32_t depth,
2149                              uint32_t first_level, uint32_t last_level,
2150                              uint32_t num_samples,
2151                              uint32_t sample_stride,
2152                              const void *base_ptr,
2153                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2154                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2155                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2156 {
2157    struct lp_jit_texture *jit_tex;
2158 
2159    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2160    assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].textures));
2161 
2162    jit_tex = &draw->llvm->jit_resources[shader_stage].textures[sview_idx];
2163    jit_tex->width = width;
2164    jit_tex->height = height;
2165    jit_tex->depth = depth;
2166    jit_tex->first_level = first_level;
2167    jit_tex->last_level = last_level;
2168    jit_tex->mip_offsets[0] = 0;
2169    jit_tex->base = base_ptr;
2170    if (num_samples > 1) {
2171       jit_tex->mip_offsets[LP_JIT_TEXTURE_SAMPLE_STRIDE] = sample_stride;
2172       jit_tex->row_stride[0] = row_stride[0];
2173       jit_tex->img_stride[0] = img_stride[0];
2174       jit_tex->last_level = num_samples;
2175    } else {
2176       for (unsigned j = first_level; j <= last_level; j++) {
2177          jit_tex->mip_offsets[j] = mip_offsets[j];
2178          jit_tex->row_stride[j] = row_stride[j];
2179          jit_tex->img_stride[j] = img_stride[j];
2180       }
2181    }
2182 }
2183 
2184 
2185 void
draw_llvm_set_mapped_image(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned idx,uint32_t width,uint32_t height,uint32_t depth,const void * base_ptr,uint32_t row_stride,uint32_t img_stride,uint32_t num_samples,uint32_t sample_stride)2186 draw_llvm_set_mapped_image(struct draw_context *draw,
2187                            enum pipe_shader_type shader_stage,
2188                            unsigned idx,
2189                            uint32_t width, uint32_t height, uint32_t depth,
2190                            const void *base_ptr,
2191                            uint32_t row_stride,
2192                            uint32_t img_stride,
2193                            uint32_t num_samples,
2194                            uint32_t sample_stride)
2195 {
2196    struct lp_jit_image *jit_image;
2197 
2198    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2199    assert(idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].images));
2200 
2201    jit_image = &draw->llvm->jit_resources[shader_stage].images[idx];
2202 
2203    jit_image->width = width;
2204    jit_image->height = height;
2205    jit_image->depth = depth;
2206    jit_image->base = base_ptr;
2207 
2208    jit_image->row_stride = row_stride;
2209    jit_image->img_stride = img_stride;
2210    jit_image->num_samples = num_samples;
2211    jit_image->sample_stride = sample_stride;
2212 }
2213 
2214 
2215 void
draw_llvm_set_sampler_state(struct draw_context * draw,enum pipe_shader_type shader_type)2216 draw_llvm_set_sampler_state(struct draw_context *draw,
2217                             enum pipe_shader_type shader_type)
2218 {
2219    assert(shader_type < DRAW_MAX_SHADER_STAGE);
2220    for (unsigned i = 0; i < draw->num_samplers[shader_type]; i++) {
2221       struct lp_jit_sampler *jit_sam = &draw->llvm->jit_resources[shader_type].samplers[i];
2222 
2223       if (draw->samplers[shader_type][i]) {
2224          const struct pipe_sampler_state *s
2225             = draw->samplers[shader_type][i];
2226          jit_sam->min_lod = s->min_lod;
2227          jit_sam->max_lod = s->max_lod;
2228          jit_sam->lod_bias = s->lod_bias;
2229          jit_sam->max_aniso = s->max_anisotropy;
2230          COPY_4V(jit_sam->border_color, s->border_color.f);
2231       }
2232    }
2233 }
2234 
2235 
2236 void
draw_llvm_destroy_variant(struct draw_llvm_variant * variant)2237 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2238 {
2239    struct draw_llvm *llvm = variant->llvm;
2240 
2241    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2242       debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2243                     variant->shader->variants_cached, llvm->nr_variants);
2244    }
2245 
2246    gallivm_destroy(variant->gallivm);
2247 
2248    list_del(&variant->list_item_local.list);
2249    variant->shader->variants_cached--;
2250    list_del(&variant->list_item_global.list);
2251    llvm->nr_variants--;
2252    FREE(variant);
2253 }
2254 
2255 
2256 /**
2257  * Create LLVM types for various structures.
2258  */
2259 static void
create_gs_jit_types(struct draw_gs_llvm_variant * var)2260 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2261 {
2262    struct gallivm_state *gallivm = var->gallivm;
2263 
2264    var->context_type = create_gs_jit_context_type(gallivm,
2265                                              var->shader->base.vector_length,
2266                                              "draw_gs_jit_context");
2267    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
2268 
2269    var->resources_type = lp_build_jit_resources_type(gallivm);
2270    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2271    var->input_array_type = create_gs_jit_input_type(gallivm);
2272 }
2273 
2274 
2275 static LLVMTypeRef
get_gs_context_ptr_type(struct draw_gs_llvm_variant * variant)2276 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2277 {
2278    if (!variant->context_ptr_type)
2279       create_gs_jit_types(variant);
2280    return variant->context_ptr_type;
2281 }
2282 
2283 
2284 static LLVMValueRef
generate_mask_value(struct draw_gs_llvm_variant * variant,struct lp_type gs_type)2285 generate_mask_value(struct draw_gs_llvm_variant *variant,
2286                     struct lp_type gs_type)
2287 {
2288    struct gallivm_state *gallivm = variant->gallivm;
2289    LLVMBuilderRef builder = gallivm->builder;
2290    struct lp_type mask_type = lp_int_type(gs_type);
2291    LLVMValueRef num_prims;
2292    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2293 
2294    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2295                                   variant->num_prims);
2296    for (unsigned i = 0; i < gs_type.length; i++) {
2297       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2298       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2299    }
2300    mask_val = lp_build_compare(gallivm, mask_type,
2301                                PIPE_FUNC_GREATER, num_prims, mask_val);
2302 
2303    return mask_val;
2304 }
2305 
2306 
2307 static void
draw_gs_llvm_generate(struct draw_llvm * llvm,struct draw_gs_llvm_variant * variant)2308 draw_gs_llvm_generate(struct draw_llvm *llvm,
2309                       struct draw_gs_llvm_variant *variant)
2310 {
2311    struct gallivm_state *gallivm = variant->gallivm;
2312    LLVMContextRef context = gallivm->context;
2313    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2314    LLVMTypeRef arg_types[9];
2315    LLVMTypeRef func_type;
2316    LLVMValueRef variant_func;
2317    LLVMValueRef context_ptr;
2318    LLVMValueRef resources_ptr;
2319    LLVMValueRef prim_id_ptr;
2320    LLVMBasicBlockRef block;
2321    LLVMBuilderRef builder;
2322    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2323    struct lp_build_sampler_soa *sampler = 0;
2324    struct lp_build_image_soa *image = NULL;
2325    struct lp_build_context bld;
2326    struct lp_bld_tgsi_system_values system_values;
2327    char func_name[64];
2328    struct lp_type gs_type;
2329    struct draw_gs_llvm_iface gs_iface;
2330    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2331    LLVMValueRef consts_ptr;
2332    LLVMValueRef ssbos_ptr;
2333    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2334    struct lp_build_mask_context mask;
2335    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2336    unsigned vector_length = variant->shader->base.vector_length;
2337 
2338    memset(&system_values, 0, sizeof(system_values));
2339    memset(&outputs, 0, sizeof(outputs));
2340 
2341    snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2342 
2343    assert(variant->vertex_header_ptr_type);
2344 
2345    LLVMTypeRef prim_id_type = LLVMVectorType(int32_type, vector_length);
2346    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2347    arg_types[1] = variant->resources_ptr_type;
2348    arg_types[2] = variant->input_array_type;           /* input */
2349    arg_types[3] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2350    arg_types[4] = int32_type;                          /* num_prims */
2351    arg_types[5] = int32_type;                          /* instance_id */
2352    arg_types[6] = LLVMPointerType(prim_id_type, 0);    /* prim_id_ptr */
2353    arg_types[7] = int32_type;
2354    arg_types[8] = int32_type;
2355 
2356    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2357 
2358    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2359 
2360    variant->function = variant_func;
2361 
2362    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2363 
2364    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i)
2365       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2366          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2367 
2368    if (gallivm->cache && gallivm->cache->data_size)
2369       return;
2370    context_ptr               = LLVMGetParam(variant_func, 0);
2371    resources_ptr             = LLVMGetParam(variant_func, 1);
2372    input_array               = LLVMGetParam(variant_func, 2);
2373    io_ptr                    = LLVMGetParam(variant_func, 3);
2374    num_prims                 = LLVMGetParam(variant_func, 4);
2375    system_values.instance_id = LLVMGetParam(variant_func, 5);
2376    prim_id_ptr               = LLVMGetParam(variant_func, 6);
2377    system_values.invocation_id = LLVMGetParam(variant_func, 7);
2378    system_values.view_index  = LLVMGetParam(variant_func, 8);
2379 
2380    lp_build_name(context_ptr, "context");
2381    lp_build_name(resources_ptr, "resources");
2382    lp_build_name(input_array, "input");
2383    lp_build_name(io_ptr, "io");
2384    lp_build_name(num_prims, "num_prims");
2385    lp_build_name(system_values.instance_id, "instance_id");
2386    lp_build_name(prim_id_ptr, "prim_id_ptr");
2387    lp_build_name(system_values.invocation_id, "invocation_id");
2388    lp_build_name(system_values.view_index, "view_index");
2389 
2390    variant->context_ptr = context_ptr;
2391    variant->io_ptr = io_ptr;
2392    variant->num_prims = num_prims;
2393 
2394    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2395    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2396    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2397    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2398    gs_iface.input = input_array;
2399    gs_iface.variant = variant;
2400 
2401    /*
2402     * Function body
2403     */
2404 
2405    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2406    builder = gallivm->builder;
2407    LLVMPositionBuilderAtEnd(builder, block);
2408 
2409    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2410 
2411    memset(&gs_type, 0, sizeof gs_type);
2412    gs_type.floating = true; /* floating point values */
2413    gs_type.sign = true;     /* values are signed */
2414    gs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2415    gs_type.width = 32;      /* 32-bit float */
2416    gs_type.length = vector_length;
2417 
2418    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
2419 
2420    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
2421 
2422    /* code generated texture sampling */
2423    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
2424                                           MAX2(variant->key.nr_samplers,
2425                                                variant->key.nr_sampler_views));
2426    image = lp_bld_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2427                                       variant->key.nr_images);
2428    mask_val = generate_mask_value(variant, gs_type);
2429    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2430 
2431    if (gs_info->uses_primid) {
2432       system_values.prim_id = LLVMBuildLoad2(builder, prim_id_type, prim_id_ptr, "prim_id");
2433    }
2434 
2435    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2436       if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2437          tgsi_dump(tokens, 0);
2438       else
2439          nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2440       draw_gs_llvm_dump_variant_key(&variant->key);
2441    }
2442 
2443    struct lp_build_tgsi_params params;
2444    memset(&params, 0, sizeof(params));
2445 
2446    params.type = gs_type;
2447    params.mask = &mask;
2448    params.consts_ptr = consts_ptr;
2449    params.system_values = &system_values;
2450    params.context_type = variant->context_type;
2451    params.context_ptr = context_ptr;
2452    params.resources_type = variant->resources_type;
2453    params.resources_ptr = resources_ptr;
2454    params.sampler = sampler;
2455    params.info = &llvm->draw->gs.geometry_shader->info;
2456    params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2457    params.ssbo_ptr = ssbos_ptr;
2458    params.image = image;
2459    params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2460    params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
2461                                                                    variant->resources_type,
2462                                                                    resources_ptr);
2463 
2464 
2465    if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2466       lp_build_tgsi_soa(variant->gallivm,
2467                         tokens,
2468                         &params,
2469                         outputs);
2470    else
2471       lp_build_nir_soa(variant->gallivm,
2472                        llvm->draw->gs.geometry_shader->state.ir.nir,
2473                        &params,
2474                        outputs);
2475 
2476    lp_bld_llvm_sampler_soa_destroy(sampler);
2477    lp_bld_llvm_image_soa_destroy(image);
2478 
2479    lp_build_mask_end(&mask);
2480 
2481    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2482 
2483    gallivm_verify_function(gallivm, variant_func);
2484 }
2485 
2486 
2487 struct draw_gs_llvm_variant *
draw_gs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_gs_llvm_variant_key * key)2488 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2489                             unsigned num_outputs,
2490                             const struct draw_gs_llvm_variant_key *key)
2491 {
2492    struct draw_gs_llvm_variant *variant;
2493    struct llvm_geometry_shader *shader =
2494       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2495    char module_name[64];
2496    unsigned char ir_sha1_cache_key[20];
2497    struct lp_cached_code cached = { 0 };
2498    bool needs_caching = false;
2499 
2500    variant = MALLOC(sizeof *variant +
2501                     shader->variant_key_size -
2502                     sizeof variant->key);
2503    if (!variant)
2504       return NULL;
2505 
2506    variant->llvm = llvm;
2507    variant->shader = shader;
2508 
2509    snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2510             variant->shader->variants_cached);
2511 
2512    memcpy(&variant->key, key, shader->variant_key_size);
2513 
2514    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2515       draw_get_ir_cache_key(shader->base.state.ir.nir,
2516                             key,
2517                             shader->variant_key_size,
2518                             num_outputs,
2519                             ir_sha1_cache_key);
2520 
2521       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2522                                          &cached,
2523                                          ir_sha1_cache_key);
2524       if (!cached.data_size)
2525          needs_caching = true;
2526    }
2527    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
2528 
2529    create_gs_jit_types(variant);
2530 
2531    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
2532    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
2533 
2534    draw_gs_llvm_generate(llvm, variant);
2535 
2536    gallivm_compile_module(variant->gallivm);
2537 
2538    variant->jit_func = (draw_gs_jit_func)
2539          gallivm_jit_function(variant->gallivm, variant->function);
2540 
2541    if (needs_caching)
2542       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2543                                            &cached,
2544                                            ir_sha1_cache_key);
2545    gallivm_free_ir(variant->gallivm);
2546 
2547    variant->list_item_global.base = variant;
2548    variant->list_item_local.base = variant;
2549    /*variant->no = */shader->variants_created++;
2550    variant->list_item_global.base = variant;
2551 
2552    return variant;
2553 }
2554 
2555 
2556 void
draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant * variant)2557 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2558 {
2559    struct draw_llvm *llvm = variant->llvm;
2560 
2561    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2562       debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2563                     variant->shader->variants_cached, llvm->nr_gs_variants);
2564    }
2565 
2566    gallivm_destroy(variant->gallivm);
2567 
2568    list_del(&variant->list_item_local.list);
2569    variant->shader->variants_cached--;
2570    list_del(&variant->list_item_global.list);
2571    llvm->nr_gs_variants--;
2572    FREE(variant);
2573 }
2574 
2575 
2576 struct draw_gs_llvm_variant_key *
draw_gs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2577 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2578 {
2579    struct draw_gs_llvm_variant_key *key;
2580    struct lp_sampler_static_state *draw_sampler;
2581    struct lp_image_static_state *draw_image;
2582 
2583    key = (struct draw_gs_llvm_variant_key *)store;
2584 
2585    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2586 
2587    key->num_outputs = draw_total_gs_outputs(llvm->draw);
2588 
2589    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
2590 
2591    /* All variants of this shader will have the same value for
2592     * nr_samplers.  Not yet trying to compact away holes in the
2593     * sampler array.
2594     */
2595    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2596    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2597       key->nr_sampler_views =
2598          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2599    } else {
2600       key->nr_sampler_views = key->nr_samplers;
2601    }
2602 
2603    key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2604 
2605    draw_sampler = key->samplers;
2606 
2607    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2608 
2609    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2610       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2611                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
2612    }
2613    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2614       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2615                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
2616    }
2617 
2618    draw_image = draw_gs_llvm_variant_key_images(key);
2619    memset(draw_image, 0,
2620           key->nr_images * sizeof *draw_image);
2621    for (unsigned i = 0; i < key->nr_images; i++) {
2622       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2623                                             llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
2624    }
2625    return key;
2626 }
2627 
2628 
2629 void
draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key * key)2630 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
2631 {
2632    struct lp_sampler_static_state *sampler = key->samplers;
2633    struct lp_image_static_state *image = draw_gs_llvm_variant_key_images(key);
2634 
2635    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2636    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2637       debug_printf("sampler[%i].src_format = %s\n", i,
2638                    util_format_name(sampler[i].texture_state.format));
2639    }
2640 
2641    for (unsigned i = 0 ; i < key->nr_images; i++)
2642       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2643 
2644 }
2645 
2646 
2647 static void
create_tcs_jit_types(struct draw_tcs_llvm_variant * var)2648 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
2649 {
2650    struct gallivm_state *gallivm = var->gallivm;
2651 
2652    var->resources_type = lp_build_jit_resources_type(gallivm);
2653    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2654    var->input_array_type = create_tcs_jit_input_type(gallivm);
2655    var->output_array_type = create_tcs_jit_output_type(gallivm);
2656 }
2657 
2658 
2659 static LLVMTypeRef
get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant * variant)2660 get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant *variant)
2661 {
2662    if (!variant->resources_ptr_type)
2663       create_tcs_jit_types(variant);
2664    return variant->resources_ptr_type;
2665 }
2666 
2667 
2668 static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)2669 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
2670                                struct lp_build_context *bld,
2671                                bool is_vindex_indirect,
2672                                LLVMValueRef vertex_index,
2673                                bool is_aindex_indirect,
2674                                LLVMValueRef attrib_index,
2675                                bool is_sindex_indirect,
2676                                LLVMValueRef swizzle_index)
2677 {
2678    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2679    struct gallivm_state *gallivm = bld->gallivm;
2680    LLVMBuilderRef builder = gallivm->builder;
2681    LLVMValueRef indices[3];
2682    LLVMValueRef res;
2683    struct lp_type type = bld->type;
2684    LLVMTypeRef input_type = create_tcs_jit_input_type_deref(gallivm);
2685    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2686 
2687    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2688       res = bld->zero;
2689       for (int i = 0; i < type.length; ++i) {
2690          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2691          LLVMValueRef vert_chan_index = vertex_index;
2692          LLVMValueRef attr_chan_index = attrib_index;
2693          LLVMValueRef swiz_chan_index = swizzle_index;
2694          LLVMValueRef channel_vec;
2695 
2696          if (is_vindex_indirect) {
2697             vert_chan_index = LLVMBuildExtractElement(builder,
2698                                                       vertex_index, idx, "");
2699          }
2700          if (is_aindex_indirect) {
2701             attr_chan_index = LLVMBuildExtractElement(builder,
2702                                                       attrib_index, idx, "");
2703          }
2704          if (is_sindex_indirect) {
2705             swiz_chan_index = LLVMBuildExtractElement(builder,
2706                                                       swizzle_index, idx, "");
2707          }
2708 
2709          indices[0] = vert_chan_index;
2710          indices[1] = attr_chan_index;
2711          indices[2] = swiz_chan_index;
2712 
2713          channel_vec = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2714          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2715          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2716       }
2717    } else {
2718       indices[0] = vertex_index;
2719       indices[1] = attrib_index;
2720       indices[2] = swizzle_index;
2721       res = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2722       res = LLVMBuildLoad2(builder, float_type, res, "");
2723       res = lp_build_broadcast_scalar(bld, res);
2724    }
2725    return res;
2726 }
2727 
2728 
2729 static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,uint32_t name)2730 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
2731                                 struct lp_build_context *bld,
2732                                 bool is_vindex_indirect,
2733                                 LLVMValueRef vertex_index,
2734                                 bool is_aindex_indirect,
2735                                 LLVMValueRef attrib_index,
2736                                 bool is_sindex_indirect,
2737                                 LLVMValueRef swizzle_index,
2738                                 uint32_t name)
2739 {
2740    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2741    struct gallivm_state *gallivm = bld->gallivm;
2742    LLVMBuilderRef builder = gallivm->builder;
2743    LLVMValueRef indices[3];
2744    LLVMValueRef res;
2745    struct lp_type type = bld->type;
2746    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2747    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2748 
2749    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2750       res = bld->zero;
2751       for (int i = 0; i < type.length; ++i) {
2752          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2753          LLVMValueRef vert_chan_index = vertex_index;
2754          LLVMValueRef attr_chan_index = attrib_index;
2755          LLVMValueRef swiz_chan_index = swizzle_index;
2756          LLVMValueRef channel_vec;
2757 
2758          if (is_vindex_indirect) {
2759             vert_chan_index = LLVMBuildExtractElement(builder,
2760                                                       vertex_index, idx, "");
2761          }
2762          if (is_aindex_indirect) {
2763             attr_chan_index = LLVMBuildExtractElement(builder,
2764                                                       attrib_index, idx, "");
2765          }
2766          if (is_sindex_indirect) {
2767             swiz_chan_index = LLVMBuildExtractElement(builder,
2768                                                       swizzle_index, idx, "");
2769          }
2770 
2771          indices[0] = vert_chan_index;
2772          indices[1] = attr_chan_index;
2773          indices[2] = swiz_chan_index;
2774 
2775          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2776          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2777 
2778          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2779       }
2780    } else {
2781       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2782       indices[1] = attrib_index;
2783       indices[2] = swizzle_index;
2784 
2785       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2786       res = LLVMBuildLoad2(builder, float_type, res, "");
2787       res = lp_build_broadcast_scalar(bld, res);
2788    }
2789    return res;
2790 }
2791 
2792 
2793 static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,unsigned name,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)2794 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
2795                                 struct lp_build_context *bld,
2796                                 unsigned name,
2797                                 bool is_vindex_indirect,
2798                                 LLVMValueRef vertex_index,
2799                                 bool is_aindex_indirect,
2800                                 LLVMValueRef attrib_index,
2801                                 bool is_sindex_indirect,
2802                                 LLVMValueRef swizzle_index,
2803                                 LLVMValueRef value,
2804                                 LLVMValueRef mask_vec)
2805 {
2806    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2807    struct gallivm_state *gallivm = bld->gallivm;
2808    LLVMBuilderRef builder = gallivm->builder;
2809    LLVMValueRef indices[3];
2810    LLVMValueRef res;
2811    struct lp_type type = bld->type;
2812    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2813 
2814    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2815       for (int i = 0; i < type.length; ++i) {
2816          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2817          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2818          LLVMValueRef attr_chan_index = attrib_index;
2819          LLVMValueRef swiz_chan_index = swizzle_index;
2820          LLVMValueRef channel_vec;
2821 
2822          if (is_vindex_indirect) {
2823             vert_chan_index = LLVMBuildExtractElement(builder,
2824                                                       vertex_index, idx, "");
2825          }
2826          if (is_aindex_indirect) {
2827             attr_chan_index = LLVMBuildExtractElement(builder,
2828                                                       attrib_index, idx, "");
2829          }
2830 
2831          if (is_sindex_indirect) {
2832             swiz_chan_index = LLVMBuildExtractElement(builder,
2833                                                       swizzle_index, idx, "");
2834          }
2835 
2836          indices[0] = vert_chan_index;
2837          indices[1] = attr_chan_index;
2838          indices[2] = swiz_chan_index;
2839 
2840          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2841 
2842          res = LLVMBuildExtractElement(builder, value, idx, "");
2843 
2844          struct lp_build_if_state ifthen;
2845          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2846          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2847          lp_build_if(&ifthen, gallivm, cond);
2848          LLVMBuildStore(builder, res, channel_vec);
2849          lp_build_endif(&ifthen);
2850       }
2851    } else {
2852       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2853       indices[1] = attrib_index;
2854       indices[2] = swizzle_index;
2855 
2856       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2857       for (unsigned i = 0; i < type.length; ++i) {
2858          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2859          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
2860 
2861          struct lp_build_if_state ifthen;
2862          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2863          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2864          lp_build_if(&ifthen, gallivm, cond);
2865          LLVMBuildStore(builder, val, res);
2866          lp_build_endif(&ifthen);
2867       }
2868    }
2869 }
2870 
2871 
2872 static LLVMValueRef
generate_tcs_mask_value(struct draw_tcs_llvm_variant * variant,struct lp_type tcs_type,LLVMValueRef limit,LLVMValueRef loop_counter)2873 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
2874                         struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
2875 {
2876    struct gallivm_state *gallivm = variant->gallivm;
2877    LLVMBuilderRef builder = gallivm->builder;
2878    struct lp_type mask_type = lp_int_type(tcs_type);
2879    LLVMValueRef num_vecs;
2880    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2881 
2882    num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
2883    for (unsigned i = 0; i < tcs_type.length; i++) {
2884       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2885       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
2886    }
2887    mask_val = lp_build_compare(gallivm, mask_type,
2888                                PIPE_FUNC_GREATER, num_vecs, mask_val);
2889 
2890    return mask_val;
2891 }
2892 
2893 
2894 static void
draw_tcs_llvm_generate(struct draw_llvm * llvm,struct draw_tcs_llvm_variant * variant)2895 draw_tcs_llvm_generate(struct draw_llvm *llvm,
2896                        struct draw_tcs_llvm_variant *variant)
2897 {
2898    struct gallivm_state *gallivm = variant->gallivm;
2899    LLVMContextRef context = gallivm->context;
2900    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2901    LLVMTypeRef arg_types[7];
2902    LLVMTypeRef func_type, coro_func_type;
2903    LLVMValueRef variant_func, variant_coro;
2904    LLVMValueRef resources_ptr;
2905    LLVMValueRef view_index;
2906    LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
2907    LLVMValueRef mask_val;
2908    LLVMBasicBlockRef block;
2909    LLVMBuilderRef builder;
2910    struct lp_build_context bld, bldvec;
2911    struct lp_build_sampler_soa *sampler = 0;
2912    struct lp_build_image_soa *image = NULL;
2913    struct lp_bld_tgsi_system_values system_values;
2914    char func_name[64], func_name_coro[64];
2915    struct draw_tcs_llvm_iface tcs_iface;
2916    struct lp_build_mask_context mask;
2917    LLVMValueRef consts_ptr;
2918    LLVMValueRef ssbos_ptr;
2919    struct lp_type tcs_type;
2920    unsigned vector_length = variant->shader->base.vector_length;
2921 
2922    memset(&system_values, 0, sizeof(system_values));
2923 
2924    snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
2925 
2926    snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
2927 
2928    arg_types[0] = get_tcs_resources_ptr_type(variant);    /* context */
2929    arg_types[1] = variant->input_array_type;           /* input */
2930    arg_types[2] = variant->output_array_type;
2931    arg_types[3] = int32_type;
2932    arg_types[4] = int32_type;
2933    arg_types[5] = int32_type;
2934    arg_types[6] = int32_type; /* coroutine only */
2935 
2936    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
2937 
2938    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
2939 
2940    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2941 
2942    variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
2943 
2944    variant->function = variant_func;
2945    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2946 
2947    LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
2948 
2949    lp_build_coro_add_presplit(variant_coro);
2950 
2951    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i) {
2952       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
2953          lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
2954          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2955       }
2956    }
2957 
2958    if (gallivm->cache && gallivm->cache->data_size)
2959       return;
2960    resources_ptr               = LLVMGetParam(variant_func, 0);
2961    input_array               = LLVMGetParam(variant_func, 1);
2962    output_array              = LLVMGetParam(variant_func, 2);
2963    prim_id                   = LLVMGetParam(variant_func, 3);
2964    patch_vertices_in         = LLVMGetParam(variant_func, 4);
2965    view_index                = LLVMGetParam(variant_func, 5);
2966 
2967    lp_build_name(resources_ptr, "resources");
2968    lp_build_name(input_array, "input");
2969    lp_build_name(output_array, "output");
2970    lp_build_name(prim_id, "prim_id");
2971    lp_build_name(patch_vertices_in, "patch_vertices_in");
2972    lp_build_name(view_index, "view_index");
2973 
2974    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2975    builder = gallivm->builder;
2976    LLVMPositionBuilderAtEnd(builder, block);
2977 
2978    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2979 
2980    memset(&tcs_type, 0, sizeof tcs_type);
2981    tcs_type.floating = true; /* floating point values */
2982    tcs_type.sign = true;     /* values are signed */
2983    tcs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2984    tcs_type.width = 32;      /* 32-bit float */
2985    tcs_type.length = vector_length;
2986 
2987    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
2988 
2989    LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
2990    LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
2991 
2992    struct lp_build_loop_state loop_state[2];
2993    LLVMValueRef num_inner_loop;
2994    unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
2995    num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
2996    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
2997    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
2998    unsigned end_coroutine = INT_MAX;
2999    lp_build_loop_begin(&loop_state[1], gallivm,
3000                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3001    lp_build_loop_begin(&loop_state[0], gallivm,
3002                        lp_build_const_int32(gallivm, 0)); /* inner loop */
3003    {
3004       LLVMValueRef args[7];
3005       args[0] = resources_ptr;
3006       args[1] = input_array;
3007       args[2] = output_array;
3008       args[3] = prim_id;
3009       args[4] = patch_vertices_in;
3010       args[5] = view_index;
3011       args[6] = loop_state[0].counter;
3012       LLVMValueRef coro_entry = LLVMBuildGEP2(builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
3013       LLVMValueRef coro_hdl = LLVMBuildLoad2(builder, hdl_ptr_type, coro_entry, "coro_hdl");
3014 
3015       struct lp_build_if_state ifstate;
3016       LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3017                                        lp_build_const_int32(gallivm, 0), "");
3018       /* first time here - call the coroutine function entry point */
3019       lp_build_if(&ifstate, gallivm, cmp);
3020       LLVMValueRef coro_ret = LLVMBuildCall2(builder, coro_func_type, variant_coro, args, 7, "");
3021       LLVMBuildStore(builder, coro_ret, coro_entry);
3022       lp_build_else(&ifstate);
3023       /* subsequent calls for this invocation - check if done. */
3024       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3025       struct lp_build_if_state ifstate2;
3026       lp_build_if(&ifstate2, gallivm, coro_done);
3027       /* if done destroy and force loop exit */
3028       lp_build_coro_destroy(gallivm, coro_hdl);
3029       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3030       lp_build_else(&ifstate2);
3031       /* otherwise resume the coroutine */
3032       lp_build_coro_resume(gallivm, coro_hdl);
3033       lp_build_endif(&ifstate2);
3034       lp_build_endif(&ifstate);
3035       lp_build_loop_force_reload_counter(&loop_state[1]);
3036    }
3037    lp_build_loop_end_cond(&loop_state[0],
3038                           num_inner_loop,
3039                           NULL,  LLVMIntUGE);
3040    lp_build_loop_end_cond(&loop_state[1],
3041                           lp_build_const_int32(gallivm, end_coroutine),
3042                           NULL, LLVMIntEQ);
3043    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3044 
3045    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3046    LLVMPositionBuilderAtEnd(builder, block);
3047 
3048    resources_ptr = LLVMGetParam(variant_coro, 0);
3049    input_array = LLVMGetParam(variant_coro, 1);
3050    output_array = LLVMGetParam(variant_coro, 2);
3051    prim_id = LLVMGetParam(variant_coro, 3);
3052    patch_vertices_in = LLVMGetParam(variant_coro, 4);
3053    view_index = LLVMGetParam(variant_coro, 5);
3054 
3055    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3056 
3057    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3058    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3059                                           MAX2(variant->key.nr_samplers,
3060                                                variant->key.nr_sampler_views));
3061    image = lp_bld_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3062                                       variant->key.nr_images);
3063 
3064    LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
3065    LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3066    for (unsigned i = 0; i < vector_length; i++) {
3067       LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
3068       LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
3069       invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
3070    }
3071 
3072    system_values.invocation_id = invocvec;
3073    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3074    system_values.view_index = view_index;
3075    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3076    tcs_iface.input = input_array;
3077    tcs_iface.output = output_array;
3078    tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3079    tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3080    tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3081 
3082 
3083    {
3084       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3085       LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3086 
3087       mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3088       lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3089 
3090       struct lp_build_coro_suspend_info coro_info;
3091 
3092       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3093       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3094 
3095       coro_info.suspend = sus_block;
3096       coro_info.cleanup = clean_block;
3097 
3098       struct lp_build_tgsi_params params;
3099       memset(&params, 0, sizeof(params));
3100 
3101       params.type = tcs_type;
3102       params.mask = &mask;
3103       params.consts_ptr = consts_ptr;
3104       params.system_values = &system_values;
3105       params.resources_type = variant->resources_type;
3106       params.resources_ptr = resources_ptr;
3107       params.sampler = sampler;
3108       params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3109       params.ssbo_ptr = ssbos_ptr;
3110       params.image = image;
3111       params.coro = &coro_info;
3112       params.tcs_iface = &tcs_iface.base;
3113       params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
3114                                                                       variant->resources_type,
3115                                                                       resources_ptr);
3116 
3117       lp_build_nir_soa(variant->gallivm,
3118                        llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3119                        &params, NULL);
3120 
3121       lp_build_mask_end(&mask);
3122 
3123       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
3124       LLVMPositionBuilderAtEnd(builder, clean_block);
3125 
3126       lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3127 
3128       LLVMBuildBr(builder, sus_block);
3129       LLVMPositionBuilderAtEnd(builder, sus_block);
3130 
3131       lp_build_coro_end(gallivm, coro_hdl);
3132       LLVMBuildRet(builder, coro_hdl);
3133    }
3134 
3135    lp_bld_llvm_sampler_soa_destroy(sampler);
3136    lp_bld_llvm_image_soa_destroy(image);
3137    gallivm_verify_function(gallivm, variant_func);
3138    gallivm_verify_function(gallivm, variant_coro);
3139 }
3140 
3141 
3142 struct draw_tcs_llvm_variant *
draw_tcs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tcs_llvm_variant_key * key)3143 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3144                              unsigned num_outputs,
3145                              const struct draw_tcs_llvm_variant_key *key)
3146 {
3147    struct draw_tcs_llvm_variant *variant;
3148    struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3149    char module_name[64];
3150    unsigned char ir_sha1_cache_key[20];
3151    struct lp_cached_code cached = { 0 };
3152    bool needs_caching = false;
3153 
3154    variant = MALLOC(sizeof *variant +
3155                     shader->variant_key_size - sizeof variant->key);
3156    if (!variant)
3157       return NULL;
3158 
3159    variant->llvm = llvm;
3160    variant->shader = shader;
3161 
3162    snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3163             variant->shader->variants_cached);
3164 
3165    memcpy(&variant->key, key, shader->variant_key_size);
3166 
3167    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3168       draw_get_ir_cache_key(shader->base.state.ir.nir,
3169                             key,
3170                             shader->variant_key_size,
3171                             num_outputs,
3172                             ir_sha1_cache_key);
3173 
3174       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3175                                          &cached,
3176                                          ir_sha1_cache_key);
3177       if (!cached.data_size)
3178          needs_caching = true;
3179    }
3180 
3181    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
3182 
3183    create_tcs_jit_types(variant);
3184 
3185    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3186       nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3187       draw_tcs_llvm_dump_variant_key(&variant->key);
3188    }
3189 
3190    draw_tcs_llvm_generate(llvm, variant);
3191 
3192    gallivm_compile_module(variant->gallivm);
3193 
3194    variant->jit_func = (draw_tcs_jit_func)
3195       gallivm_jit_function(variant->gallivm, variant->function);
3196 
3197    if (needs_caching)
3198       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3199                                            &cached,
3200                                            ir_sha1_cache_key);
3201    gallivm_free_ir(variant->gallivm);
3202 
3203    variant->list_item_global.base = variant;
3204    variant->list_item_local.base = variant;
3205    /*variant->no = */shader->variants_created++;
3206    variant->list_item_global.base = variant;
3207 
3208    return variant;
3209 }
3210 
3211 
3212 void
draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant * variant)3213 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3214 {
3215    struct draw_llvm *llvm = variant->llvm;
3216 
3217    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3218       debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3219                     variant->shader->variants_cached, llvm->nr_tcs_variants);
3220    }
3221 
3222    gallivm_destroy(variant->gallivm);
3223 
3224    list_del(&variant->list_item_local.list);
3225    variant->shader->variants_cached--;
3226    list_del(&variant->list_item_global.list);
3227    llvm->nr_tcs_variants--;
3228    FREE(variant);
3229 }
3230 
3231 
3232 struct draw_tcs_llvm_variant_key *
draw_tcs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3233 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3234 {
3235    unsigned i;
3236    struct draw_tcs_llvm_variant_key *key;
3237    struct lp_sampler_static_state *draw_sampler;
3238    struct lp_image_static_state *draw_image;
3239 
3240    key = (struct draw_tcs_llvm_variant_key *)store;
3241 
3242    memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3243 
3244    /* All variants of this shader will have the same value for
3245     * nr_samplers.  Not yet trying to compact away holes in the
3246     * sampler array.
3247     */
3248    key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3249    if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3250       key->nr_sampler_views =
3251          llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3252    } else {
3253       key->nr_sampler_views = key->nr_samplers;
3254    }
3255 
3256    key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3257 
3258    draw_sampler = key->samplers;
3259 
3260    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3261 
3262    for (i = 0 ; i < key->nr_samplers; i++) {
3263       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3264                                       llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3265    }
3266    for (i = 0 ; i < key->nr_sampler_views; i++) {
3267       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3268                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3269    }
3270 
3271    draw_image = draw_tcs_llvm_variant_key_images(key);
3272    memset(draw_image, 0,
3273           key->nr_images * sizeof *draw_image);
3274    for (i = 0; i < key->nr_images; i++) {
3275       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3276                                             llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3277    }
3278    return key;
3279 }
3280 
3281 
3282 void
draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key * key)3283 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3284 {
3285    struct lp_sampler_static_state *sampler = key->samplers;
3286    struct lp_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3287    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3288       debug_printf("sampler[%i].src_format = %s\n", i,
3289                    util_format_name(sampler[i].texture_state.format));
3290    }
3291 
3292    for (unsigned i = 0 ; i < key->nr_images; i++)
3293       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3294 }
3295 
3296 
3297 static void
create_tes_jit_types(struct draw_tes_llvm_variant * var)3298 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3299 {
3300    struct gallivm_state *gallivm = var->gallivm;
3301 
3302    var->resources_type = lp_build_jit_resources_type(gallivm);
3303    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
3304    var->input_array_deref_type = create_tes_jit_input_deref_type(gallivm);
3305    var->input_array_type = LLVMPointerType(var->input_array_deref_type, 0); /* num vertices per prim */
3306 }
3307 
3308 
3309 static LLVMTypeRef
get_tes_resources_ptr_type(struct draw_tes_llvm_variant * variant)3310 get_tes_resources_ptr_type(struct draw_tes_llvm_variant *variant)
3311 {
3312    if (!variant->resources_ptr_type)
3313       create_tes_jit_types(variant);
3314    return variant->resources_ptr_type;
3315 }
3316 
3317 
3318 static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant * variant,struct lp_type tes_type,LLVMValueRef limit,LLVMValueRef loop_counter)3319 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3320                         struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3321 {
3322    struct gallivm_state *gallivm = variant->gallivm;
3323    LLVMBuilderRef builder = gallivm->builder;
3324    struct lp_type mask_type = lp_int_type(tes_type);
3325    LLVMValueRef num_prims;
3326    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3327    unsigned i;
3328 
3329    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3330    for (i = 0; i < tes_type.length; i++) {
3331       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3332       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3333    }
3334    mask_val = lp_build_compare(gallivm, mask_type,
3335                                PIPE_FUNC_GREATER, num_prims, mask_val);
3336 
3337    return mask_val;
3338 }
3339 
3340 
3341 static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)3342 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3343                                  struct lp_build_context *bld,
3344                                  bool is_vindex_indirect,
3345                                  LLVMValueRef vertex_index,
3346                                  bool is_aindex_indirect,
3347                                  LLVMValueRef attrib_index,
3348                                  bool is_sindex_indirect,
3349                                  LLVMValueRef swizzle_index)
3350 {
3351    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3352    struct gallivm_state *gallivm = bld->gallivm;
3353    LLVMBuilderRef builder = gallivm->builder;
3354    LLVMValueRef indices[3];
3355    LLVMValueRef res;
3356    struct lp_type type = bld->type;
3357 
3358    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3359       res = bld->zero;
3360 
3361       for (int i = 0; i < type.length; ++i) {
3362          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3363          LLVMValueRef vert_chan_index = vertex_index;
3364          LLVMValueRef attr_chan_index = attrib_index;
3365          LLVMValueRef swiz_chan_index = swizzle_index;
3366          LLVMValueRef channel_vec;
3367 
3368          if (is_vindex_indirect) {
3369             vert_chan_index = LLVMBuildExtractElement(builder,
3370                                                       vertex_index, idx, "");
3371          }
3372          if (is_aindex_indirect) {
3373             attr_chan_index = LLVMBuildExtractElement(builder,
3374                                                       attrib_index, idx, "");
3375          }
3376          if (is_sindex_indirect) {
3377             swiz_chan_index = LLVMBuildExtractElement(builder,
3378                                                       swizzle_index, idx, "");
3379          }
3380 
3381          indices[0] = vert_chan_index;
3382          indices[1] = attr_chan_index;
3383          indices[2] = swiz_chan_index;
3384 
3385          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3386          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3387 
3388          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3389       }
3390    } else {
3391       indices[0] = vertex_index;
3392       indices[1] = attrib_index;
3393       indices[2] = swizzle_index;
3394 
3395       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3396       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3397       res = lp_build_broadcast_scalar(bld, res);
3398    }
3399    return res;
3400 }
3401 
3402 
3403 static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)3404 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3405                                 struct lp_build_context *bld,
3406                                 bool is_aindex_indirect,
3407                                 LLVMValueRef attrib_index,
3408                                 LLVMValueRef swizzle_index)
3409 {
3410    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3411    struct gallivm_state *gallivm = bld->gallivm;
3412    LLVMBuilderRef builder = gallivm->builder;
3413    LLVMValueRef indices[3];
3414    LLVMValueRef res;
3415    struct lp_type type = bld->type;
3416 
3417    if (is_aindex_indirect) {
3418       res = bld->zero;
3419 
3420       for (int i = 0; i < type.length; ++i) {
3421          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3422          LLVMValueRef attr_chan_index = attrib_index;
3423          LLVMValueRef channel_vec;
3424 
3425          if (is_aindex_indirect) {
3426             attr_chan_index = LLVMBuildExtractElement(builder,
3427                                                       attrib_index, idx, "");
3428          }
3429 
3430          indices[0] = lp_build_const_int32(gallivm, 0);
3431          indices[1] = attr_chan_index;
3432          indices[2] = swizzle_index;
3433 
3434          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3435          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3436 
3437          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3438       }
3439    } else {
3440       indices[0] = lp_build_const_int32(gallivm, 0);
3441       indices[1] = attrib_index;
3442       indices[2] = swizzle_index;
3443 
3444       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3445       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3446       res = lp_build_broadcast_scalar(bld, res);
3447    }
3448    return res;
3449 }
3450 
3451 
3452 static void
draw_tes_llvm_generate(struct draw_llvm * llvm,struct draw_tes_llvm_variant * variant)3453 draw_tes_llvm_generate(struct draw_llvm *llvm,
3454                        struct draw_tes_llvm_variant *variant)
3455 {
3456    struct gallivm_state *gallivm = variant->gallivm;
3457    LLVMContextRef context = gallivm->context;
3458    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3459    LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
3460    LLVMTypeRef arg_types[11];
3461    LLVMTypeRef func_type;
3462    LLVMValueRef variant_func;
3463    LLVMValueRef resources_ptr;
3464    LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
3465    LLVMValueRef view_index;
3466    LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
3467    LLVMBasicBlockRef block;
3468    LLVMBuilderRef builder;
3469    LLVMValueRef mask_val;
3470    struct lp_build_context bld, bldvec;
3471    struct lp_build_sampler_soa *sampler = 0;
3472    struct lp_build_image_soa *image = NULL;
3473    struct lp_bld_tgsi_system_values system_values;
3474    char func_name[64];
3475    unsigned i;
3476    struct draw_tes_llvm_iface tes_iface;
3477    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
3478    struct lp_build_mask_context mask;
3479    LLVMValueRef consts_ptr;
3480    LLVMValueRef ssbos_ptr;
3481    LLVMValueRef step;
3482    struct lp_type tes_type;
3483    unsigned vector_length = variant->shader->base.vector_length;
3484    int primid_slot = -1;
3485 
3486    memset(&system_values, 0, sizeof(system_values));
3487    memset(&outputs, 0, sizeof(outputs));
3488 
3489    snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
3490 
3491    LLVMTypeRef tess_outer_deref_type = LLVMArrayType(flt_type, 4);
3492    LLVMTypeRef tess_inner_deref_type = LLVMArrayType(flt_type, 2);
3493 
3494    arg_types[0] = get_tes_resources_ptr_type(variant);    /* context */
3495    arg_types[1] = variant->input_array_type;           /* input */
3496    arg_types[2] = variant->vertex_header_ptr_type;
3497    arg_types[3] = int32_type;
3498    arg_types[4] = int32_type;
3499    arg_types[5] = LLVMPointerType(flt_type, 0);
3500    arg_types[6] = LLVMPointerType(flt_type, 0);
3501    arg_types[7] = LLVMPointerType(tess_outer_deref_type, 0);
3502    arg_types[8] = LLVMPointerType(tess_inner_deref_type, 0);
3503    arg_types[9] = int32_type;
3504    arg_types[10] = int32_type;
3505 
3506    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
3507    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3508 
3509    variant->function = variant_func;
3510    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3511 
3512    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
3513       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
3514          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3515 
3516    if (gallivm->cache && gallivm->cache->data_size)
3517       return;
3518    resources_ptr               = LLVMGetParam(variant_func, 0);
3519    input_array               = LLVMGetParam(variant_func, 1);
3520    io_ptr                    = LLVMGetParam(variant_func, 2);
3521    prim_id                   = LLVMGetParam(variant_func, 3);
3522    num_tess_coord            = LLVMGetParam(variant_func, 4);
3523    tess_coord[0]             = LLVMGetParam(variant_func, 5);
3524    tess_coord[1]             = LLVMGetParam(variant_func, 6);
3525    tess_outer                = LLVMGetParam(variant_func, 7);
3526    tess_inner                = LLVMGetParam(variant_func, 8);
3527    patch_vertices_in         = LLVMGetParam(variant_func, 9);
3528    view_index                = LLVMGetParam(variant_func, 10);
3529 
3530    lp_build_name(resources_ptr, "resources");
3531    lp_build_name(input_array, "input");
3532    lp_build_name(io_ptr, "io");
3533    lp_build_name(prim_id, "prim_id");
3534    lp_build_name(num_tess_coord, "num_tess_coord");
3535    lp_build_name(tess_coord[0], "tess_coord[0]");
3536    lp_build_name(tess_coord[1], "tess_coord[1]");
3537    lp_build_name(tess_outer, "tess_outer");
3538    lp_build_name(tess_inner, "tess_inner");
3539    lp_build_name(patch_vertices_in, "patch_vertices_in");
3540    lp_build_name(view_index, "view_index");
3541 
3542    tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
3543    tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
3544    tes_iface.input = input_array;
3545    tes_iface.variant = variant;
3546 
3547    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3548    builder = gallivm->builder;
3549    LLVMPositionBuilderAtEnd(builder, block);
3550 
3551    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3552 
3553    memset(&tes_type, 0, sizeof tes_type);
3554    tes_type.floating = true; /* floating point values */
3555    tes_type.sign = true;     /* values are signed */
3556    tes_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
3557    tes_type.width = 32;      /* 32-bit float */
3558    tes_type.length = vector_length;
3559 
3560    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
3561    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3562 
3563    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3564 
3565    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3566                                           MAX2(variant->key.nr_samplers,
3567                                                variant->key.nr_sampler_views));
3568    image = lp_bld_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
3569                                       variant->key.nr_images);
3570    step = lp_build_const_int32(gallivm, vector_length);
3571 
3572    system_values.tess_outer = LLVMBuildLoad2(builder, tess_outer_deref_type, tess_outer, "");
3573    system_values.tess_inner = LLVMBuildLoad2(builder, tess_inner_deref_type, tess_inner, "");
3574 
3575    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3576 
3577    system_values.view_index = view_index;
3578 
3579    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3580 
3581    if (variant->key.primid_needed) {
3582       int slot = variant->key.primid_output;
3583       for (unsigned i = 0; i < 4; i++) {
3584          outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
3585          LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
3586       }
3587       primid_slot = slot;
3588    }
3589    struct lp_build_loop_state lp_loop;
3590    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
3591    {
3592       LLVMValueRef io;
3593 
3594       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &lp_loop.counter, 1, "");
3595       mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
3596       lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
3597 
3598       system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
3599       for (i = 0; i < 3; i++) {
3600          LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
3601          for (unsigned j = 0; j < vector_length; j++) {
3602             LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
3603             LLVMValueRef tc_val;
3604             if (i == 2) {
3605                if (variant->shader->base.prim_mode == MESA_PRIM_TRIANGLES) {
3606                   tc_val = lp_build_const_float(gallivm, 1.0);
3607                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[0], idx), "");
3608                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[1], idx), "");
3609                } else
3610                   tc_val = lp_build_const_float(gallivm, 0.0);
3611             } else
3612                tc_val = lp_build_pointer_get2(builder, flt_type, tess_coord[i], idx);
3613 
3614             tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
3615          }
3616          system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
3617       }
3618 
3619       struct lp_build_tgsi_params params;
3620       memset(&params, 0, sizeof(params));
3621 
3622       params.type = tes_type;
3623       params.mask = &mask;
3624       params.consts_ptr = consts_ptr;
3625       params.system_values = &system_values;
3626       params.resources_type = variant->resources_type;
3627       params.resources_ptr = resources_ptr;
3628       params.sampler = sampler;
3629       params.info = &llvm->draw->tes.tess_eval_shader->info;
3630       params.ssbo_ptr = ssbos_ptr;
3631       params.image = image;
3632       params.tes_iface = &tes_iface.base;
3633       params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm, variant->resources_type, resources_ptr);
3634 
3635       lp_build_nir_soa(variant->gallivm,
3636                        llvm->draw->tes.tess_eval_shader->state.ir.nir,
3637                        &params,
3638                        outputs);
3639 
3640       lp_build_mask_end(&mask);
3641 
3642       if (variant->key.clamp_vertex_color) {
3643          const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
3644          do_clamp_vertex_color(variant->gallivm,
3645                                tes_type, info,
3646                                outputs);
3647       }
3648       LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
3649                                                      lp_int_type(tes_type), 0);
3650 
3651       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
3652                      draw_total_tes_outputs(llvm->draw), tes_type, primid_slot, false);
3653    }
3654    lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
3655    lp_bld_llvm_sampler_soa_destroy(sampler);
3656    lp_bld_llvm_image_soa_destroy(image);
3657 
3658    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3659    gallivm_verify_function(gallivm, variant_func);
3660 }
3661 
3662 
3663 struct draw_tes_llvm_variant *
draw_tes_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tes_llvm_variant_key * key)3664 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
3665                              unsigned num_outputs,
3666                              const struct draw_tes_llvm_variant_key *key)
3667 {
3668    struct draw_tes_llvm_variant *variant;
3669    struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
3670    char module_name[64];
3671    unsigned char ir_sha1_cache_key[20];
3672    struct lp_cached_code cached = { 0 };
3673    bool needs_caching = false;
3674 
3675    variant = MALLOC(sizeof *variant +
3676                     shader->variant_key_size - sizeof variant->key);
3677    if (!variant)
3678       return NULL;
3679 
3680    variant->llvm = llvm;
3681    variant->shader = shader;
3682 
3683    snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
3684             variant->shader->variants_cached);
3685 
3686    memcpy(&variant->key, key, shader->variant_key_size);
3687    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3688       draw_get_ir_cache_key(shader->base.state.ir.nir,
3689                             key,
3690                             shader->variant_key_size,
3691                             num_outputs,
3692                             ir_sha1_cache_key);
3693 
3694       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3695                                          &cached,
3696                                          ir_sha1_cache_key);
3697       if (!cached.data_size)
3698          needs_caching = true;
3699    }
3700    variant->gallivm = gallivm_create(module_name, llvm->context, &cached);
3701 
3702    create_tes_jit_types(variant);
3703 
3704    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
3705    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
3706 
3707    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3708       nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
3709       draw_tes_llvm_dump_variant_key(&variant->key);
3710    }
3711 
3712    draw_tes_llvm_generate(llvm, variant);
3713 
3714    gallivm_compile_module(variant->gallivm);
3715 
3716    variant->jit_func = (draw_tes_jit_func)
3717       gallivm_jit_function(variant->gallivm, variant->function);
3718 
3719    if (needs_caching)
3720       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3721                                            &cached,
3722                                            ir_sha1_cache_key);
3723    gallivm_free_ir(variant->gallivm);
3724 
3725    variant->list_item_global.base = variant;
3726    variant->list_item_local.base = variant;
3727    /*variant->no = */shader->variants_created++;
3728    variant->list_item_global.base = variant;
3729 
3730    return variant;
3731 }
3732 
3733 
3734 void
draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant * variant)3735 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
3736 {
3737    struct draw_llvm *llvm = variant->llvm;
3738 
3739    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3740       debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
3741                     variant->shader->variants_cached, llvm->nr_tes_variants);
3742    }
3743 
3744    gallivm_destroy(variant->gallivm);
3745 
3746    list_del(&variant->list_item_local.list);
3747    variant->shader->variants_cached--;
3748    list_del(&variant->list_item_global.list);
3749    llvm->nr_tes_variants--;
3750    FREE(variant);
3751 }
3752 
3753 
3754 struct draw_tes_llvm_variant_key *
draw_tes_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3755 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3756 {
3757    struct draw_tes_llvm_variant_key *key;
3758    struct lp_sampler_static_state *draw_sampler;
3759    struct lp_image_static_state *draw_image;
3760 
3761    key = (struct draw_tes_llvm_variant_key *)store;
3762 
3763    memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
3764 
3765    int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
3766    if (primid_output >= 0) {
3767       key->primid_output = primid_output;
3768       key->primid_needed = true;
3769    }
3770 
3771    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
3772       llvm->draw->gs.geometry_shader == NULL;
3773 
3774    /* All variants of this shader will have the same value for
3775     * nr_samplers.  Not yet trying to compact away holes in the
3776     * sampler array.
3777     */
3778    key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3779    if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3780       key->nr_sampler_views =
3781          llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3782    } else {
3783       key->nr_sampler_views = key->nr_samplers;
3784    }
3785 
3786    key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3787 
3788    draw_sampler = key->samplers;
3789 
3790    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3791 
3792    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
3793       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3794                                       llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
3795    }
3796    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3797       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3798                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
3799    }
3800 
3801    draw_image = draw_tes_llvm_variant_key_images(key);
3802    memset(draw_image, 0,
3803           key->nr_images * sizeof *draw_image);
3804    for (unsigned i = 0; i < key->nr_images; i++) {
3805       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3806                                             llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
3807    }
3808    return key;
3809 }
3810 
3811 
3812 void
draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key * key)3813 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
3814 {
3815    struct lp_sampler_static_state *sampler = key->samplers;
3816    struct lp_image_static_state *image = draw_tes_llvm_variant_key_images(key);
3817 
3818    if (key->primid_needed)
3819       debug_printf("prim id output %d\n", key->primid_output);
3820    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3821    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3822       debug_printf("sampler[%i].src_format = %s\n", i,
3823                    util_format_name(sampler[i].texture_state.format));
3824    }
3825 
3826    for (unsigned i = 0 ; i < key->nr_images; i++)
3827       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3828 
3829 }
3830