• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27 
28 #include "draw_llvm.h"
29 
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33 
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "gallivm/lp_bld_jit_sample.h"
56 #include "tgsi/tgsi_exec.h"
57 #include "tgsi/tgsi_dump.h"
58 
59 #include "util/u_math.h"
60 #include "util/u_pointer.h"
61 #include "util/u_string.h"
62 #include "nir_serialize.h"
63 #include "util/mesa-sha1.h"
64 #define DEBUG_STORE 0
65 
66 
67 static void
68 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
69 
70 
71 struct draw_gs_llvm_iface {
72    struct lp_build_gs_iface base;
73 
74    struct draw_gs_llvm_variant *variant;
75    LLVMValueRef input;
76 };
77 
78 
/**
 * Cast a generic GS interface pointer to the draw-specific wrapper type.
 *
 * \param iface  pointer to the 'base' member of a struct draw_gs_llvm_iface
 * \return the same address, typed as const struct draw_gs_llvm_iface *
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   return (const struct draw_gs_llvm_iface *)iface;
}
84 
85 
86 struct draw_tcs_llvm_iface {
87    struct lp_build_tcs_iface base;
88 
89    struct draw_tcs_llvm_variant *variant;
90    LLVMValueRef input;
91    LLVMValueRef output;
92 };
93 
94 
/**
 * Cast a generic TCS interface pointer to the draw-specific wrapper type.
 *
 * \param iface  pointer to the 'base' member of a struct draw_tcs_llvm_iface
 * \return the same address, typed as const struct draw_tcs_llvm_iface *
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   return (const struct draw_tcs_llvm_iface *)iface;
}
100 
101 
102 struct draw_tes_llvm_iface {
103    struct lp_build_tes_iface base;
104 
105    struct draw_tes_llvm_variant *variant;
106    LLVMValueRef input;
107 };
108 
109 
/**
 * Cast a generic TES interface pointer to the draw-specific wrapper type.
 *
 * \param iface  pointer to the 'base' member of a struct draw_tes_llvm_iface
 * \return the same address, typed as const struct draw_tes_llvm_iface *
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   return (const struct draw_tes_llvm_iface *)iface;
}
115 
116 
117 /**
118  * Create LLVM type for draw_vertex_buffer.
119  */
120 static LLVMTypeRef
create_jit_dvbuffer_type(struct gallivm_state * gallivm,const char * struct_name)121 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
122                          const char *struct_name)
123 {
124    LLVMTargetDataRef target = gallivm->target;
125    LLVMTypeRef dvbuffer_type;
126    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
127    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
128 
129    elem_types[DRAW_JIT_DVBUFFER_MAP] =
130       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
131    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
132 
133    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
134                                            ARRAY_SIZE(elem_types), 0);
135 
136    (void) target; /* silence unused var warning for non-debug build */
137    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
138                           target, dvbuffer_type,
139                           DRAW_JIT_DVBUFFER_MAP);
140    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
141                           target, dvbuffer_type,
142                           DRAW_JIT_DVBUFFER_SIZE);
143 
144    return dvbuffer_type;
145 }
146 
147 /**
148  * Create LLVM type for struct draw_jit_context
149  */
150 static LLVMTypeRef
create_vs_jit_context_type(struct gallivm_state * gallivm,const char * struct_name)151 create_vs_jit_context_type(struct gallivm_state *gallivm, const char *struct_name)
152 {
153    LLVMTargetDataRef target = gallivm->target;
154    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
155    LLVMTypeRef elem_types[DRAW_VS_JIT_CTX_NUM_FIELDS];
156 
157    elem_types[DRAW_VS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0);
158    elem_types[DRAW_VS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0);
159 
160    LLVMTypeRef context_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0);
161 
162    (void) target; /* silence unused var warning for non-debug build */
163    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, planes,
164                           target, context_type, DRAW_VS_JIT_CTX_PLANES);
165    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, viewports,
166                           target, context_type, DRAW_VS_JIT_CTX_VIEWPORT);
167    LP_CHECK_STRUCT_SIZE(struct draw_vs_jit_context,
168                         target, context_type);
169 
170    return context_type;
171 }
172 
173 
174 /**
175  * Create LLVM type for struct draw_gs_jit_context
176  */
177 static LLVMTypeRef
create_gs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,const char * struct_name)178 create_gs_jit_context_type(struct gallivm_state *gallivm,
179                            unsigned vector_length,
180                            const char *struct_name)
181 {
182    LLVMTargetDataRef target = gallivm->target;
183    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
184    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
185    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
186    LLVMTypeRef context_type;
187 
188    elem_types[DRAW_GS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
189                                                                       DRAW_TOTAL_CLIP_PLANES), 0);
190    elem_types[DRAW_GS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0); /* viewports */
191 
192 
193    elem_types[DRAW_GS_JIT_CTX_PRIM_LENGTHS] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
194    elem_types[DRAW_GS_JIT_CTX_EMITTED_VERTICES] = LLVMPointerType(LLVMVectorType(int_type,
195                                                                                  vector_length), 0);
196    elem_types[DRAW_GS_JIT_CTX_EMITTED_PRIMS] = LLVMPointerType(LLVMVectorType(int_type,
197                                                                               vector_length), 0);
198 
199    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
200                                           ARRAY_SIZE(elem_types), 0);
201 
202    (void) target; /* silence unused var warning for non-debug build */
203    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
204                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
205    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
206                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
207    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
208                           target, context_type,
209                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
210    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
211                           target, context_type,
212                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
213    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
214                           target, context_type,
215                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
216    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
217                         target, context_type);
218    return context_type;
219 }
220 
221 
222 static LLVMTypeRef
create_gs_jit_input_type_deref(struct gallivm_state * gallivm)223 create_gs_jit_input_type_deref(struct gallivm_state *gallivm)
224 {
225    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
226    LLVMTypeRef input_array;
227 
228    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
229    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
230    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
231    return input_array;
232 }
233 
234 
235 static LLVMTypeRef
create_gs_jit_input_type(struct gallivm_state * gallivm)236 create_gs_jit_input_type(struct gallivm_state *gallivm)
237 {
238    return LLVMPointerType(create_gs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
239 }
240 
241 
242 /**
243  * Create LLVM type for struct pipe_vertex_buffer
244  */
245 static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state * gallivm,const char * struct_name)246 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
247                               const char *struct_name)
248 {
249    LLVMTargetDataRef target = gallivm->target;
250    LLVMTypeRef elem_types[3];
251    LLVMTypeRef vb_type;
252 
253    elem_types[0] = LLVMInt8TypeInContext(gallivm->context);
254    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
255    elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
256 
257    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
258                                      ARRAY_SIZE(elem_types), 0);
259 
260    (void) target; /* silence unused var warning for non-debug build */
261    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
262                           target, vb_type, 0);
263    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
264                           target, vb_type, 1);
265    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
266                           target, vb_type, 2);
267 
268    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
269 
270    return vb_type;
271 }
272 
273 
274 static LLVMTypeRef
create_tcs_jit_input_type_deref(struct gallivm_state * gallivm)275 create_tcs_jit_input_type_deref(struct gallivm_state *gallivm)
276 {
277    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
278    LLVMTypeRef input_array;
279 
280    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
281    input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
282    return input_array;
283 }
284 
285 
286 static LLVMTypeRef
create_tcs_jit_input_type(struct gallivm_state * gallivm)287 create_tcs_jit_input_type(struct gallivm_state *gallivm)
288 {
289    return LLVMPointerType(create_tcs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
290 }
291 
292 
293 static LLVMTypeRef
create_tcs_jit_output_type_deref(struct gallivm_state * gallivm)294 create_tcs_jit_output_type_deref(struct gallivm_state *gallivm)
295 {
296    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
297    LLVMTypeRef output_array;
298 
299    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
300    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
301    return output_array;
302 }
303 
304 
305 static LLVMTypeRef
create_tcs_jit_output_type(struct gallivm_state * gallivm)306 create_tcs_jit_output_type(struct gallivm_state *gallivm)
307 {
308    return LLVMPointerType(create_tcs_jit_output_type_deref(gallivm), 0); /* num vertices per prim */
309 }
310 
311 
312 static LLVMTypeRef
create_tes_jit_input_deref_type(struct gallivm_state * gallivm)313 create_tes_jit_input_deref_type(struct gallivm_state *gallivm)
314 {
315    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
316    LLVMTypeRef input_array;
317 
318    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
319    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
320 
321    return input_array;
322 }
323 
324 
325 /**
326  * Create LLVM types for various structures.
327  */
328 static void
create_vs_jit_types(struct draw_llvm_variant * variant)329 create_vs_jit_types(struct draw_llvm_variant *variant)
330 {
331    struct gallivm_state *gallivm = variant->gallivm;
332 
333    variant->context_type = create_vs_jit_context_type(gallivm, "draw_vs_jit_context");
334    variant->context_ptr_type = LLVMPointerType(variant->context_type, 0);
335 
336    variant->resources_type = lp_build_jit_resources_type(gallivm);
337    variant->resources_ptr_type = LLVMPointerType(variant->resources_type, 0);
338 
339    variant->buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
340    variant->buffer_ptr_type = LLVMPointerType(variant->buffer_type, 0);
341 
342    variant->vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
343    variant->vb_ptr_type = LLVMPointerType(variant->vb_type, 0);
344 }
345 
346 
347 static LLVMTypeRef
get_context_ptr_type(struct draw_llvm_variant * variant)348 get_context_ptr_type(struct draw_llvm_variant *variant)
349 {
350    if (!variant->context_ptr_type)
351       create_vs_jit_types(variant);
352    return variant->context_ptr_type;
353 }
354 
355 
356 static LLVMTypeRef
get_buffer_ptr_type(struct draw_llvm_variant * variant)357 get_buffer_ptr_type(struct draw_llvm_variant *variant)
358 {
359    if (!variant->buffer_ptr_type)
360       create_vs_jit_types(variant);
361    return variant->buffer_ptr_type;
362 }
363 
364 
365 static LLVMTypeRef
get_vb_ptr_type(struct draw_llvm_variant * variant)366 get_vb_ptr_type(struct draw_llvm_variant *variant)
367 {
368    if (!variant->vb_ptr_type)
369       create_vs_jit_types(variant);
370    return variant->vb_ptr_type;
371 }
372 
373 static LLVMTypeRef
get_vertex_header_ptr_type(struct draw_llvm_variant * variant)374 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
375 {
376    assert(variant->vertex_header_ptr_type);
377    return variant->vertex_header_ptr_type;
378 }
379 
380 
381 /**
382  * Create per-context LLVM info.
383  */
384 struct draw_llvm *
draw_llvm_create(struct draw_context * draw,lp_context_ref * context)385 draw_llvm_create(struct draw_context *draw, lp_context_ref *context)
386 {
387    struct draw_llvm *llvm;
388 
389    if (!lp_build_init())
390       return NULL;
391 
392    llvm = CALLOC_STRUCT(draw_llvm);
393    if (!llvm)
394       return NULL;
395 
396    llvm->draw = draw;
397 
398    if (context) {
399       llvm->context = *context;
400       llvm->context.owned = false;
401    }
402    if (!llvm->context.ref) {
403       lp_context_create(&llvm->context);
404    }
405    if (!llvm->context.ref)
406       goto fail;
407 
408    llvm->nr_variants = 0;
409    list_inithead(&llvm->vs_variants_list.list);
410 
411    llvm->nr_gs_variants = 0;
412    list_inithead(&llvm->gs_variants_list.list);
413 
414    llvm->nr_tcs_variants = 0;
415    list_inithead(&llvm->tcs_variants_list.list);
416 
417    llvm->nr_tes_variants = 0;
418    list_inithead(&llvm->tes_variants_list.list);
419 
420    return llvm;
421 
422 fail:
423    draw_llvm_destroy(llvm);
424    return NULL;
425 }
426 
427 
428 /**
429  * Free per-context LLVM info.
430  */
431 void
draw_llvm_destroy(struct draw_llvm * llvm)432 draw_llvm_destroy(struct draw_llvm *llvm)
433 {
434    lp_context_destroy(&llvm->context);
435 
436    /* XXX free other draw_llvm data? */
437    FREE(llvm);
438 }
439 
440 
441 static void
draw_get_ir_cache_key(struct nir_shader * nir,const void * key,size_t key_size,uint32_t val_32bit,unsigned char ir_sha1_cache_key[20])442 draw_get_ir_cache_key(struct nir_shader *nir,
443                       const void *key, size_t key_size,
444                       uint32_t val_32bit,
445                       unsigned char ir_sha1_cache_key[20])
446 {
447    struct blob blob = { 0 };
448    unsigned ir_size;
449    void *ir_binary;
450 
451    blob_init(&blob);
452    nir_serialize(&blob, nir, true);
453    ir_binary = blob.data;
454    ir_size = blob.size;
455 
456    struct mesa_sha1 ctx;
457    _mesa_sha1_init(&ctx);
458    _mesa_sha1_update(&ctx, key, key_size);
459    _mesa_sha1_update(&ctx, ir_binary, ir_size);
460    _mesa_sha1_update(&ctx, &val_32bit, 4);
461    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
462 
463    blob_finish(&blob);
464 }
465 
466 
467 /**
468  * Create LLVM-generated code for a vertex shader.
469  */
470 struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm * llvm,unsigned num_inputs,const struct draw_llvm_variant_key * key)471 draw_llvm_create_variant(struct draw_llvm *llvm,
472                          unsigned num_inputs,
473                          const struct draw_llvm_variant_key *key)
474 {
475    struct draw_llvm_variant *variant;
476    struct llvm_vertex_shader *shader =
477       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
478    char module_name[64];
479    unsigned char ir_sha1_cache_key[20];
480    struct lp_cached_code cached = { 0 };
481    bool needs_caching = false;
482    variant = MALLOC(sizeof *variant +
483                     shader->variant_key_size -
484                     sizeof variant->key);
485    if (!variant)
486       return NULL;
487 
488    variant->llvm = llvm;
489    variant->shader = shader;
490    memcpy(&variant->key, key, shader->variant_key_size);
491 
492    snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
493             variant->shader->variants_cached);
494 
495    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
496       draw_get_ir_cache_key(shader->base.state.ir.nir,
497                             key,
498                             shader->variant_key_size,
499                             num_inputs,
500                             ir_sha1_cache_key);
501 
502       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
503                                          &cached,
504                                          ir_sha1_cache_key);
505       if (!cached.data_size)
506          needs_caching = true;
507    }
508    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
509 
510    create_vs_jit_types(variant);
511 
512    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
513       if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
514          tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
515       else
516          nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
517       draw_llvm_dump_variant_key(&variant->key);
518    }
519 
520    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_inputs);
521    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
522 
523    draw_llvm_generate(llvm, variant);
524 
525    gallivm_compile_module(variant->gallivm);
526 
527    variant->jit_func = (draw_jit_vert_func)
528          gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
529 
530    if (needs_caching)
531       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
532                                            &cached,
533                                            ir_sha1_cache_key);
534    gallivm_free_ir(variant->gallivm);
535 
536    variant->list_item_global.base = variant;
537    variant->list_item_local.base = variant;
538    /*variant->no = */shader->variants_created++;
539    variant->list_item_global.base = variant;
540 
541    return variant;
542 }
543 
544 
545 static void
do_clamp_vertex_color(struct gallivm_state * gallivm,struct lp_type type,const struct tgsi_shader_info * info,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS])546 do_clamp_vertex_color(struct gallivm_state *gallivm,
547                       struct lp_type type,
548                       const struct tgsi_shader_info *info,
549                       LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
550 {
551    LLVMBuilderRef builder = gallivm->builder;
552    LLVMValueRef out;
553    unsigned chan, attrib;
554    struct lp_build_context bld;
555    lp_build_context_init(&bld, gallivm, type);
556 
557    for (attrib = 0; attrib < info->num_outputs; ++attrib) {
558       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
559          if (outputs[attrib][chan]) {
560             switch (info->output_semantic_name[attrib]) {
561             case TGSI_SEMANTIC_COLOR:
562             case TGSI_SEMANTIC_BCOLOR:
563                out = LLVMBuildLoad2(builder, LLVMTypeOf(bld.zero), outputs[attrib][chan], "");
564                out = lp_build_clamp(&bld, out, bld.zero, bld.one);
565                LLVMBuildStore(builder, out, outputs[attrib][chan]);
566                break;
567             }
568          }
569       }
570    }
571 }
572 
573 
574 static void
generate_vs(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],const struct lp_bld_tgsi_system_values * system_values,LLVMValueRef context_ptr,LLVMValueRef resources_ptr,const struct lp_build_sampler_soa * draw_sampler,const struct lp_build_image_soa * draw_image,bool clamp_vertex_color,struct lp_build_mask_context * bld_mask)575 generate_vs(struct draw_llvm_variant *variant,
576             LLVMBuilderRef builder,
577             struct lp_type vs_type,
578             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
579             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
580             const struct lp_bld_tgsi_system_values *system_values,
581             LLVMValueRef context_ptr,
582             LLVMValueRef resources_ptr,
583             const struct lp_build_sampler_soa *draw_sampler,
584             const struct lp_build_image_soa *draw_image,
585             bool clamp_vertex_color,
586             struct lp_build_mask_context *bld_mask)
587 {
588    struct draw_llvm *llvm = variant->llvm;
589    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
590    LLVMValueRef consts_ptr =
591       lp_jit_resources_constants(variant->gallivm, variant->resources_type, resources_ptr);
592    LLVMValueRef ssbos_ptr =
593       lp_jit_resources_ssbos(variant->gallivm, variant->resources_type, resources_ptr);
594    struct draw_llvm_variant_key *key = &variant->key;
595 
596    struct lp_build_tgsi_params params;
597    memset(&params, 0, sizeof(params));
598 
599    params.type = vs_type;
600    params.mask = bld_mask;
601    params.consts_ptr = consts_ptr;
602    params.system_values = system_values;
603    params.inputs = inputs;
604    params.num_inputs = key->nr_vertex_elements;
605    params.context_type = variant->context_type;
606    params.context_ptr = context_ptr;
607    params.resources_type = variant->resources_type;
608    params.resources_ptr = resources_ptr;
609    params.sampler = draw_sampler;
610    params.info = &llvm->draw->vs.vertex_shader->info;
611    params.ssbo_ptr = ssbos_ptr;
612    params.image = draw_image;
613 
614    if (llvm->draw->vs.vertex_shader->state.ir.nir &&
615        llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR) {
616       lp_build_nir_soa(variant->gallivm,
617                        llvm->draw->vs.vertex_shader->state.ir.nir,
618                        &params,
619                        outputs);
620    } else {
621       lp_build_tgsi_soa(variant->gallivm,
622                         tokens,
623                         &params,
624                         outputs);
625    }
626 
627    if (clamp_vertex_color) {
628       const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
629       do_clamp_vertex_color(variant->gallivm,
630                             vs_type, info,
631                             outputs);
632    }
633 }
634 
635 
636 static void
fetch_instanced(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef index)637 fetch_instanced(struct gallivm_state *gallivm,
638                 const struct util_format_description *format_desc,
639                 struct lp_type vs_type,
640                 LLVMValueRef vb_stride,
641                 LLVMValueRef map_ptr,
642                 LLVMValueRef buffer_size_adj,
643                 LLVMValueRef *inputs,
644                 LLVMValueRef index)
645 {
646    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
647    LLVMTypeRef aosf_t, aosi_t;
648    LLVMValueRef zero = LLVMConstNull(i32_t);
649    LLVMBuilderRef builder = gallivm->builder;
650    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
651    unsigned i;
652 
653    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
654    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
655 
656    /* This mul can overflow. Wraparound is ok. */
657    stride = LLVMBuildMul(builder, vb_stride, index, "");
658 
659    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
660                                      stride, buffer_size_adj,
661                                      "buffer_overflowed");
662 
663    if (0) {
664       lp_build_print_value(gallivm, "   instance index = ", index);
665       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
666    }
667 
668    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
669    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
670    stride = LLVMBuildAnd(builder, stride, index_valid, "");
671 
672    aos = lp_build_fetch_rgba_aos(gallivm,
673                                  format_desc,
674                                  lp_float32_vec4_type(),
675                                  false,
676                                  map_ptr,
677                                  stride, zero, zero,
678                                  NULL);
679 
680    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
681    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
682    aos = LLVMBuildAnd(builder, aos, index_valid, "");
683    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
684 
685    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
686       LLVMValueRef index = lp_build_const_int32(gallivm, i);
687       inputs[i] = lp_build_extract_broadcast(gallivm,
688                                              lp_float32_vec4_type(),
689                                              vs_type, aos, index);
690    }
691 }
692 
693 
694 static void
fetch_vector(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef indices)695 fetch_vector(struct gallivm_state *gallivm,
696              const struct util_format_description *format_desc,
697              struct lp_type vs_type,
698              LLVMValueRef vb_stride,
699              LLVMValueRef map_ptr,
700              LLVMValueRef buffer_size_adj,
701              LLVMValueRef *inputs,
702              LLVMValueRef indices)
703 {
704    LLVMBuilderRef builder = gallivm->builder;
705    struct lp_build_context blduivec;
706    struct lp_type fetch_type = vs_type;
707    LLVMValueRef offset, valid_mask;
708    unsigned i;
709 
710    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
711 
712    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
713    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
714 
715    /* This mul can overflow. Wraparound is ok. */
716    offset = lp_build_mul(&blduivec, vb_stride, indices);
717 
718    valid_mask = lp_build_compare(gallivm, blduivec.type,
719                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
720 
721    /* not valid elements use offset 0 */
722    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
723 
724    if (0) {
725       lp_build_print_value(gallivm, "   indices = ", indices);
726       lp_build_print_value(gallivm, "   offsets = ", offset);
727       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
728    }
729 
730    /*
731     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
732     * This should always produce better code.
733     */
734 
735    /* The type handling is annoying here... */
736    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
737        format_desc->channel[0].pure_integer) {
738       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
739          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
740       } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
741          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
742       }
743    }
744 
745    lp_build_fetch_rgba_soa(gallivm, format_desc,
746                            fetch_type, false, map_ptr, offset,
747                            blduivec.zero, blduivec.zero,
748                            NULL, inputs);
749 
750    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
751       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
752                                    lp_build_vec_type(gallivm, vs_type), "");
753    }
754 
755    /* out-of-bound fetches return all zeros */
756    for (i = 0; i < format_desc->nr_channels; i++) {
757       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
758       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
759       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
760                                    lp_build_vec_type(gallivm, vs_type), "");
761    }
762 }
763 
764 
765 static void
store_aos(struct gallivm_state * gallivm,bool is_per_prim,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef index,LLVMValueRef value)766 store_aos(struct gallivm_state *gallivm,
767           bool is_per_prim,
768           LLVMTypeRef io_type,
769           LLVMValueRef io_ptr,
770           LLVMValueRef index,
771           LLVMValueRef value)
772 {
773    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
774    LLVMBuilderRef builder = gallivm->builder;
775    LLVMValueRef data_ptr;
776    LLVMTypeRef data_type;
777    LLVMValueRef indices[3];
778 
779    indices[0] = lp_build_const_int32(gallivm, 0);
780    indices[1] = index;
781    indices[2] = lp_build_const_int32(gallivm, 0);
782 
783    if (!is_per_prim) {
784       data_ptr = lp_jit_vertex_header_data(gallivm, io_type, io_ptr);
785       data_type = LLVMStructGetTypeAtIndex(io_type, LP_JIT_VERTEX_HEADER_DATA);
786    } else {
787       data_ptr = io_ptr;
788       data_type = io_type;
789    }
790 
791    data_ptr = LLVMBuildGEP2(builder, data_type, data_ptr, indices, 3, "");
792    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
793 
794 #if DEBUG_STORE
795    if (is_per_prim)
796       lp_build_printf(gallivm, "    ---- %p storing prim attribute %d (io = %p)\n", data_ptr, index, io_ptr);
797    else
798       lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
799 #endif
800 
801    /* Unaligned store due to the vertex header */
802    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
803 }
804 
805 
806 /**
807  * Adjust the mask to architecture endianess. The mask will the store in struct:
808  *
809  * struct vertex_header {
810  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
811  *    unsigned edgeflag:1;
812  *    unsigned pad:1;
813  *    unsigned vertex_id:16;
814  *    [...]
815  * }
816  *
817  * On little-endian machine nothing needs to done, however on bit-endian machine
818  * the mask's fields need to be adjusted with the algorithm:
819  *
820  * uint32_t reverse (uint32_t x)
821  * {
822  *   return (x >> 16) |              // vertex_id
823  *          ((x & 0x3fff) << 18) |   // clipmask
824  *          ((x & 0x4000) << 3) |    // edgeflag
825  *          ((x & 0x8000) << 1);     // pad
826  * }
827  */
828 static LLVMValueRef
adjust_mask(struct gallivm_state * gallivm,LLVMValueRef mask)829 adjust_mask(struct gallivm_state *gallivm,
830             LLVMValueRef mask)
831 {
832 #if UTIL_ARCH_BIG_ENDIAN
833    LLVMBuilderRef builder = gallivm->builder;
834    LLVMValueRef vertex_id;
835    LLVMValueRef clipmask;
836    LLVMValueRef pad;
837    LLVMValueRef edgeflag;
838 
839    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
840    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
841    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
842    if (0) {
843       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
844       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
845    }
846    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
847    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
848 
849    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
850    if (0) {
851       mask = LLVMBuildOr(builder, mask, pad, "");
852    }
853    mask = LLVMBuildOr(builder, mask, edgeflag, "");
854 #endif
855    return mask;
856 }
857 
858 
859 void
draw_store_aos_array(struct gallivm_state * gallivm,struct lp_type soa_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef * indices,LLVMValueRef * aos,int attrib,LLVMValueRef clipmask,bool need_edgeflag,bool is_per_prim)860 draw_store_aos_array(struct gallivm_state *gallivm,
861                      struct lp_type soa_type,
862                      LLVMTypeRef io_type,
863                      LLVMValueRef io_ptr,
864                      LLVMValueRef *indices,
865                      LLVMValueRef* aos,
866                      int attrib,
867                      LLVMValueRef clipmask,
868                      bool need_edgeflag, bool is_per_prim)
869 {
870    LLVMBuilderRef builder = gallivm->builder;
871    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
872    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
873    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
874    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
875    int vector_length = soa_type.length;
876 
877    assert(TGSI_NUM_CHANNELS == 4);
878 
879    for (int i = 0; i < vector_length; i++) {
880       linear_inds[i] = lp_build_const_int32(gallivm, i);
881       if (indices) {
882          inds[i] = indices[i];
883       } else {
884          inds[i] = linear_inds[i];
885       }
886       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
887    }
888 
889    if (attrib == 0 && !is_per_prim) {
890       /* store vertex header for each of the n vertices */
891       LLVMValueRef val, cliptmp;
892       int vertex_id_pad_edgeflag;
893 
894       /* If this assertion fails, it means we need to update the bit twidding
895        * code here.  See struct vertex_header in draw_private.h.
896        */
897       assert(DRAW_TOTAL_CLIP_PLANES==14);
898       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
899       if (!need_edgeflag) {
900          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
901       } else {
902          vertex_id_pad_edgeflag = (0xffff << 16);
903       }
904       if (vector_length == 1)
905          val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
906       else
907          val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
908                                       vertex_id_pad_edgeflag);
909 
910       /* OR with the clipmask */
911       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
912       for (unsigned i = 0; i < vector_length; i++) {
913          LLVMValueRef id_ptr = lp_jit_vertex_header_id(gallivm, io_type, io_ptrs[i]);
914          if (vector_length > 1)
915             val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
916          else
917             val = cliptmp;
918          val = adjust_mask(gallivm, val);
919 #if DEBUG_STORE
920          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
921                          io_ptrs[i], inds[i], val);
922 #endif
923          LLVMBuildStore(builder, val, id_ptr);
924       }
925    }
926 
927    /* store for each of the n vertices */
928    for (int i = 0; i < vector_length; i++) {
929       store_aos(gallivm, is_per_prim, io_type, io_ptrs[i], attr_index, aos[i]);
930    }
931 }
932 
933 
934 static void
convert_to_aos(struct gallivm_state * gallivm,LLVMTypeRef io_type,LLVMValueRef io,LLVMValueRef * indices,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef clipmask,int num_outputs,struct lp_type soa_type,int primid_slot,bool need_edgeflag)935 convert_to_aos(struct gallivm_state *gallivm,
936                LLVMTypeRef io_type,
937                LLVMValueRef io,
938                LLVMValueRef *indices,
939                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
940                LLVMValueRef clipmask,
941                int num_outputs,
942                struct lp_type soa_type,
943                int primid_slot,
944                bool need_edgeflag)
945 {
946    LLVMBuilderRef builder = gallivm->builder;
947 
948 #if DEBUG_STORE
949    lp_build_printf(gallivm, "   # storing begin\n");
950 #endif
951    for (unsigned attrib = 0; attrib < num_outputs; ++attrib) {
952       LLVMValueRef soa[TGSI_NUM_CHANNELS];
953       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
954       for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
955          if (outputs[attrib][chan]) {
956             LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_vec_type(gallivm, soa_type) : lp_build_vec_type(gallivm, soa_type);
957             LLVMValueRef out = LLVMBuildLoad2(builder, single_type, outputs[attrib][chan], "");
958             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
959 #if DEBUG_STORE
960             lp_build_printf(gallivm, "output %d : %d ",
961                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
962                                          attrib, 0),
963                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
964                                          chan, 0));
965             lp_build_print_value(gallivm, "val = ", out);
966             {
967                LLVMValueRef iv =
968                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
969 
970                lp_build_print_value(gallivm, "  ival = ", iv);
971             }
972 #endif
973             soa[chan] = out;
974          } else {
975             soa[chan] = 0;
976          }
977       }
978 
979 
980       if (soa_type.length == TGSI_NUM_CHANNELS) {
981          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
982       } else {
983          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
984 
985          for (unsigned i = 0; i < soa_type.length; ++i) {
986             aos[i] = lp_build_extract_range(gallivm,
987                                             soa[i % TGSI_NUM_CHANNELS],
988                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
989                                             TGSI_NUM_CHANNELS);
990          }
991       }
992 
993       draw_store_aos_array(gallivm,
994                            soa_type,
995                            io_type,
996                            io,
997                            indices,
998                            aos,
999                            attrib,
1000                            clipmask,
1001                            need_edgeflag, false);
1002    }
1003 #if DEBUG_STORE
1004    lp_build_printf(gallivm, "   # storing end\n");
1005 #endif
1006 }
1007 
1008 
1009 /**
1010  * Stores original vertex positions in clip coordinates
1011  */
1012 static void
store_clip(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],int idx)1013 store_clip(struct gallivm_state *gallivm,
1014            const struct lp_type vs_type,
1015            LLVMTypeRef io_type,
1016            LLVMValueRef io_ptr,
1017            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1018            int idx)
1019 {
1020    LLVMBuilderRef builder = gallivm->builder;
1021    LLVMValueRef soa[4];
1022    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1023    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1024    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1025    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1026    LLVMTypeRef clip_ptr_type =
1027       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1028                                      4), 0);
1029 
1030    for (int i = 0; i < vs_type.length; i++) {
1031       inds[i] = lp_build_const_int32(gallivm, i);
1032       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1033    }
1034 
1035    LLVMTypeRef single_type = lp_build_vec_type(gallivm, vs_type);
1036    soa[0] = LLVMBuildLoad2(builder, single_type, outputs[idx][0], ""); /*x0 x1 .. xn*/
1037    soa[1] = LLVMBuildLoad2(builder, single_type, outputs[idx][1], ""); /*y0 y1 .. yn*/
1038    soa[2] = LLVMBuildLoad2(builder, single_type, outputs[idx][2], ""); /*z0 z1 .. zn*/
1039    soa[3] = LLVMBuildLoad2(builder, single_type, outputs[idx][3], ""); /*w0 w1 .. wn*/
1040 
1041    for (int i = 0; i < vs_type.length; i++) {
1042       clip_ptrs[i] = lp_jit_vertex_header_clip_pos(gallivm, io_type, io_ptrs[i]);
1043    }
1044 
1045    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1046    for (int i = 0; i < vs_type.length; ++i) {
1047       aos[i] = lp_build_extract_range(gallivm,
1048                                       soa[i % TGSI_NUM_CHANNELS],
1049                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1050                                       TGSI_NUM_CHANNELS);
1051    }
1052 
1053    for (int j = 0; j < vs_type.length; j++) {
1054       LLVMValueRef clip_ptr;
1055 
1056       clip_ptr = LLVMBuildPointerCast(builder, clip_ptrs[j], clip_ptr_type, "");
1057 
1058       /* Unaligned store */
1059       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1060    }
1061 }
1062 
1063 
1064 /**
1065  * Transforms the outputs for viewport mapping
1066  */
1067 static void
generate_viewport(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr)1068 generate_viewport(struct draw_llvm_variant *variant,
1069                   LLVMBuilderRef builder,
1070                   struct lp_type vs_type,
1071                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1072                   LLVMValueRef context_ptr)
1073 {
1074    struct gallivm_state *gallivm = variant->gallivm;
1075    struct lp_type f32_type = vs_type;
1076    const unsigned pos = variant->llvm->draw->vs.position_output;
1077    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1078    LLVMValueRef out3 = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][3], ""); /*w0 w1 .. wn*/
1079    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1080    LLVMValueRef vp_ptr = draw_vs_jit_context_viewports(variant, context_ptr);
1081 
1082    /* We treat pipe_viewport_state as a float array */
1083    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1084    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1085 
1086    /* for 1/w convention*/
1087    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1088    LLVMBuildStore(builder, out3, outputs[pos][3]);
1089 
1090    LLVMTypeRef elem_type = lp_build_elem_type(gallivm, vs_type);
1091 
1092    /* Viewport Mapping */
1093    for (unsigned i = 0; i < 3; i++) {
1094       LLVMValueRef out = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][i], ""); /*x0 x1 .. xn*/
1095       LLVMValueRef scale;
1096       LLVMValueRef trans;
1097       LLVMValueRef scale_i;
1098       LLVMValueRef trans_i;
1099       LLVMValueRef index;
1100 
1101       index = lp_build_const_int32(gallivm, i + scale_index_offset);
1102       scale_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1103 
1104       index = lp_build_const_int32(gallivm, i + trans_index_offset);
1105       trans_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1106 
1107       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1108                                  LLVMBuildLoad2(builder, elem_type, scale_i, "scale"));
1109       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1110                                  LLVMBuildLoad2(builder, elem_type, trans_i, "trans"));
1111 
1112       /* divide by w */
1113       out = LLVMBuildFMul(builder, out, out3, "");
1114       /* mult by scale, add translation */
1115       out = lp_build_fmuladd(builder, out, scale, trans);
1116 
1117       /* store transformed outputs */
1118       LLVMBuildStore(builder, out, outputs[pos][i]);
1119    }
1120 
1121 }
1122 
1123 
1124 /**
1125  * Returns clipmask as nxi32 bitmask for the n vertices
1126  */
1127 static LLVMValueRef
generate_clipmask(struct draw_llvm * llvm,struct gallivm_state * gallivm,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],struct draw_llvm_variant_key * key,LLVMTypeRef context_type,LLVMValueRef context_ptr,bool * have_clipdist)1128 generate_clipmask(struct draw_llvm *llvm,
1129                   struct gallivm_state *gallivm,
1130                   struct lp_type vs_type,
1131                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1132                   struct draw_llvm_variant_key *key,
1133                   LLVMTypeRef context_type,
1134                   LLVMValueRef context_ptr,
1135                   bool *have_clipdist)
1136 {
1137    LLVMBuilderRef builder = gallivm->builder;
1138    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1139    LLVMValueRef test, temp;
1140    LLVMValueRef zero, shift;
1141    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1142    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1143    LLVMValueRef plane1, planes, plane_ptr;
1144    struct lp_type f32_type = vs_type;
1145    struct lp_type i32_type = lp_int_type(vs_type);
1146    const unsigned pos = llvm->draw->vs.position_output;
1147    const unsigned cv = llvm->draw->vs.clipvertex_output;
1148    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1149    bool have_cd = false;
1150    bool clip_user = key->clip_user;
1151    unsigned ucp_enable = key->ucp_enable;
1152    unsigned cd[2];
1153 
1154    cd[0] = llvm->draw->vs.ccdistance_output[0];
1155    cd[1] = llvm->draw->vs.ccdistance_output[1];
1156 
1157    if (cd[0] != pos || cd[1] != pos)
1158       have_cd = true;
1159 
1160    if (num_written_clipdistance && !clip_user) {
1161       clip_user = true;
1162       ucp_enable = (1 << num_written_clipdistance) - 1;
1163    }
1164 
1165    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1166    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1167    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1168    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1169 
1170    LLVMTypeRef vec_type = LLVMTypeOf(zero);
1171 
1172    /*
1173     * load clipvertex and position from correct locations.
1174     * if they are the same just load them once.
1175     */
1176    pos_x = LLVMBuildLoad2(builder, vec_type, outputs[pos][0], ""); /*x0 x1 .. xn */
1177    pos_y = LLVMBuildLoad2(builder, vec_type, outputs[pos][1], ""); /*y0 y1 .. yn */
1178    pos_z = LLVMBuildLoad2(builder, vec_type, outputs[pos][2], ""); /*z0 z1 .. zn */
1179    pos_w = LLVMBuildLoad2(builder, vec_type, outputs[pos][3], ""); /*w0 w1 .. wn */
1180 
1181    if (clip_user && cv != pos) {
1182       cv_x = LLVMBuildLoad2(builder, vec_type, outputs[cv][0], ""); /*x0 x1 .. xn */
1183       cv_y = LLVMBuildLoad2(builder, vec_type, outputs[cv][1], ""); /*y0 y1 .. yn */
1184       cv_z = LLVMBuildLoad2(builder, vec_type, outputs[cv][2], ""); /*z0 z1 .. zn */
1185       cv_w = LLVMBuildLoad2(builder, vec_type, outputs[cv][3], ""); /*w0 w1 .. wn */
1186    } else {
1187       cv_x = pos_x;
1188       cv_y = pos_y;
1189       cv_z = pos_z;
1190       cv_w = pos_w;
1191    }
1192 
1193    /*
1194     * Be careful with the comparisons and NaNs (using llvm's unordered
1195     * comparisons here).
1196     */
1197    /* Cliptest, for hardwired planes */
1198    /*
1199     * XXX should take guardband into account (currently not in key).
1200     * Otherwise might run the draw pipeline stages for nothing.
1201     */
1202    if (key->clip_xy) {
1203       /* plane 1 */
1204       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1205       temp = shift;
1206       test = LLVMBuildAnd(builder, test, temp, "");
1207       mask = test;
1208 
1209       /* plane 2 */
1210       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1211       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1212       temp = LLVMBuildShl(builder, temp, shift, "");
1213       test = LLVMBuildAnd(builder, test, temp, "");
1214       mask = LLVMBuildOr(builder, mask, test, "");
1215 
1216       /* plane 3 */
1217       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1218       temp = LLVMBuildShl(builder, temp, shift, "");
1219       test = LLVMBuildAnd(builder, test, temp, "");
1220       mask = LLVMBuildOr(builder, mask, test, "");
1221 
1222       /* plane 4 */
1223       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1224       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1225       temp = LLVMBuildShl(builder, temp, shift, "");
1226       test = LLVMBuildAnd(builder, test, temp, "");
1227       mask = LLVMBuildOr(builder, mask, test, "");
1228    }
1229 
1230    if (key->clip_z) {
1231       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1232       if (key->clip_halfz) {
1233          /* plane 5 */
1234          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1235          test = LLVMBuildAnd(builder, test, temp, "");
1236          mask = LLVMBuildOr(builder, mask, test, "");
1237       } else {
1238          /* plane 5 */
1239          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1240          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1241          test = LLVMBuildAnd(builder, test, temp, "");
1242          mask = LLVMBuildOr(builder, mask, test, "");
1243       }
1244       /* plane 6 */
1245       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1246       temp = LLVMBuildShl(builder, temp, shift, "");
1247       test = LLVMBuildAnd(builder, test, temp, "");
1248       mask = LLVMBuildOr(builder, mask, test, "");
1249    }
1250 
1251    if (clip_user) {
1252       LLVMValueRef planes_ptr = draw_vs_jit_context_planes(gallivm, context_type, context_ptr);
1253       LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1254       LLVMTypeRef planes_type = LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES);
1255       LLVMValueRef indices[3];
1256       LLVMValueRef is_nan_or_inf;
1257 
1258       /* userclip planes */
1259       while (ucp_enable) {
1260          unsigned plane_idx = ffs(ucp_enable)-1;
1261          ucp_enable &= ~(1 << plane_idx);
1262          plane_idx += 6;
1263 
1264          if (have_cd && num_written_clipdistance) {
1265             LLVMValueRef clipdist;
1266             int i;
1267             i = plane_idx - 6;
1268 
1269             *have_clipdist = true;
1270             if (i < 4) {
1271                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[0]][i], "");
1272             } else {
1273                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[1]][i-4], "");
1274             }
1275             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1276             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1277             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1278             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1279             test = LLVMBuildAnd(builder, test, temp, "");
1280             mask = LLVMBuildOr(builder, mask, test, "");
1281          } else {
1282             LLVMTypeRef vs_elem_type = lp_build_elem_type(gallivm, vs_type);
1283             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1284             LLVMValueRef sum = NULL;
1285             indices[0] = lp_build_const_int32(gallivm, 0);
1286             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1287 
1288             for (int i = 0; i < 4; ++i) {
1289                indices[2] = lp_build_const_int32(gallivm, i);
1290                plane_ptr = LLVMBuildGEP2(builder, planes_type, planes_ptr, indices, 3, "");
1291                plane1 = LLVMBuildLoad2(builder, vs_elem_type, plane_ptr,
1292                                        (const char *[]){"plane_x", "plane_y", "plane_z", "plane_w"}[i]);
1293                planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1294                if (i == 0) {
1295                   sum = LLVMBuildFMul(builder, planes, cv_x, "");
1296                } else {
1297                   sum = lp_build_fmuladd(builder, planes,
1298                                          (LLVMValueRef[]){cv_x, cv_y, cv_z, cv_w}[i], sum);
1299                }
1300             }
1301 
1302             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1303             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1304             test = LLVMBuildAnd(builder, test, temp, "");
1305             mask = LLVMBuildOr(builder, mask, test, "");
1306          }
1307       }
1308    }
1309    if (key->need_edgeflags) {
1310       /*
1311        * This isn't really part of clipmask but stored the same in vertex
1312        * header later, so do it here.
1313        */
1314       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1315       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1316       LLVMValueRef edgeflag = LLVMBuildLoad2(builder, vec_type, outputs[edge_attr][0], "");
1317       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1318       temp = lp_build_const_int_vec(gallivm, i32_type,
1319                                     1LL << DRAW_TOTAL_CLIP_PLANES);
1320       test = LLVMBuildAnd(builder, test, temp, "");
1321       mask = LLVMBuildOr(builder, mask, test, "");
1322    }
1323    return mask;
1324 }
1325 
1326 
1327 /**
1328  * Returns boolean if any clipping has occurred
1329  * Used zero/one i8 value to represent boolean
1330  */
1331 static LLVMValueRef
clipmask_booli8(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef clipmask_bool_type,LLVMValueRef clipmask_bool_ptr,bool edgeflag_in_clipmask)1332 clipmask_booli8(struct gallivm_state *gallivm,
1333                 const struct lp_type vs_type,
1334                 LLVMTypeRef clipmask_bool_type,
1335                 LLVMValueRef clipmask_bool_ptr,
1336                 bool edgeflag_in_clipmask)
1337 {
1338    LLVMBuilderRef builder = gallivm->builder;
1339    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1340    LLVMValueRef clipmask_bool = LLVMBuildLoad2(builder, clipmask_bool_type, clipmask_bool_ptr, "");
1341    LLVMValueRef ret;
1342    struct lp_build_context bldivec;
1343 
1344    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1345 
1346    /*
1347     * We need to invert the edgeflag bit from the clipmask here
1348     * (because the result is really if we want to run the pipeline or not
1349     * and we (may) need it if edgeflag was 0).
1350     */
1351    if (edgeflag_in_clipmask) {
1352       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1353                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
1354       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1355    }
1356 
1357    /*
1358     * XXX: probably should mask off bits from the mask which come from
1359     * vertices which were beyond the count (i.e. indices_valid for
1360     * linear fetches, for elts ones we don't have the correct mask
1361     * right now). Otherwise might run the pipeline for nothing,
1362     * though everything should still work.
1363     */
1364    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1365    ret = LLVMBuildZExt(builder, ret, int8_type, "");
1366    return ret;
1367 }
1368 
1369 
1370 static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_gs_iface * gs_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)1371 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1372                          struct lp_build_context * bld,
1373                          bool is_vindex_indirect,
1374                          LLVMValueRef vertex_index,
1375                          bool is_aindex_indirect,
1376                          LLVMValueRef attrib_index,
1377                          LLVMValueRef swizzle_index)
1378 {
1379    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1380    struct gallivm_state *gallivm = bld->gallivm;
1381    LLVMBuilderRef builder = gallivm->builder;
1382    LLVMValueRef indices[3];
1383    LLVMValueRef res;
1384    struct lp_type type = bld->type;
1385 
1386    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1387    LLVMTypeRef channel_vec_type = LLVMVectorType(float_type, TGSI_NUM_CHANNELS);
1388    LLVMTypeRef input_array_type = create_gs_jit_input_type_deref(gallivm);
1389 
1390    if (is_vindex_indirect || is_aindex_indirect) {
1391       res = bld->zero;
1392       for (int i = 0; i < type.length; ++i) {
1393          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1394          LLVMValueRef vert_chan_index = vertex_index;
1395          LLVMValueRef attr_chan_index = attrib_index;
1396          LLVMValueRef channel_vec, value;
1397 
1398          if (is_vindex_indirect) {
1399             vert_chan_index = LLVMBuildExtractElement(builder,
1400                                                       vertex_index, idx, "");
1401          }
1402          if (is_aindex_indirect) {
1403             attr_chan_index = LLVMBuildExtractElement(builder,
1404                                                       attrib_index, idx, "");
1405          }
1406 
1407          indices[0] = vert_chan_index;
1408          indices[1] = attr_chan_index;
1409          indices[2] = swizzle_index;
1410 
1411          channel_vec = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1412          channel_vec = LLVMBuildLoad2(builder, channel_vec_type, channel_vec, "");
1413          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1414 
1415          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1416       }
1417    } else {
1418       indices[0] = vertex_index;
1419       indices[1] = attrib_index;
1420       indices[2] = swizzle_index;
1421 
1422       res = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1423       res = LLVMBuildLoad2(builder, channel_vec_type, res, "");
1424    }
1425 
1426    return res;
1427 }
1428 
1429 
1430 static void
draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef (* outputs)[4],LLVMValueRef emitted_vertices_vec,LLVMValueRef mask_vec,LLVMValueRef stream_id)1431 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1432                          struct lp_build_context * bld,
1433                          LLVMValueRef (*outputs)[4],
1434                          LLVMValueRef emitted_vertices_vec,
1435                          LLVMValueRef mask_vec, LLVMValueRef stream_id)
1436 {
1437    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1438    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1439    struct gallivm_state *gallivm = variant->gallivm;
1440    LLVMBuilderRef builder = gallivm->builder;
1441    struct lp_type gs_type = bld->type;
1442    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1443                                                   lp_int_type(gs_type), 0);
1444    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1445    LLVMValueRef next_prim_offset =
1446       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1447    LLVMValueRef io = variant->io_ptr;
1448    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1449 
1450    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1451    for (unsigned i = 0; i < gs_type.length; ++i) {
1452       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1453       LLVMValueRef currently_emitted =
1454          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1455       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1456       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1457       indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1458                                    lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1459    }
1460 
1461    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1462    LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1463    struct lp_build_if_state if_ctx;
1464    lp_build_if(&if_ctx, gallivm, cnd);
1465    io = lp_build_pointer_get2(builder, variant->vertex_header_ptr_type,
1466                               io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1467 
1468    if (variant->key.clamp_vertex_color) {
1469       do_clamp_vertex_color(gallivm, gs_type,
1470                             gs_info, outputs);
1471    }
1472    convert_to_aos(gallivm, variant->vertex_header_type,
1473                   io, indices,
1474                   outputs, clipmask,
1475                   gs_info->num_outputs, gs_type,
1476                   -1,
1477                   false);
1478    lp_build_endif(&if_ctx);
1479 }
1480 
1481 
1482 static void
draw_gs_llvm_end_primitive(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef total_emitted_vertices_vec_ptr,LLVMValueRef verts_per_prim_vec,LLVMValueRef emitted_prims_vec,LLVMValueRef mask_vec,unsigned stream)1483 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1484                            struct lp_build_context * bld,
1485                            LLVMValueRef total_emitted_vertices_vec_ptr,
1486                            LLVMValueRef verts_per_prim_vec,
1487                            LLVMValueRef emitted_prims_vec,
1488                            LLVMValueRef mask_vec, unsigned stream)
1489 {
1490    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1491    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1492    struct gallivm_state *gallivm = variant->gallivm;
1493    LLVMBuilderRef builder = gallivm->builder;
1494    LLVMValueRef prim_lengts_ptr =
1495       draw_gs_jit_prim_lengths(variant, variant->context_ptr);
1496 
1497    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1498    for (unsigned i = 0; i < bld->type.length; ++i) {
1499       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1500       LLVMValueRef prims_emitted =
1501          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1502       LLVMValueRef store_ptr;
1503       LLVMValueRef num_vertices =
1504          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1505 
1506       LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1507       struct lp_build_if_state ifthen;
1508       lp_build_if(&ifthen, gallivm, this_cond);
1509       prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1510       prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1511       LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
1512       LLVMTypeRef prim_lengths_type = LLVMPointerType(int_type, 0);
1513       store_ptr = LLVMBuildGEP2(builder, prim_lengths_type, prim_lengts_ptr, &prims_emitted, 1, "");
1514       store_ptr = LLVMBuildLoad2(builder, prim_lengths_type, store_ptr, "");
1515       store_ptr = LLVMBuildGEP2(builder, int_type, store_ptr, &ind, 1, "");
1516       LLVMBuildStore(builder, num_vertices, store_ptr);
1517       lp_build_endif(&ifthen);
1518    }
1519 }
1520 
1521 
1522 static void
draw_gs_llvm_epilogue(const struct lp_build_gs_iface * gs_base,LLVMValueRef total_emitted_vertices_vec,LLVMValueRef emitted_prims_vec,unsigned stream)1523 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1524                       LLVMValueRef total_emitted_vertices_vec,
1525                       LLVMValueRef emitted_prims_vec, unsigned stream)
1526 {
1527    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1528    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1529    struct gallivm_state *gallivm = variant->gallivm;
1530    LLVMBuilderRef builder = gallivm->builder;
1531    LLVMValueRef emitted_verts_ptr =
1532       draw_gs_jit_emitted_vertices(variant, variant->context_ptr);
1533    LLVMValueRef emitted_prims_ptr =
1534       draw_gs_jit_emitted_prims(variant, variant->context_ptr);
1535    LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1536 
1537    emitted_verts_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(total_emitted_vertices_vec), emitted_verts_ptr, &stream_val, 1, "");
1538    emitted_prims_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(emitted_prims_vec), emitted_prims_ptr, &stream_val, 1, "");
1539 
1540    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1541    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1542 }
1543 
1544 
1545 static void
draw_llvm_generate(struct draw_llvm * llvm,struct draw_llvm_variant * variant)1546 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1547 {
1548    struct gallivm_state *gallivm = variant->gallivm;
1549    LLVMContextRef context = gallivm->context;
1550    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1551    LLVMTypeRef arg_types[14];
1552    unsigned num_arg_types = ARRAY_SIZE(arg_types);
1553    LLVMTypeRef func_type;
1554    LLVMValueRef context_ptr;
1555    LLVMValueRef resources_ptr;
1556    LLVMBasicBlockRef block;
1557    LLVMBuilderRef builder;
1558    char func_name[64];
1559    struct lp_type vs_type;
1560    LLVMValueRef count, fetch_elts, start;
1561    LLVMValueRef vertex_id_offset;
1562    LLVMValueRef stride, step, io_itr;
1563    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1564    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1565    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1566    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1567    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1568    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1569    LLVMValueRef fake_buf_ptr, fake_buf;
1570 
1571    struct draw_context *draw = llvm->draw;
1572    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1573    unsigned i, j;
1574    struct lp_build_context bld, blduivec;
1575    struct lp_build_loop_state lp_loop;
1576    struct lp_build_if_state if_ctx;
1577    const int vector_length = lp_native_vector_width / 32;
1578    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1579    struct lp_build_sampler_soa *sampler = 0;
1580    struct lp_build_image_soa *image = NULL;
1581    LLVMValueRef ret, clipmask_bool_ptr;
1582    struct draw_llvm_variant_key *key = &variant->key;
1583    /* If geometry shader is present we need to skip both the viewport
1584     * transformation and clipping otherwise the inputs to the geometry
1585     * shader will be incorrect.
1586     * The code can't handle vp transform when vs writes vp index neither
1587     * (though this would be fixable here, but couldn't just broadcast
1588     * the values).
1589     */
1590    const bool bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1591                                 vs_info->writes_viewport_index;
1592    const bool enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1593                                                     key->clip_z ||
1594                                                     key->clip_user ||
1595                                                     key->need_edgeflags);
1596    LLVMValueRef variant_func;
1597    const unsigned pos = draw->vs.position_output;
1598    const unsigned cv = draw->vs.clipvertex_output;
1599    bool have_clipdist = false;
1600    struct lp_bld_tgsi_system_values system_values;
1601 
1602    memset(&system_values, 0, sizeof(system_values));
1603    memset(&outputs, 0, sizeof(outputs));
1604    snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1605 
1606    i = 0;
1607    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1608    arg_types[i++] = variant->resources_ptr_type;       /* context */
1609    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1610    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1611    arg_types[i++] = int32_type;                          /* count */
1612    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
1613    arg_types[i++] = int32_type;                          /* stride */
1614    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
1615    arg_types[i++] = int32_type;                          /* instance_id */
1616    arg_types[i++] = int32_type;                          /* vertex_id_offset */
1617    arg_types[i++] = int32_type;                          /* start_instance */
1618    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
1619    arg_types[i++] = int32_type;                          /* draw_id */
1620    arg_types[i++] = int32_type;                          /* view_id */
1621    assert(i == ARRAY_SIZE(arg_types));
1622 
1623    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1624                                 arg_types, num_arg_types, 0);
1625 
1626    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1627    variant->function = variant_func;
1628    variant->function_name = MALLOC(strlen(func_name)+1);
1629    strcpy(variant->function_name, func_name);
1630 
1631    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1632    for (i = 0; i < num_arg_types; ++i)
1633       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1634          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1635 
1636    if (gallivm->cache && gallivm->cache->data_size) {
1637       gallivm_stub_func(gallivm, variant_func);
1638       return;
1639    }
1640 
1641    context_ptr               = LLVMGetParam(variant_func, 0);
1642    resources_ptr             = LLVMGetParam(variant_func, 1);
1643    io_ptr                    = LLVMGetParam(variant_func, 2);
1644    vbuffers_ptr              = LLVMGetParam(variant_func, 3);
1645    count                     = LLVMGetParam(variant_func, 4);
1646    start                     = LLVMGetParam(variant_func, 5);
1647    /*
1648     * XXX: stride is actually unused. The stride we use is strictly calculated
1649     * from the number of outputs (including the draw_extra outputs).
1650     * Should probably fix some day (we need a new vs just because of extra
1651     * outputs which the generated vs won't touch).
1652     */
1653    stride                    = LLVMGetParam(variant_func, 6);
1654    vb_ptr                    = LLVMGetParam(variant_func, 7);
1655    system_values.instance_id = LLVMGetParam(variant_func, 8);
1656    vertex_id_offset          = LLVMGetParam(variant_func, 9);
1657    system_values.base_instance = LLVMGetParam(variant_func, 10);
1658    fetch_elts                = LLVMGetParam(variant_func, 11);
1659    system_values.draw_id     = LLVMGetParam(variant_func, 12);
1660    system_values.view_index  = LLVMGetParam(variant_func, 13);
1661 
1662    lp_build_name(context_ptr, "context");
1663    lp_build_name(resources_ptr, "resources");
1664    lp_build_name(io_ptr, "io");
1665    lp_build_name(vbuffers_ptr, "vbuffers");
1666    lp_build_name(count, "count");
1667    lp_build_name(start, "start");
1668    lp_build_name(stride, "stride");
1669    lp_build_name(vb_ptr, "vb");
1670    lp_build_name(system_values.instance_id, "instance_id");
1671    lp_build_name(vertex_id_offset, "vertex_id_offset");
1672    lp_build_name(system_values.base_instance, "start_instance");
1673    lp_build_name(fetch_elts, "fetch_elts");
1674    lp_build_name(system_values.draw_id, "draw_id");
1675 
1676    /*
1677     * Function body
1678     */
1679 
1680    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1681    builder = gallivm->builder;
1682    LLVMPositionBuilderAtEnd(builder, block);
1683 
1684    memset(&vs_type, 0, sizeof vs_type);
1685    vs_type.floating = true; /* floating point values */
1686    vs_type.sign = true;     /* values are signed */
1687    vs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
1688    vs_type.width = 32;      /* 32-bit float */
1689    vs_type.length = vector_length;
1690 
1691    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
1692    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1693 
1694    /* hold temporary "bool" clipmask */
1695    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
1696 
1697    fake_buf = lp_build_alloca_undef(gallivm,
1698                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
1699    fake_buf = LLVMBuildBitCast(builder, fake_buf,
1700                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
1701    fake_buf_ptr = LLVMBuildGEP2(builder, LLVMInt8TypeInContext(context), fake_buf, &bld.zero, 1, "");
1702 
1703    /* code generated texture sampling */
1704    sampler = lp_bld_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key),
1705                                           MAX2(key->nr_samplers,
1706                                                key->nr_sampler_views));
1707    image = lp_bld_llvm_image_soa_create(draw_llvm_variant_key_images(key),
1708                                       key->nr_images);
1709 
1710    step = lp_build_const_int32(gallivm, vector_length);
1711 
1712    ind_vec = blduivec.undef;
1713    for (i = 0; i < vs_type.length; i++) {
1714       LLVMValueRef index = lp_build_const_int32(gallivm, i);
1715       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
1716    }
1717 
1718    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
1719                              LLVMConstPointerNull(arg_types[11]), fetch_elts, "");
1720 
1721    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
1722    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
1723    /*
1724     * Only needed for non-indexed path.
1725     */
1726    start_vec = lp_build_broadcast_scalar(&blduivec, start);
1727 
1728    /*
1729     * Pre-calculate everything which is constant per shader invocation.
1730     */
1731    for (j = 0; j < key->nr_vertex_elements; ++j) {
1732       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
1733       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
1734       struct pipe_vertex_element *velem = &key->vertex_element[j];
1735       LLVMValueRef vb_index =
1736          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1737       LLVMValueRef bsize = lp_build_const_int32(gallivm,
1738                                                 util_format_get_blocksize(velem->src_format));
1739       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
1740                                                      velem->src_offset);
1741       LLVMValueRef src_stride = lp_build_const_int32(gallivm,
1742                                                      velem->src_stride);
1743       struct lp_build_if_state if_ctx;
1744 
1745       if (velem->src_format != PIPE_FORMAT_NONE) {
1746          vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, "");
1747          vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, "");
1748          vb_stride[j] = src_stride;
1749          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, vb_info);
1750          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr);
1751          buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr);
1752 
1753          ofbit = NULL;
1754          /*
1755           * We'll set buffer_size_adj to zero if we have of, so it will
1756           * always overflow later automatically without having to keep ofbit.
1757           * Overflows (with normal wraparound) doing the actual offset
1758           * calculation should be ok, just not for the buffer size calc.
1759           * It would also be possible to detect such overflows and return
1760           * zeros if that happens, but this would be more complex.
1761           */
1762          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
1763          tmp = lp_build_sub(&bld, bsize, bld.one);
1764          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
1765                                                      &ofbit);
1766          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
1767                                                      buf_offset, &ofbit);
1768 
1769          /*
1770           * We can't easily set fake vertex buffers outside the generated code.
1771           * Hence, set fake vertex buffers here instead basically, so fetch
1772           * code can always fetch using offset 0, eliminating all control flow
1773           * inside the main loop.
1774           * (Alternatively, could have control flow per vector skipping fetch
1775           * if ofbit is true.)
1776           */
1777          if (velem->instance_divisor) {
1778             /*
1779              * Index is equal to the start instance plus the number of current
1780              * instance divided by the divisor. In this case we compute it as:
1781              * index = start_instance + (instance_id  / divisor).
1782              * Note we could actually do the fetch here, outside the loop -
1783              * it's all constant, hopefully llvm recognizes this.
1784              */
1785             LLVMValueRef current_instance;
1786             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
1787                                              lp_build_const_int32(gallivm,
1788                                                                   velem->instance_divisor),
1789                                              "instance_divisor");
1790             instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
1791                                                        current_instance, &ofbit);
1792          }
1793 
1794          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
1795                                               buffer_size_adj[j], "");
1796 
1797          LLVMTypeRef byte_type = LLVMInt8TypeInContext(context);
1798          LLVMTypeRef byte_ptr_type = LLVMPointerType(byte_type, 0);
1799          temp_ptr = lp_build_alloca_undef(gallivm, byte_ptr_type, "");
1800 
1801          lp_build_if(&if_ctx, gallivm, ofbit);
1802          {
1803             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
1804          }
1805          lp_build_else(&if_ctx);
1806          {
1807             map_ptr[j] = LLVMBuildGEP2(builder, byte_type, map_ptr[j], &buf_offset, 1, "");
1808             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
1809          }
1810          lp_build_endif(&if_ctx);
1811          map_ptr[j] = LLVMBuildLoad2(builder, byte_ptr_type, temp_ptr, "map_ptr");
1812 
1813          if (0) {
1814             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
1815                             lp_build_const_int32(gallivm, j),
1816                             vb_index, vb_stride[j]);
1817             lp_build_printf(gallivm,
1818                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
1819                             vb_buffer_offset, src_offset, buf_offset);
1820             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
1821                             buffer_size, bsize);
1822             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
1823          }
1824       }
1825    }
1826 
1827    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
1828    {
1829       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1830       LLVMValueRef io;
1831       LLVMValueRef clipmask;   /* holds the clipmask value */
1832       LLVMValueRef true_index_array, index_store;
1833       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1834 
1835       io_itr = lp_loop.counter;
1836 
1837       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &io_itr, 1, "");
1838 #if DEBUG_STORE
1839       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
1840                       io_itr, io, lp_loop.counter);
1841 #endif
1842 
1843       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
1844       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
1845 
1846       LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
1847       /*
1848        * Limit indices to fetch_max, otherwise might try to access indices
1849        * beyond index buffer (or rather vsplit elt buffer) size.
1850        * Could probably safely (?) skip this for non-indexed draws and
1851        * simplify things minimally (by removing it could combine the ind_vec
1852        * and start_vec adds). I think the only effect for non-indexed draws will
1853        * be that for the invalid elements they will be all fetched from the
1854        * same location as the last valid one, but noone should really care.
1855        */
1856       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
1857 
1858       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
1859 
1860       lp_build_if(&if_ctx, gallivm, have_elts);
1861       {
1862          /*
1863           * Note: you'd expect some comparison/clamp against fetch_elt_max
1864           * here.
1865           * There used to be one here but it was incorrect: overflow was
1866           * detected if index > fetch_elt_max - but the correct condition
1867           * would be index >= fetch_elt_max (since this is just size of elts
1868           * buffer / element size).
1869           * Using the correct condition however will cause failures - due to
1870           * vsplit/vcache code which rebases indices. So, as an example, if
1871           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
1872           * replace all invalid indices with 0 - which in case of elt_bias
1873           * not being zero will get a different fetch index than the valid
1874           * index 0. So, just rely on vsplit code preventing out-of-bounds
1875           * fetches. This is also why it's safe to do elts fetch even if there
1876           * was no index buffer bound - the real buffer is never seen here, at
1877           * least not if there are index buffer overflows...
1878           */
1879 
1880          /*
1881           * XXX should not have to do this, as scale can be handled
1882           * natively by loads (hits asserts though).
1883           */
1884          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
1885          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
1886                                        LLVMPointerType(LLVMInt8TypeInContext(context),
1887                                                        0), "");
1888          tmp = lp_build_gather(gallivm, vs_type.length,
1889                                32, bld.type, true,
1890                                fetch_elts, tmp, false);
1891          LLVMBuildStore(builder, tmp, index_store);
1892       }
1893       lp_build_else(&if_ctx);
1894       {
1895          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
1896          LLVMBuildStore(builder, tmp, index_store);
1897       }
1898       lp_build_endif(&if_ctx);
1899 
1900       true_index_array = LLVMBuildLoad2(builder, blduivec.vec_type, index_store, "");
1901 
1902       for (j = 0; j < key->nr_vertex_elements; ++j) {
1903          struct pipe_vertex_element *velem = &key->vertex_element[j];
1904          const struct util_format_description *format_desc =
1905             util_format_description(velem->src_format);
1906 
1907          if (format_desc->format == PIPE_FORMAT_NONE) {
1908             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1909                inputs[j][i] = lp_build_zero(gallivm, vs_type);
1910             }
1911          } else if (velem->instance_divisor) {
1912             fetch_instanced(gallivm, format_desc, vs_type,
1913                             vb_stride[j], map_ptr[j],
1914                             buffer_size_adj[j],
1915                             inputs[j], instance_index[j]);
1916          } else {
1917             fetch_vector(gallivm, format_desc, vs_type,
1918                          vb_stride[j], map_ptr[j],
1919                          buffer_size_adj[j],
1920                          inputs[j], true_index_array);
1921          }
1922       }
1923 
1924       struct lp_build_mask_context mask;
1925 
1926       lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
1927       /* In the paths with elts vertex id has to be unaffected by the
1928        * index bias and because indices inside our elements array have
1929        * already had index bias applied we need to subtract it here to
1930        * get back to the original index.
1931        * In the linear paths vertex id has to be unaffected by the
1932        * original start index and because we abuse the 'start' variable
1933        * to either represent the actual start index or the index at which
1934        * the primitive was split (we split rendering into chunks of at
1935        * most 4095-vertices) we need to back out the original start
1936        * index out of our vertex id here.
1937        * for ARB_shader_draw_parameters, base_vertex should be 0 for
1938        * non-indexed draws.
1939        */
1940       LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
1941       system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
1942 
1943       /* first vertex is for Vulkan base vertex support */
1944       LLVMValueRef first_vertex = vertex_id_offset;
1945       system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
1946 
1947       system_values.vertex_id = true_index_array;
1948       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
1949                                                     lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
1950 
1951       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1952       generate_vs(variant,
1953                   builder,
1954                   vs_type,
1955                   outputs,
1956                   ptr_aos,
1957                   &system_values,
1958                   context_ptr,
1959                   resources_ptr,
1960                   sampler,
1961                   image,
1962                   key->clamp_vertex_color,
1963                   &mask);
1964 
1965       lp_build_mask_end(&mask);
1966       if (pos != -1 && cv != -1) {
1967          /* store original positions in clip before further manipulation */
1968          store_clip(gallivm, vs_type, variant->vertex_header_type, io, outputs, pos);
1969 
1970          /* do cliptest */
1971          if (enable_cliptest) {
1972             LLVMValueRef temp = LLVMBuildLoad2(builder, blduivec.vec_type, clipmask_bool_ptr, "");
1973             /* allocate clipmask, assign it integer type */
1974             clipmask = generate_clipmask(llvm,
1975                                          gallivm,
1976                                          vs_type,
1977                                          outputs,
1978                                          key,
1979                                          variant->context_type,
1980                                          context_ptr, &have_clipdist);
1981             temp = LLVMBuildOr(builder, clipmask, temp, "");
1982             /* store temporary clipping boolean value */
1983             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
1984          } else {
1985             clipmask = blduivec.zero;
1986          }
1987 
1988          /* do viewport mapping */
1989          if (!bypass_viewport) {
1990             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
1991          }
1992       } else {
1993          clipmask = blduivec.zero;
1994       }
1995 
1996       /* store clipmask in vertex header,
1997        * original positions in clip
1998        * and transformed positions in data
1999        */
2000       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
2001                      vs_info->num_outputs, vs_type, -1,
2002                      enable_cliptest && key->need_edgeflags);
2003    }
2004    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2005 
2006    lp_bld_llvm_sampler_soa_destroy(sampler);
2007    lp_bld_llvm_image_soa_destroy(image);
2008 
2009    /* return clipping boolean value for function */
2010    ret = clipmask_booli8(gallivm, vs_type, blduivec.vec_type, clipmask_bool_ptr,
2011                          enable_cliptest && key->need_edgeflags);
2012 
2013    LLVMBuildRet(builder, ret);
2014 
2015    gallivm_verify_function(gallivm, variant_func);
2016 }
2017 
2018 
2019 struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2020 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2021 {
2022    struct draw_llvm_variant_key *key;
2023    struct lp_sampler_static_state *draw_sampler;
2024    struct lp_image_static_state *draw_image;
2025 
2026    key = (struct draw_llvm_variant_key *)store;
2027 
2028    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2029 
2030 
2031    /* will have to rig this up properly later */
2032    key->clip_xy = llvm->draw->clip_xy;
2033    key->clip_z = llvm->draw->clip_z;
2034    key->clip_user = llvm->draw->clip_user;
2035    key->bypass_viewport = llvm->draw->bypass_viewport;
2036    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2037    /* XXX assumes edgeflag output not at 0 */
2038    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? true : false);
2039    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2040    key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2041    key->num_outputs = draw_total_vs_outputs(llvm->draw);
2042 
2043    key->clamp_vertex_color = !key->has_gs_or_tes &&
2044       llvm->draw->rasterizer->clamp_vertex_color;
2045 
2046    /* All variants of this shader will have the same value for
2047     * nr_samplers.  Not yet trying to compact away holes in the
2048     * sampler array.
2049     */
2050    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2051    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2052       key->nr_sampler_views =
2053          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2054    } else {
2055       key->nr_sampler_views = key->nr_samplers;
2056    }
2057 
2058    key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2059 
2060    /* Presumably all variants of the shader should have the same
2061     * number of vertex elements - ie the number of shader inputs.
2062     * NOTE: we NEED to store the needed number of needed inputs
2063     * here, not the number of provided elements to match keysize
2064     * (and the offset of sampler state in the key).
2065     * If we have excess number of vertex elements, this is valid,
2066     * but the excess ones don't matter.
2067     * If we don't have enough vertex elements (which looks not really
2068     * valid but we'll handle it gracefully) fill out missing ones with
2069     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2070     */
2071    key->nr_vertex_elements =
2072       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2073 
2074    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2075       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2076                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2077       memset(key->vertex_element, 0,
2078              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2079    }
2080    memcpy(key->vertex_element,
2081           llvm->draw->pt.vertex_element,
2082           sizeof(struct pipe_vertex_element) *
2083              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2084 
2085    draw_sampler = draw_llvm_variant_key_samplers(key);
2086    memset(draw_sampler, 0,
2087           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2088 
2089    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2090       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2091                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2092    }
2093    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2094       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2095                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2096    }
2097 
2098    draw_image = draw_llvm_variant_key_images(key);
2099    memset(draw_image, 0,
2100           key->nr_images * sizeof *draw_image);
2101    for (unsigned i = 0; i < key->nr_images; i++) {
2102       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2103                                             llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2104    }
2105    return key;
2106 }
2107 
2108 
2109 void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key * key)2110 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2111 {
2112    struct lp_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2113    struct lp_image_static_state *image = draw_llvm_variant_key_images(key);
2114    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2115    debug_printf("clip_xy = %u\n", key->clip_xy);
2116    debug_printf("clip_z = %u\n", key->clip_z);
2117    debug_printf("clip_user = %u\n", key->clip_user);
2118    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2119    debug_printf("clip_halfz = %u\n", key->clip_halfz);
2120    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2121    debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2122    debug_printf("ucp_enable = %u\n", key->ucp_enable);
2123 
2124    for (unsigned i = 0 ; i < key->nr_vertex_elements; i++) {
2125       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2126       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2127       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2128       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2129    }
2130 
2131    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2132       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2133    }
2134 
2135    for (unsigned i = 0 ; i < key->nr_images; i++)
2136       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2137 }
2138 
2139 
2140 void
draw_llvm_set_mapped_texture(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned sview_idx,uint32_t width,uint32_t height,uint32_t depth,uint32_t first_level,uint32_t last_level,uint32_t num_samples,uint32_t sample_stride,const void * base_ptr,uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2141 draw_llvm_set_mapped_texture(struct draw_context *draw,
2142                              enum pipe_shader_type shader_stage,
2143                              unsigned sview_idx,
2144                              uint32_t width, uint32_t height, uint32_t depth,
2145                              uint32_t first_level, uint32_t last_level,
2146                              uint32_t num_samples,
2147                              uint32_t sample_stride,
2148                              const void *base_ptr,
2149                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2150                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2151                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2152 {
2153    struct lp_jit_texture *jit_tex;
2154 
2155    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2156    assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].textures));
2157 
2158    jit_tex = &draw->llvm->jit_resources[shader_stage].textures[sview_idx];
2159    jit_tex->width = width;
2160    jit_tex->height = height;
2161    jit_tex->depth = depth;
2162    jit_tex->first_level = first_level;
2163    jit_tex->last_level = last_level;
2164    jit_tex->base = base_ptr;
2165    jit_tex->mip_offsets[0] = 0;
2166    if (num_samples > 1) {
2167       jit_tex->mip_offsets[0] = mip_offsets[0];
2168       jit_tex->mip_offsets[LP_JIT_TEXTURE_SAMPLE_STRIDE] = sample_stride;
2169       jit_tex->row_stride[0] = row_stride[0];
2170       jit_tex->img_stride[0] = img_stride[0];
2171       jit_tex->last_level = num_samples;
2172    } else {
2173       for (unsigned j = first_level; j <= last_level; j++) {
2174          jit_tex->mip_offsets[j] = mip_offsets[j];
2175          jit_tex->row_stride[j] = row_stride[j];
2176          jit_tex->img_stride[j] = img_stride[j];
2177       }
2178    }
2179 }
2180 
2181 
2182 void
draw_llvm_set_mapped_image(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned idx,uint32_t width,uint32_t height,uint32_t depth,const void * base_ptr,uint32_t row_stride,uint32_t img_stride,uint32_t num_samples,uint32_t sample_stride)2183 draw_llvm_set_mapped_image(struct draw_context *draw,
2184                            enum pipe_shader_type shader_stage,
2185                            unsigned idx,
2186                            uint32_t width, uint32_t height, uint32_t depth,
2187                            const void *base_ptr,
2188                            uint32_t row_stride,
2189                            uint32_t img_stride,
2190                            uint32_t num_samples,
2191                            uint32_t sample_stride)
2192 {
2193    struct lp_jit_image *jit_image;
2194 
2195    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2196    assert(idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].images));
2197 
2198    jit_image = &draw->llvm->jit_resources[shader_stage].images[idx];
2199 
2200    jit_image->width = width;
2201    jit_image->height = height;
2202    jit_image->depth = depth;
2203    jit_image->base = base_ptr;
2204 
2205    jit_image->row_stride = row_stride;
2206    jit_image->img_stride = img_stride;
2207    jit_image->num_samples = num_samples;
2208    jit_image->sample_stride = sample_stride;
2209 }
2210 
2211 
2212 void
draw_llvm_set_sampler_state(struct draw_context * draw,enum pipe_shader_type shader_type)2213 draw_llvm_set_sampler_state(struct draw_context *draw,
2214                             enum pipe_shader_type shader_type)
2215 {
2216    assert(shader_type < DRAW_MAX_SHADER_STAGE);
2217    for (unsigned i = 0; i < draw->num_samplers[shader_type]; i++) {
2218       struct lp_jit_sampler *jit_sam = &draw->llvm->jit_resources[shader_type].samplers[i];
2219 
2220       if (draw->samplers[shader_type][i]) {
2221          const struct pipe_sampler_state *s
2222             = draw->samplers[shader_type][i];
2223          jit_sam->min_lod = s->min_lod;
2224          jit_sam->max_lod = s->max_lod;
2225          jit_sam->lod_bias = s->lod_bias;
2226          COPY_4V(jit_sam->border_color, s->border_color.f);
2227       }
2228    }
2229 }
2230 
2231 
2232 void
draw_llvm_destroy_variant(struct draw_llvm_variant * variant)2233 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2234 {
2235    struct draw_llvm *llvm = variant->llvm;
2236 
2237    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2238       debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2239                     variant->shader->variants_cached, llvm->nr_variants);
2240    }
2241 
2242    gallivm_destroy(variant->gallivm);
2243 
2244    list_del(&variant->list_item_local.list);
2245    variant->shader->variants_cached--;
2246    list_del(&variant->list_item_global.list);
2247    llvm->nr_variants--;
2248    if(variant->function_name)
2249       FREE(variant->function_name);
2250    FREE(variant);
2251 }
2252 
2253 
2254 /**
2255  * Create LLVM types for various structures.
2256  */
2257 static void
create_gs_jit_types(struct draw_gs_llvm_variant * var)2258 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2259 {
2260    struct gallivm_state *gallivm = var->gallivm;
2261 
2262    var->context_type = create_gs_jit_context_type(gallivm,
2263                                              var->shader->base.vector_length,
2264                                              "draw_gs_jit_context");
2265    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
2266 
2267    var->resources_type = lp_build_jit_resources_type(gallivm);
2268    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2269    var->input_array_type = create_gs_jit_input_type(gallivm);
2270 }
2271 
2272 
2273 static LLVMTypeRef
get_gs_context_ptr_type(struct draw_gs_llvm_variant * variant)2274 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2275 {
2276    if (!variant->context_ptr_type)
2277       create_gs_jit_types(variant);
2278    return variant->context_ptr_type;
2279 }
2280 
2281 
2282 static LLVMValueRef
generate_mask_value(struct draw_gs_llvm_variant * variant,struct lp_type gs_type)2283 generate_mask_value(struct draw_gs_llvm_variant *variant,
2284                     struct lp_type gs_type)
2285 {
2286    struct gallivm_state *gallivm = variant->gallivm;
2287    LLVMBuilderRef builder = gallivm->builder;
2288    struct lp_type mask_type = lp_int_type(gs_type);
2289    LLVMValueRef num_prims;
2290    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2291 
2292    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2293                                   variant->num_prims);
2294    for (unsigned i = 0; i < gs_type.length; i++) {
2295       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2296       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2297    }
2298    mask_val = lp_build_compare(gallivm, mask_type,
2299                                PIPE_FUNC_GREATER, num_prims, mask_val);
2300 
2301    return mask_val;
2302 }
2303 
2304 
2305 static void
draw_gs_llvm_generate(struct draw_llvm * llvm,struct draw_gs_llvm_variant * variant)2306 draw_gs_llvm_generate(struct draw_llvm *llvm,
2307                       struct draw_gs_llvm_variant *variant)
2308 {
2309    struct gallivm_state *gallivm = variant->gallivm;
2310    LLVMContextRef context = gallivm->context;
2311    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2312    LLVMTypeRef arg_types[9];
2313    LLVMTypeRef func_type;
2314    LLVMValueRef variant_func;
2315    LLVMValueRef context_ptr;
2316    LLVMValueRef resources_ptr;
2317    LLVMValueRef prim_id_ptr;
2318    LLVMBasicBlockRef block;
2319    LLVMBuilderRef builder;
2320    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2321    struct lp_build_sampler_soa *sampler = 0;
2322    struct lp_build_image_soa *image = NULL;
2323    struct lp_build_context bld;
2324    struct lp_bld_tgsi_system_values system_values;
2325    char func_name[64];
2326    struct lp_type gs_type;
2327    struct draw_gs_llvm_iface gs_iface;
2328    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2329    LLVMValueRef consts_ptr;
2330    LLVMValueRef ssbos_ptr;
2331    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2332    struct lp_build_mask_context mask;
2333    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2334    unsigned vector_length = variant->shader->base.vector_length;
2335 
2336    memset(&system_values, 0, sizeof(system_values));
2337    memset(&outputs, 0, sizeof(outputs));
2338 
2339    snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2340 
2341    assert(variant->vertex_header_ptr_type);
2342 
2343    LLVMTypeRef prim_id_type = LLVMVectorType(int32_type, vector_length);
2344    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2345    arg_types[1] = variant->resources_ptr_type;
2346    arg_types[2] = variant->input_array_type;           /* input */
2347    arg_types[3] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2348    arg_types[4] = int32_type;                          /* num_prims */
2349    arg_types[5] = int32_type;                          /* instance_id */
2350    arg_types[6] = LLVMPointerType(prim_id_type, 0);    /* prim_id_ptr */
2351    arg_types[7] = int32_type;
2352    arg_types[8] = int32_type;
2353 
2354    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2355 
2356    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2357 
2358    variant->function = variant_func;
2359    variant->function_name = MALLOC(strlen(func_name)+1);
2360    strcpy(variant->function_name, func_name);
2361 
2362    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2363 
2364    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i)
2365       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2366          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2367 
2368    if (gallivm->cache && gallivm->cache->data_size) {
2369       gallivm_stub_func(gallivm, variant_func);
2370       return;
2371    }
2372 
2373    context_ptr               = LLVMGetParam(variant_func, 0);
2374    resources_ptr             = LLVMGetParam(variant_func, 1);
2375    input_array               = LLVMGetParam(variant_func, 2);
2376    io_ptr                    = LLVMGetParam(variant_func, 3);
2377    num_prims                 = LLVMGetParam(variant_func, 4);
2378    system_values.instance_id = LLVMGetParam(variant_func, 5);
2379    prim_id_ptr               = LLVMGetParam(variant_func, 6);
2380    system_values.invocation_id = LLVMGetParam(variant_func, 7);
2381    system_values.view_index  = LLVMGetParam(variant_func, 8);
2382 
2383    lp_build_name(context_ptr, "context");
2384    lp_build_name(resources_ptr, "resources");
2385    lp_build_name(input_array, "input");
2386    lp_build_name(io_ptr, "io");
2387    lp_build_name(num_prims, "num_prims");
2388    lp_build_name(system_values.instance_id, "instance_id");
2389    lp_build_name(prim_id_ptr, "prim_id_ptr");
2390    lp_build_name(system_values.invocation_id, "invocation_id");
2391    lp_build_name(system_values.view_index, "view_index");
2392 
2393    variant->context_ptr = context_ptr;
2394    variant->io_ptr = io_ptr;
2395    variant->num_prims = num_prims;
2396 
2397    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2398    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2399    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2400    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2401    gs_iface.input = input_array;
2402    gs_iface.variant = variant;
2403 
2404    /*
2405     * Function body
2406     */
2407 
2408    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2409    builder = gallivm->builder;
2410    LLVMPositionBuilderAtEnd(builder, block);
2411 
2412    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2413 
2414    memset(&gs_type, 0, sizeof gs_type);
2415    gs_type.floating = true; /* floating point values */
2416    gs_type.sign = true;     /* values are signed */
2417    gs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2418    gs_type.width = 32;      /* 32-bit float */
2419    gs_type.length = vector_length;
2420 
2421    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
2422 
2423    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
2424 
2425    /* code generated texture sampling */
2426    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
2427                                           MAX2(variant->key.nr_samplers,
2428                                                variant->key.nr_sampler_views));
2429    image = lp_bld_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2430                                       variant->key.nr_images);
2431    mask_val = generate_mask_value(variant, gs_type);
2432    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2433 
2434    if (gs_info->uses_primid) {
2435       system_values.prim_id = LLVMBuildLoad2(builder, prim_id_type, prim_id_ptr, "prim_id");
2436    }
2437 
2438    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2439       if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2440          tgsi_dump(tokens, 0);
2441       else
2442          nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2443       draw_gs_llvm_dump_variant_key(&variant->key);
2444    }
2445 
2446    struct lp_build_tgsi_params params;
2447    memset(&params, 0, sizeof(params));
2448 
2449    params.type = gs_type;
2450    params.mask = &mask;
2451    params.consts_ptr = consts_ptr;
2452    params.system_values = &system_values;
2453    params.context_type = variant->context_type;
2454    params.context_ptr = context_ptr;
2455    params.resources_type = variant->resources_type;
2456    params.resources_ptr = resources_ptr;
2457    params.sampler = sampler;
2458    params.info = &llvm->draw->gs.geometry_shader->info;
2459    params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2460    params.ssbo_ptr = ssbos_ptr;
2461    params.image = image;
2462    params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2463 
2464    if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2465       lp_build_tgsi_soa(variant->gallivm,
2466                         tokens,
2467                         &params,
2468                         outputs);
2469    else
2470       lp_build_nir_soa(variant->gallivm,
2471                        llvm->draw->gs.geometry_shader->state.ir.nir,
2472                        &params,
2473                        outputs);
2474 
2475    lp_bld_llvm_sampler_soa_destroy(sampler);
2476    lp_bld_llvm_image_soa_destroy(image);
2477 
2478    lp_build_mask_end(&mask);
2479 
2480    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2481 
2482    gallivm_verify_function(gallivm, variant_func);
2483 }
2484 
2485 
2486 struct draw_gs_llvm_variant *
draw_gs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_gs_llvm_variant_key * key)2487 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2488                             unsigned num_outputs,
2489                             const struct draw_gs_llvm_variant_key *key)
2490 {
2491    struct draw_gs_llvm_variant *variant;
2492    struct llvm_geometry_shader *shader =
2493       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2494    char module_name[64];
2495    unsigned char ir_sha1_cache_key[20];
2496    struct lp_cached_code cached = { 0 };
2497    bool needs_caching = false;
2498 
2499    variant = MALLOC(sizeof *variant +
2500                     shader->variant_key_size -
2501                     sizeof variant->key);
2502    if (!variant)
2503       return NULL;
2504 
2505    variant->llvm = llvm;
2506    variant->shader = shader;
2507 
2508    snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2509             variant->shader->variants_cached);
2510 
2511    memcpy(&variant->key, key, shader->variant_key_size);
2512 
2513    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2514       draw_get_ir_cache_key(shader->base.state.ir.nir,
2515                             key,
2516                             shader->variant_key_size,
2517                             num_outputs,
2518                             ir_sha1_cache_key);
2519 
2520       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2521                                          &cached,
2522                                          ir_sha1_cache_key);
2523       if (!cached.data_size)
2524          needs_caching = true;
2525    }
2526    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
2527 
2528    create_gs_jit_types(variant);
2529 
2530    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
2531    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
2532 
2533    draw_gs_llvm_generate(llvm, variant);
2534 
2535    gallivm_compile_module(variant->gallivm);
2536 
2537    variant->jit_func = (draw_gs_jit_func)
2538          gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
2539 
2540    if (needs_caching)
2541       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2542                                            &cached,
2543                                            ir_sha1_cache_key);
2544    gallivm_free_ir(variant->gallivm);
2545 
2546    variant->list_item_global.base = variant;
2547    variant->list_item_local.base = variant;
2548    /*variant->no = */shader->variants_created++;
2549    variant->list_item_global.base = variant;
2550 
2551    return variant;
2552 }
2553 
2554 
2555 void
draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant * variant)2556 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2557 {
2558    struct draw_llvm *llvm = variant->llvm;
2559 
2560    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2561       debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2562                     variant->shader->variants_cached, llvm->nr_gs_variants);
2563    }
2564 
2565    gallivm_destroy(variant->gallivm);
2566 
2567    list_del(&variant->list_item_local.list);
2568    variant->shader->variants_cached--;
2569    list_del(&variant->list_item_global.list);
2570    llvm->nr_gs_variants--;
2571    if(variant->function_name)
2572       FREE(variant->function_name);
2573    FREE(variant);
2574 }
2575 
2576 
2577 struct draw_gs_llvm_variant_key *
draw_gs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2578 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2579 {
2580    struct draw_gs_llvm_variant_key *key;
2581    struct lp_sampler_static_state *draw_sampler;
2582    struct lp_image_static_state *draw_image;
2583 
2584    key = (struct draw_gs_llvm_variant_key *)store;
2585 
2586    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2587 
2588    key->num_outputs = draw_total_gs_outputs(llvm->draw);
2589 
2590    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
2591 
2592    /* All variants of this shader will have the same value for
2593     * nr_samplers.  Not yet trying to compact away holes in the
2594     * sampler array.
2595     */
2596    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2597    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2598       key->nr_sampler_views =
2599          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2600    } else {
2601       key->nr_sampler_views = key->nr_samplers;
2602    }
2603 
2604    key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2605 
2606    draw_sampler = key->samplers;
2607 
2608    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2609 
2610    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2611       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2612                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
2613    }
2614    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2615       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2616                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
2617    }
2618 
2619    draw_image = draw_gs_llvm_variant_key_images(key);
2620    memset(draw_image, 0,
2621           key->nr_images * sizeof *draw_image);
2622    for (unsigned i = 0; i < key->nr_images; i++) {
2623       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2624                                             llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
2625    }
2626    return key;
2627 }
2628 
2629 
2630 void
draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key * key)2631 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
2632 {
2633    struct lp_sampler_static_state *sampler = key->samplers;
2634    struct lp_image_static_state *image = draw_gs_llvm_variant_key_images(key);
2635 
2636    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2637    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2638       debug_printf("sampler[%i].src_format = %s\n", i,
2639                    util_format_name(sampler[i].texture_state.format));
2640    }
2641 
2642    for (unsigned i = 0 ; i < key->nr_images; i++)
2643       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2644 
2645 }
2646 
2647 
2648 static void
create_tcs_jit_types(struct draw_tcs_llvm_variant * var)2649 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
2650 {
2651    struct gallivm_state *gallivm = var->gallivm;
2652 
2653    var->resources_type = lp_build_jit_resources_type(gallivm);
2654    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2655    var->input_array_type = create_tcs_jit_input_type(gallivm);
2656    var->output_array_type = create_tcs_jit_output_type(gallivm);
2657 }
2658 
2659 
2660 static LLVMTypeRef
get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant * variant)2661 get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant *variant)
2662 {
2663    if (!variant->resources_ptr_type)
2664       create_tcs_jit_types(variant);
2665    return variant->resources_ptr_type;
2666 }
2667 
2668 
2669 static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)2670 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
2671                                struct lp_build_context *bld,
2672                                bool is_vindex_indirect,
2673                                LLVMValueRef vertex_index,
2674                                bool is_aindex_indirect,
2675                                LLVMValueRef attrib_index,
2676                                bool is_sindex_indirect,
2677                                LLVMValueRef swizzle_index)
2678 {
2679    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2680    struct gallivm_state *gallivm = bld->gallivm;
2681    LLVMBuilderRef builder = gallivm->builder;
2682    LLVMValueRef indices[3];
2683    LLVMValueRef res;
2684    struct lp_type type = bld->type;
2685    LLVMTypeRef input_type = create_tcs_jit_input_type_deref(gallivm);
2686    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2687 
2688    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2689       res = bld->zero;
2690       for (int i = 0; i < type.length; ++i) {
2691          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2692          LLVMValueRef vert_chan_index = vertex_index;
2693          LLVMValueRef attr_chan_index = attrib_index;
2694          LLVMValueRef swiz_chan_index = swizzle_index;
2695          LLVMValueRef channel_vec;
2696 
2697          if (is_vindex_indirect) {
2698             vert_chan_index = LLVMBuildExtractElement(builder,
2699                                                       vertex_index, idx, "");
2700          }
2701          if (is_aindex_indirect) {
2702             attr_chan_index = LLVMBuildExtractElement(builder,
2703                                                       attrib_index, idx, "");
2704          }
2705          if (is_sindex_indirect) {
2706             swiz_chan_index = LLVMBuildExtractElement(builder,
2707                                                       swizzle_index, idx, "");
2708          }
2709 
2710          indices[0] = vert_chan_index;
2711          indices[1] = attr_chan_index;
2712          indices[2] = swiz_chan_index;
2713 
2714          channel_vec = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2715          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2716          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2717       }
2718    } else {
2719       indices[0] = vertex_index;
2720       indices[1] = attrib_index;
2721       indices[2] = swizzle_index;
2722       res = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2723       res = LLVMBuildLoad2(builder, float_type, res, "");
2724       res = lp_build_broadcast_scalar(bld, res);
2725    }
2726    return res;
2727 }
2728 
2729 
2730 static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,uint32_t name)2731 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
2732                                 struct lp_build_context *bld,
2733                                 bool is_vindex_indirect,
2734                                 LLVMValueRef vertex_index,
2735                                 bool is_aindex_indirect,
2736                                 LLVMValueRef attrib_index,
2737                                 bool is_sindex_indirect,
2738                                 LLVMValueRef swizzle_index,
2739                                 uint32_t name)
2740 {
2741    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2742    struct gallivm_state *gallivm = bld->gallivm;
2743    LLVMBuilderRef builder = gallivm->builder;
2744    LLVMValueRef indices[3];
2745    LLVMValueRef res;
2746    struct lp_type type = bld->type;
2747    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2748    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2749 
2750    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2751       res = bld->zero;
2752       for (int i = 0; i < type.length; ++i) {
2753          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2754          LLVMValueRef vert_chan_index = vertex_index;
2755          LLVMValueRef attr_chan_index = attrib_index;
2756          LLVMValueRef swiz_chan_index = swizzle_index;
2757          LLVMValueRef channel_vec;
2758 
2759          if (is_vindex_indirect) {
2760             vert_chan_index = LLVMBuildExtractElement(builder,
2761                                                       vertex_index, idx, "");
2762          }
2763          if (is_aindex_indirect) {
2764             attr_chan_index = LLVMBuildExtractElement(builder,
2765                                                       attrib_index, idx, "");
2766          }
2767          if (is_sindex_indirect) {
2768             swiz_chan_index = LLVMBuildExtractElement(builder,
2769                                                       swizzle_index, idx, "");
2770          }
2771 
2772          indices[0] = vert_chan_index;
2773          indices[1] = attr_chan_index;
2774          indices[2] = swiz_chan_index;
2775 
2776          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2777          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2778 
2779          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2780       }
2781    } else {
2782       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2783       indices[1] = attrib_index;
2784       indices[2] = swizzle_index;
2785 
2786       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2787       res = LLVMBuildLoad2(builder, float_type, res, "");
2788       res = lp_build_broadcast_scalar(bld, res);
2789    }
2790    return res;
2791 }
2792 
2793 
2794 static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,unsigned name,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)2795 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
2796                                 struct lp_build_context *bld,
2797                                 unsigned name,
2798                                 bool is_vindex_indirect,
2799                                 LLVMValueRef vertex_index,
2800                                 bool is_aindex_indirect,
2801                                 LLVMValueRef attrib_index,
2802                                 bool is_sindex_indirect,
2803                                 LLVMValueRef swizzle_index,
2804                                 LLVMValueRef value,
2805                                 LLVMValueRef mask_vec)
2806 {
2807    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2808    struct gallivm_state *gallivm = bld->gallivm;
2809    LLVMBuilderRef builder = gallivm->builder;
2810    LLVMValueRef indices[3];
2811    LLVMValueRef res;
2812    struct lp_type type = bld->type;
2813    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2814 
2815    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2816       for (int i = 0; i < type.length; ++i) {
2817          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2818          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2819          LLVMValueRef attr_chan_index = attrib_index;
2820          LLVMValueRef swiz_chan_index = swizzle_index;
2821          LLVMValueRef channel_vec;
2822 
2823          if (is_vindex_indirect) {
2824             vert_chan_index = LLVMBuildExtractElement(builder,
2825                                                       vertex_index, idx, "");
2826          }
2827          if (is_aindex_indirect) {
2828             attr_chan_index = LLVMBuildExtractElement(builder,
2829                                                       attrib_index, idx, "");
2830          }
2831 
2832          if (is_sindex_indirect) {
2833             swiz_chan_index = LLVMBuildExtractElement(builder,
2834                                                       swizzle_index, idx, "");
2835          }
2836 
2837          indices[0] = vert_chan_index;
2838          indices[1] = attr_chan_index;
2839          indices[2] = swiz_chan_index;
2840 
2841          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2842 
2843          res = LLVMBuildExtractElement(builder, value, idx, "");
2844 
2845          struct lp_build_if_state ifthen;
2846          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2847          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2848          lp_build_if(&ifthen, gallivm, cond);
2849          LLVMBuildStore(builder, res, channel_vec);
2850          lp_build_endif(&ifthen);
2851       }
2852    } else {
2853       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2854       indices[1] = attrib_index;
2855       indices[2] = swizzle_index;
2856 
2857       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2858       for (unsigned i = 0; i < type.length; ++i) {
2859          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2860          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
2861 
2862          struct lp_build_if_state ifthen;
2863          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2864          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2865          lp_build_if(&ifthen, gallivm, cond);
2866          LLVMBuildStore(builder, val, res);
2867          lp_build_endif(&ifthen);
2868       }
2869    }
2870 }
2871 
2872 
2873 static LLVMValueRef
generate_tcs_mask_value(struct draw_tcs_llvm_variant * variant,struct lp_type tcs_type,LLVMValueRef limit,LLVMValueRef loop_counter)2874 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
2875                         struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
2876 {
2877    struct gallivm_state *gallivm = variant->gallivm;
2878    LLVMBuilderRef builder = gallivm->builder;
2879    struct lp_type mask_type = lp_int_type(tcs_type);
2880    LLVMValueRef num_vecs;
2881    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2882 
2883    num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
2884    for (unsigned i = 0; i < tcs_type.length; i++) {
2885       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2886       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
2887    }
2888    mask_val = lp_build_compare(gallivm, mask_type,
2889                                PIPE_FUNC_GREATER, num_vecs, mask_val);
2890 
2891    return mask_val;
2892 }
2893 
2894 
2895 static void
draw_tcs_llvm_generate(struct draw_llvm * llvm,struct draw_tcs_llvm_variant * variant)2896 draw_tcs_llvm_generate(struct draw_llvm *llvm,
2897                        struct draw_tcs_llvm_variant *variant)
2898 {
2899    struct gallivm_state *gallivm = variant->gallivm;
2900    LLVMContextRef context = gallivm->context;
2901    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2902    LLVMTypeRef arg_types[7];
2903    LLVMTypeRef func_type, coro_func_type;
2904    LLVMValueRef variant_func, variant_coro;
2905    LLVMValueRef resources_ptr;
2906    LLVMValueRef view_index;
2907    LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
2908    LLVMValueRef mask_val;
2909    LLVMBasicBlockRef block;
2910    LLVMBuilderRef builder;
2911    struct lp_build_context bld, bldvec;
2912    struct lp_build_sampler_soa *sampler = 0;
2913    struct lp_build_image_soa *image = NULL;
2914    struct lp_bld_tgsi_system_values system_values;
2915    char func_name[64], func_name_coro[64];
2916    struct draw_tcs_llvm_iface tcs_iface;
2917    struct lp_build_mask_context mask;
2918    LLVMValueRef consts_ptr;
2919    LLVMValueRef ssbos_ptr;
2920    struct lp_type tcs_type;
2921    unsigned vector_length = variant->shader->base.vector_length;
2922 
2923    memset(&system_values, 0, sizeof(system_values));
2924 
2925    snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
2926 
2927    snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
2928 
2929    arg_types[0] = get_tcs_resources_ptr_type(variant);    /* context */
2930    arg_types[1] = variant->input_array_type;           /* input */
2931    arg_types[2] = variant->output_array_type;
2932    arg_types[3] = int32_type;
2933    arg_types[4] = int32_type;
2934    arg_types[5] = int32_type;
2935    arg_types[6] = int32_type; /* coroutine only */
2936 
2937    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
2938 
2939    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
2940 
2941    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2942 
2943    variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
2944 
2945    variant->function = variant_func;
2946    variant->function_name = MALLOC(strlen(func_name)+1);
2947    strcpy(variant->function_name, func_name);
2948    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2949 
2950    LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
2951 
2952    lp_build_coro_add_presplit(variant_coro);
2953 
2954    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i) {
2955       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
2956          lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
2957          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2958       }
2959    }
2960 
2961    if (gallivm->cache && gallivm->cache->data_size) {
2962       gallivm_stub_func(gallivm, variant_func);
2963       gallivm_stub_func(gallivm, variant_coro);
2964       return;
2965    }
2966 
2967    resources_ptr               = LLVMGetParam(variant_func, 0);
2968    input_array               = LLVMGetParam(variant_func, 1);
2969    output_array              = LLVMGetParam(variant_func, 2);
2970    prim_id                   = LLVMGetParam(variant_func, 3);
2971    patch_vertices_in         = LLVMGetParam(variant_func, 4);
2972    view_index                = LLVMGetParam(variant_func, 5);
2973 
2974    lp_build_name(resources_ptr, "resources");
2975    lp_build_name(input_array, "input");
2976    lp_build_name(output_array, "output");
2977    lp_build_name(prim_id, "prim_id");
2978    lp_build_name(patch_vertices_in, "patch_vertices_in");
2979    lp_build_name(view_index, "view_index");
2980 
2981    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2982    builder = gallivm->builder;
2983    LLVMPositionBuilderAtEnd(builder, block);
2984 
2985    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2986 
2987    memset(&tcs_type, 0, sizeof tcs_type);
2988    tcs_type.floating = true; /* floating point values */
2989    tcs_type.sign = true;     /* values are signed */
2990    tcs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2991    tcs_type.width = 32;      /* 32-bit float */
2992    tcs_type.length = vector_length;
2993 
2994    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
2995 
2996    LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
2997    LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
2998 
2999    struct lp_build_loop_state loop_state[2];
3000    LLVMValueRef num_inner_loop;
3001    unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
3002    num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
3003    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
3004    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
3005    unsigned end_coroutine = INT_MAX;
3006    lp_build_loop_begin(&loop_state[1], gallivm,
3007                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3008    lp_build_loop_begin(&loop_state[0], gallivm,
3009                        lp_build_const_int32(gallivm, 0)); /* inner loop */
3010    {
3011       LLVMValueRef args[7];
3012       args[0] = resources_ptr;
3013       args[1] = input_array;
3014       args[2] = output_array;
3015       args[3] = prim_id;
3016       args[4] = patch_vertices_in;
3017       args[5] = view_index;
3018       args[6] = loop_state[0].counter;
3019       LLVMValueRef coro_entry = LLVMBuildGEP2(builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
3020       LLVMValueRef coro_hdl = LLVMBuildLoad2(builder, hdl_ptr_type, coro_entry, "coro_hdl");
3021 
3022       struct lp_build_if_state ifstate;
3023       LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3024                                        lp_build_const_int32(gallivm, 0), "");
3025       /* first time here - call the coroutine function entry point */
3026       lp_build_if(&ifstate, gallivm, cmp);
3027       LLVMValueRef coro_ret = LLVMBuildCall2(builder, coro_func_type, variant_coro, args, 7, "");
3028       LLVMBuildStore(builder, coro_ret, coro_entry);
3029       lp_build_else(&ifstate);
3030       /* subsequent calls for this invocation - check if done. */
3031       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3032       struct lp_build_if_state ifstate2;
3033       lp_build_if(&ifstate2, gallivm, coro_done);
3034       /* if done destroy and force loop exit */
3035       lp_build_coro_destroy(gallivm, coro_hdl);
3036       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3037       lp_build_else(&ifstate2);
3038       /* otherwise resume the coroutine */
3039       lp_build_coro_resume(gallivm, coro_hdl);
3040       lp_build_endif(&ifstate2);
3041       lp_build_endif(&ifstate);
3042       lp_build_loop_force_reload_counter(&loop_state[1]);
3043    }
3044    lp_build_loop_end_cond(&loop_state[0],
3045                           num_inner_loop,
3046                           NULL,  LLVMIntUGE);
3047    lp_build_loop_end_cond(&loop_state[1],
3048                           lp_build_const_int32(gallivm, end_coroutine),
3049                           NULL, LLVMIntEQ);
3050    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3051 
3052    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3053    LLVMPositionBuilderAtEnd(builder, block);
3054 
3055    resources_ptr = LLVMGetParam(variant_coro, 0);
3056    input_array = LLVMGetParam(variant_coro, 1);
3057    output_array = LLVMGetParam(variant_coro, 2);
3058    prim_id = LLVMGetParam(variant_coro, 3);
3059    patch_vertices_in = LLVMGetParam(variant_coro, 4);
3060    view_index = LLVMGetParam(variant_coro, 5);
3061 
3062    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3063 
3064    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3065    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3066                                           MAX2(variant->key.nr_samplers,
3067                                                variant->key.nr_sampler_views));
3068    image = lp_bld_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3069                                       variant->key.nr_images);
3070 
3071    LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
3072    LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3073    for (unsigned i = 0; i < vector_length; i++) {
3074       LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
3075       LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
3076       invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
3077    }
3078 
3079    system_values.invocation_id = invocvec;
3080    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3081    system_values.view_index = view_index;
3082    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3083    tcs_iface.input = input_array;
3084    tcs_iface.output = output_array;
3085    tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3086    tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3087    tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3088 
3089 
3090    {
3091       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3092       LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3093 
3094       mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3095       lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3096 
3097       struct lp_build_coro_suspend_info coro_info;
3098 
3099       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3100       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3101 
3102       coro_info.suspend = sus_block;
3103       coro_info.cleanup = clean_block;
3104 
3105       struct lp_build_tgsi_params params;
3106       memset(&params, 0, sizeof(params));
3107 
3108       params.type = tcs_type;
3109       params.mask = &mask;
3110       params.consts_ptr = consts_ptr;
3111       params.system_values = &system_values;
3112       params.resources_type = variant->resources_type;
3113       params.resources_ptr = resources_ptr;
3114       params.sampler = sampler;
3115       params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3116       params.ssbo_ptr = ssbos_ptr;
3117       params.image = image;
3118       params.coro = &coro_info;
3119       params.tcs_iface = &tcs_iface.base;
3120 
3121       lp_build_nir_soa(variant->gallivm,
3122                        llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3123                        &params, NULL);
3124 
3125       lp_build_mask_end(&mask);
3126 
3127       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
3128       LLVMPositionBuilderAtEnd(builder, clean_block);
3129 
3130       lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3131 
3132       LLVMBuildBr(builder, sus_block);
3133       LLVMPositionBuilderAtEnd(builder, sus_block);
3134 
3135       lp_build_coro_end(gallivm, coro_hdl);
3136       LLVMBuildRet(builder, coro_hdl);
3137    }
3138 
3139    lp_bld_llvm_sampler_soa_destroy(sampler);
3140    lp_bld_llvm_image_soa_destroy(image);
3141    gallivm_verify_function(gallivm, variant_func);
3142    gallivm_verify_function(gallivm, variant_coro);
3143 }
3144 
3145 
3146 struct draw_tcs_llvm_variant *
draw_tcs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tcs_llvm_variant_key * key)3147 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3148                              unsigned num_outputs,
3149                              const struct draw_tcs_llvm_variant_key *key)
3150 {
3151    struct draw_tcs_llvm_variant *variant;
3152    struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3153    char module_name[64];
3154    unsigned char ir_sha1_cache_key[20];
3155    struct lp_cached_code cached = { 0 };
3156    bool needs_caching = false;
3157 
3158    variant = MALLOC(sizeof *variant +
3159                     shader->variant_key_size - sizeof variant->key);
3160    if (!variant)
3161       return NULL;
3162 
3163    variant->llvm = llvm;
3164    variant->shader = shader;
3165 
3166    snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3167             variant->shader->variants_cached);
3168 
3169    memcpy(&variant->key, key, shader->variant_key_size);
3170 
3171    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3172       draw_get_ir_cache_key(shader->base.state.ir.nir,
3173                             key,
3174                             shader->variant_key_size,
3175                             num_outputs,
3176                             ir_sha1_cache_key);
3177 
3178       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3179                                          &cached,
3180                                          ir_sha1_cache_key);
3181       if (!cached.data_size)
3182          needs_caching = true;
3183    }
3184 
3185    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3186 
3187    create_tcs_jit_types(variant);
3188 
3189    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3190       nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3191       draw_tcs_llvm_dump_variant_key(&variant->key);
3192    }
3193 
3194    draw_tcs_llvm_generate(llvm, variant);
3195 
3196    gallivm_compile_module(variant->gallivm);
3197 
3198    variant->jit_func = (draw_tcs_jit_func)
3199       gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3200 
3201    if (needs_caching)
3202       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3203                                            &cached,
3204                                            ir_sha1_cache_key);
3205    gallivm_free_ir(variant->gallivm);
3206 
3207    variant->list_item_global.base = variant;
3208    variant->list_item_local.base = variant;
3209    /*variant->no = */shader->variants_created++;
3210    variant->list_item_global.base = variant;
3211 
3212    return variant;
3213 }
3214 
3215 
3216 void
draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant * variant)3217 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3218 {
3219    struct draw_llvm *llvm = variant->llvm;
3220 
3221    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3222       debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3223                     variant->shader->variants_cached, llvm->nr_tcs_variants);
3224    }
3225 
3226    gallivm_destroy(variant->gallivm);
3227 
3228    list_del(&variant->list_item_local.list);
3229    variant->shader->variants_cached--;
3230    list_del(&variant->list_item_global.list);
3231    llvm->nr_tcs_variants--;
3232    if(variant->function_name)
3233       FREE(variant->function_name);
3234    FREE(variant);
3235 }
3236 
3237 
3238 struct draw_tcs_llvm_variant_key *
draw_tcs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3239 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3240 {
3241    unsigned i;
3242    struct draw_tcs_llvm_variant_key *key;
3243    struct lp_sampler_static_state *draw_sampler;
3244    struct lp_image_static_state *draw_image;
3245 
3246    key = (struct draw_tcs_llvm_variant_key *)store;
3247 
3248    memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3249 
3250    /* All variants of this shader will have the same value for
3251     * nr_samplers.  Not yet trying to compact away holes in the
3252     * sampler array.
3253     */
3254    key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3255    if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3256       key->nr_sampler_views =
3257          llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3258    } else {
3259       key->nr_sampler_views = key->nr_samplers;
3260    }
3261 
3262    key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3263 
3264    draw_sampler = key->samplers;
3265 
3266    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3267 
3268    for (i = 0 ; i < key->nr_samplers; i++) {
3269       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3270                                       llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3271    }
3272    for (i = 0 ; i < key->nr_sampler_views; i++) {
3273       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3274                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3275    }
3276 
3277    draw_image = draw_tcs_llvm_variant_key_images(key);
3278    memset(draw_image, 0,
3279           key->nr_images * sizeof *draw_image);
3280    for (i = 0; i < key->nr_images; i++) {
3281       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3282                                             llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3283    }
3284    return key;
3285 }
3286 
3287 
3288 void
draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key * key)3289 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3290 {
3291    struct lp_sampler_static_state *sampler = key->samplers;
3292    struct lp_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3293    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3294       debug_printf("sampler[%i].src_format = %s\n", i,
3295                    util_format_name(sampler[i].texture_state.format));
3296    }
3297 
3298    for (unsigned i = 0 ; i < key->nr_images; i++)
3299       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3300 }
3301 
3302 
3303 static void
create_tes_jit_types(struct draw_tes_llvm_variant * var)3304 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3305 {
3306    struct gallivm_state *gallivm = var->gallivm;
3307 
3308    var->resources_type = lp_build_jit_resources_type(gallivm);
3309    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
3310    var->input_array_deref_type = create_tes_jit_input_deref_type(gallivm);
3311    var->input_array_type = LLVMPointerType(var->input_array_deref_type, 0); /* num vertices per prim */
3312 }
3313 
3314 
3315 static LLVMTypeRef
get_tes_resources_ptr_type(struct draw_tes_llvm_variant * variant)3316 get_tes_resources_ptr_type(struct draw_tes_llvm_variant *variant)
3317 {
3318    if (!variant->resources_ptr_type)
3319       create_tes_jit_types(variant);
3320    return variant->resources_ptr_type;
3321 }
3322 
3323 
3324 static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant * variant,struct lp_type tes_type,LLVMValueRef limit,LLVMValueRef loop_counter)3325 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3326                         struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3327 {
3328    struct gallivm_state *gallivm = variant->gallivm;
3329    LLVMBuilderRef builder = gallivm->builder;
3330    struct lp_type mask_type = lp_int_type(tes_type);
3331    LLVMValueRef num_prims;
3332    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3333    unsigned i;
3334 
3335    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3336    for (i = 0; i < tes_type.length; i++) {
3337       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3338       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3339    }
3340    mask_val = lp_build_compare(gallivm, mask_type,
3341                                PIPE_FUNC_GREATER, num_prims, mask_val);
3342 
3343    return mask_val;
3344 }
3345 
3346 
3347 static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)3348 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3349                                  struct lp_build_context *bld,
3350                                  bool is_vindex_indirect,
3351                                  LLVMValueRef vertex_index,
3352                                  bool is_aindex_indirect,
3353                                  LLVMValueRef attrib_index,
3354                                  bool is_sindex_indirect,
3355                                  LLVMValueRef swizzle_index)
3356 {
3357    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3358    struct gallivm_state *gallivm = bld->gallivm;
3359    LLVMBuilderRef builder = gallivm->builder;
3360    LLVMValueRef indices[3];
3361    LLVMValueRef res;
3362    struct lp_type type = bld->type;
3363 
3364    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3365       res = bld->zero;
3366 
3367       for (int i = 0; i < type.length; ++i) {
3368          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3369          LLVMValueRef vert_chan_index = vertex_index;
3370          LLVMValueRef attr_chan_index = attrib_index;
3371          LLVMValueRef swiz_chan_index = swizzle_index;
3372          LLVMValueRef channel_vec;
3373 
3374          if (is_vindex_indirect) {
3375             vert_chan_index = LLVMBuildExtractElement(builder,
3376                                                       vertex_index, idx, "");
3377          }
3378          if (is_aindex_indirect) {
3379             attr_chan_index = LLVMBuildExtractElement(builder,
3380                                                       attrib_index, idx, "");
3381          }
3382          if (is_sindex_indirect) {
3383             swiz_chan_index = LLVMBuildExtractElement(builder,
3384                                                       swizzle_index, idx, "");
3385          }
3386 
3387          indices[0] = vert_chan_index;
3388          indices[1] = attr_chan_index;
3389          indices[2] = swiz_chan_index;
3390 
3391          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3392          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3393 
3394          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3395       }
3396    } else {
3397       indices[0] = vertex_index;
3398       indices[1] = attrib_index;
3399       indices[2] = swizzle_index;
3400 
3401       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3402       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3403       res = lp_build_broadcast_scalar(bld, res);
3404    }
3405    return res;
3406 }
3407 
3408 
3409 static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)3410 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3411                                 struct lp_build_context *bld,
3412                                 bool is_aindex_indirect,
3413                                 LLVMValueRef attrib_index,
3414                                 LLVMValueRef swizzle_index)
3415 {
3416    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3417    struct gallivm_state *gallivm = bld->gallivm;
3418    LLVMBuilderRef builder = gallivm->builder;
3419    LLVMValueRef indices[3];
3420    LLVMValueRef res;
3421    struct lp_type type = bld->type;
3422 
3423    if (is_aindex_indirect) {
3424       res = bld->zero;
3425 
3426       for (int i = 0; i < type.length; ++i) {
3427          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3428          LLVMValueRef attr_chan_index = attrib_index;
3429          LLVMValueRef channel_vec;
3430 
3431          if (is_aindex_indirect) {
3432             attr_chan_index = LLVMBuildExtractElement(builder,
3433                                                       attrib_index, idx, "");
3434          }
3435 
3436          indices[0] = lp_build_const_int32(gallivm, 0);
3437          indices[1] = attr_chan_index;
3438          indices[2] = swizzle_index;
3439 
3440          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3441          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3442 
3443          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3444       }
3445    } else {
3446       indices[0] = lp_build_const_int32(gallivm, 0);
3447       indices[1] = attrib_index;
3448       indices[2] = swizzle_index;
3449 
3450       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3451       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3452       res = lp_build_broadcast_scalar(bld, res);
3453    }
3454    return res;
3455 }
3456 
3457 
3458 static void
draw_tes_llvm_generate(struct draw_llvm * llvm,struct draw_tes_llvm_variant * variant)3459 draw_tes_llvm_generate(struct draw_llvm *llvm,
3460                        struct draw_tes_llvm_variant *variant)
3461 {
3462    struct gallivm_state *gallivm = variant->gallivm;
3463    LLVMContextRef context = gallivm->context;
3464    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3465    LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
3466    LLVMTypeRef arg_types[11];
3467    LLVMTypeRef func_type;
3468    LLVMValueRef variant_func;
3469    LLVMValueRef resources_ptr;
3470    LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
3471    LLVMValueRef view_index;
3472    LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
3473    LLVMBasicBlockRef block;
3474    LLVMBuilderRef builder;
3475    LLVMValueRef mask_val;
3476    struct lp_build_context bld, bldvec;
3477    struct lp_build_sampler_soa *sampler = 0;
3478    struct lp_build_image_soa *image = NULL;
3479    struct lp_bld_tgsi_system_values system_values;
3480    char func_name[64];
3481    unsigned i;
3482    struct draw_tes_llvm_iface tes_iface;
3483    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
3484    struct lp_build_mask_context mask;
3485    LLVMValueRef consts_ptr;
3486    LLVMValueRef ssbos_ptr;
3487    LLVMValueRef step;
3488    struct lp_type tes_type;
3489    unsigned vector_length = variant->shader->base.vector_length;
3490    int primid_slot = -1;
3491 
3492    memset(&system_values, 0, sizeof(system_values));
3493    memset(&outputs, 0, sizeof(outputs));
3494 
3495    snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
3496 
3497    LLVMTypeRef tess_outer_deref_type = LLVMArrayType(flt_type, 4);
3498    LLVMTypeRef tess_inner_deref_type = LLVMArrayType(flt_type, 2);
3499 
3500    arg_types[0] = get_tes_resources_ptr_type(variant);    /* context */
3501    arg_types[1] = variant->input_array_type;           /* input */
3502    arg_types[2] = variant->vertex_header_ptr_type;
3503    arg_types[3] = int32_type;
3504    arg_types[4] = int32_type;
3505    arg_types[5] = LLVMPointerType(flt_type, 0);
3506    arg_types[6] = LLVMPointerType(flt_type, 0);
3507    arg_types[7] = LLVMPointerType(tess_outer_deref_type, 0);
3508    arg_types[8] = LLVMPointerType(tess_inner_deref_type, 0);
3509    arg_types[9] = int32_type;
3510    arg_types[10] = int32_type;
3511 
3512    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
3513    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3514 
3515    variant->function = variant_func;
3516    variant->function_name = MALLOC(strlen(func_name)+1);
3517    strcpy(variant->function_name, func_name);
3518    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3519 
3520    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
3521       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
3522          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3523 
3524    if (gallivm->cache && gallivm->cache->data_size) {
3525       gallivm_stub_func(gallivm, variant_func);
3526       return;
3527    }
3528 
3529    resources_ptr               = LLVMGetParam(variant_func, 0);
3530    input_array               = LLVMGetParam(variant_func, 1);
3531    io_ptr                    = LLVMGetParam(variant_func, 2);
3532    prim_id                   = LLVMGetParam(variant_func, 3);
3533    num_tess_coord            = LLVMGetParam(variant_func, 4);
3534    tess_coord[0]             = LLVMGetParam(variant_func, 5);
3535    tess_coord[1]             = LLVMGetParam(variant_func, 6);
3536    tess_outer                = LLVMGetParam(variant_func, 7);
3537    tess_inner                = LLVMGetParam(variant_func, 8);
3538    patch_vertices_in         = LLVMGetParam(variant_func, 9);
3539    view_index                = LLVMGetParam(variant_func, 10);
3540 
3541    lp_build_name(resources_ptr, "resources");
3542    lp_build_name(input_array, "input");
3543    lp_build_name(io_ptr, "io");
3544    lp_build_name(prim_id, "prim_id");
3545    lp_build_name(num_tess_coord, "num_tess_coord");
3546    lp_build_name(tess_coord[0], "tess_coord[0]");
3547    lp_build_name(tess_coord[1], "tess_coord[1]");
3548    lp_build_name(tess_outer, "tess_outer");
3549    lp_build_name(tess_inner, "tess_inner");
3550    lp_build_name(patch_vertices_in, "patch_vertices_in");
3551    lp_build_name(view_index, "view_index");
3552 
3553    tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
3554    tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
3555    tes_iface.input = input_array;
3556    tes_iface.variant = variant;
3557 
3558    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3559    builder = gallivm->builder;
3560    LLVMPositionBuilderAtEnd(builder, block);
3561 
3562    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3563 
3564    memset(&tes_type, 0, sizeof tes_type);
3565    tes_type.floating = true; /* floating point values */
3566    tes_type.sign = true;     /* values are signed */
3567    tes_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
3568    tes_type.width = 32;      /* 32-bit float */
3569    tes_type.length = vector_length;
3570 
3571    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
3572    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3573 
3574    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3575 
3576    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3577                                           MAX2(variant->key.nr_samplers,
3578                                                variant->key.nr_sampler_views));
3579    image = lp_bld_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
3580                                       variant->key.nr_images);
3581    step = lp_build_const_int32(gallivm, vector_length);
3582 
3583    system_values.tess_outer = LLVMBuildLoad2(builder, tess_outer_deref_type, tess_outer, "");
3584    system_values.tess_inner = LLVMBuildLoad2(builder, tess_inner_deref_type, tess_inner, "");
3585 
3586    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3587 
3588    system_values.view_index = view_index;
3589 
3590    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3591 
3592    if (variant->key.primid_needed) {
3593       /* In a fragment shader, it (gl_PrimitiveID) will contain the [...] value that would have been
3594        * presented as input to the geometry shader had it been present.
3595        * https://docs.vulkan.org/spec/latest/chapters/interfaces.html#interfaces-builtin-variables
3596        * Store the primitive ID as-if the geometry shader did `gl_PrimitiveID = gl_PrimitiveIDIn`.
3597        */
3598       int slot = variant->key.primid_output;
3599       for (unsigned i = 0; i < 4; i++) {
3600          outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
3601          LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
3602       }
3603       primid_slot = slot;
3604    }
3605    struct lp_build_loop_state lp_loop;
3606    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
3607    {
3608       LLVMValueRef io;
3609 
3610       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &lp_loop.counter, 1, "");
3611       mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
3612       lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
3613 
3614       system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
3615       for (i = 0; i < 3; i++) {
3616          LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
3617          for (unsigned j = 0; j < vector_length; j++) {
3618             LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
3619             LLVMValueRef tc_val;
3620             if (i == 2) {
3621                if (variant->shader->base.prim_mode == MESA_PRIM_TRIANGLES) {
3622                   tc_val = lp_build_const_float(gallivm, 1.0);
3623                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[0], idx), "");
3624                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[1], idx), "");
3625                } else
3626                   tc_val = lp_build_const_float(gallivm, 0.0);
3627             } else
3628                tc_val = lp_build_pointer_get2(builder, flt_type, tess_coord[i], idx);
3629 
3630             tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
3631          }
3632          system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
3633       }
3634 
3635       struct lp_build_tgsi_params params;
3636       memset(&params, 0, sizeof(params));
3637 
3638       params.type = tes_type;
3639       params.mask = &mask;
3640       params.consts_ptr = consts_ptr;
3641       params.system_values = &system_values;
3642       params.resources_type = variant->resources_type;
3643       params.resources_ptr = resources_ptr;
3644       params.sampler = sampler;
3645       params.info = &llvm->draw->tes.tess_eval_shader->info;
3646       params.ssbo_ptr = ssbos_ptr;
3647       params.image = image;
3648       params.tes_iface = &tes_iface.base;
3649 
3650       lp_build_nir_soa(variant->gallivm,
3651                        llvm->draw->tes.tess_eval_shader->state.ir.nir,
3652                        &params,
3653                        outputs);
3654 
3655       lp_build_mask_end(&mask);
3656 
3657       if (variant->key.clamp_vertex_color) {
3658          const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
3659          do_clamp_vertex_color(variant->gallivm,
3660                                tes_type, info,
3661                                outputs);
3662       }
3663       LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
3664                                                      lp_int_type(tes_type), 0);
3665 
3666       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
3667                      draw_total_tes_outputs(llvm->draw), tes_type, primid_slot, false);
3668    }
3669    lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
3670    lp_bld_llvm_sampler_soa_destroy(sampler);
3671    lp_bld_llvm_image_soa_destroy(image);
3672 
3673    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3674    gallivm_verify_function(gallivm, variant_func);
3675 }
3676 
3677 
3678 struct draw_tes_llvm_variant *
draw_tes_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tes_llvm_variant_key * key)3679 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
3680                              unsigned num_outputs,
3681                              const struct draw_tes_llvm_variant_key *key)
3682 {
3683    struct draw_tes_llvm_variant *variant;
3684    struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
3685    char module_name[64];
3686    unsigned char ir_sha1_cache_key[20];
3687    struct lp_cached_code cached = { 0 };
3688    bool needs_caching = false;
3689 
3690    variant = MALLOC(sizeof *variant +
3691                     shader->variant_key_size - sizeof variant->key);
3692    if (!variant)
3693       return NULL;
3694 
3695    variant->llvm = llvm;
3696    variant->shader = shader;
3697 
3698    snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
3699             variant->shader->variants_cached);
3700 
3701    memcpy(&variant->key, key, shader->variant_key_size);
3702    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3703       draw_get_ir_cache_key(shader->base.state.ir.nir,
3704                             key,
3705                             shader->variant_key_size,
3706                             num_outputs,
3707                             ir_sha1_cache_key);
3708 
3709       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3710                                          &cached,
3711                                          ir_sha1_cache_key);
3712       if (!cached.data_size)
3713          needs_caching = true;
3714    }
3715    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3716 
3717    create_tes_jit_types(variant);
3718 
3719    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
3720    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
3721 
3722    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3723       nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
3724       draw_tes_llvm_dump_variant_key(&variant->key);
3725    }
3726 
3727    draw_tes_llvm_generate(llvm, variant);
3728 
3729    gallivm_compile_module(variant->gallivm);
3730 
3731    variant->jit_func = (draw_tes_jit_func)
3732       gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3733 
3734    if (needs_caching)
3735       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3736                                            &cached,
3737                                            ir_sha1_cache_key);
3738    gallivm_free_ir(variant->gallivm);
3739 
3740    variant->list_item_global.base = variant;
3741    variant->list_item_local.base = variant;
3742    /*variant->no = */shader->variants_created++;
3743    variant->list_item_global.base = variant;
3744 
3745    return variant;
3746 }
3747 
3748 
3749 void
draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant * variant)3750 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
3751 {
3752    struct draw_llvm *llvm = variant->llvm;
3753 
3754    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3755       debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
3756                     variant->shader->variants_cached, llvm->nr_tes_variants);
3757    }
3758 
3759    gallivm_destroy(variant->gallivm);
3760 
3761    list_del(&variant->list_item_local.list);
3762    variant->shader->variants_cached--;
3763    list_del(&variant->list_item_global.list);
3764    llvm->nr_tes_variants--;
3765    if(variant->function_name)
3766       FREE(variant->function_name);
3767    FREE(variant);
3768 }
3769 
3770 
3771 struct draw_tes_llvm_variant_key *
draw_tes_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3772 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3773 {
3774    struct draw_tes_llvm_variant_key *key;
3775    struct lp_sampler_static_state *draw_sampler;
3776    struct lp_image_static_state *draw_image;
3777 
3778    key = (struct draw_tes_llvm_variant_key *)store;
3779 
3780    memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
3781 
3782    int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
3783    if (primid_output >= 0) {
3784       key->primid_output = primid_output;
3785       key->primid_needed = true;
3786    }
3787 
3788    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
3789       llvm->draw->gs.geometry_shader == NULL;
3790 
3791    /* All variants of this shader will have the same value for
3792     * nr_samplers.  Not yet trying to compact away holes in the
3793     * sampler array.
3794     */
3795    key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3796    if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3797       key->nr_sampler_views =
3798          llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3799    } else {
3800       key->nr_sampler_views = key->nr_samplers;
3801    }
3802 
3803    key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3804 
3805    draw_sampler = key->samplers;
3806 
3807    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3808 
3809    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
3810       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3811                                       llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
3812    }
3813    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3814       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3815                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
3816    }
3817 
3818    draw_image = draw_tes_llvm_variant_key_images(key);
3819    memset(draw_image, 0,
3820           key->nr_images * sizeof *draw_image);
3821    for (unsigned i = 0; i < key->nr_images; i++) {
3822       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3823                                             llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
3824    }
3825    return key;
3826 }
3827 
3828 
3829 void
draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key * key)3830 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
3831 {
3832    struct lp_sampler_static_state *sampler = key->samplers;
3833    struct lp_image_static_state *image = draw_tes_llvm_variant_key_images(key);
3834 
3835    if (key->primid_needed)
3836       debug_printf("prim id output %d\n", key->primid_output);
3837    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3838    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3839       debug_printf("sampler[%i].src_format = %s\n", i,
3840                    util_format_name(sampler[i].texture_state.format));
3841    }
3842 
3843    for (unsigned i = 0 ; i < key->nr_images; i++)
3844       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3845 
3846 }
3847