/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "draw/draw_context.h"
#include "draw/draw_gs.h"
#include "draw/draw_tess.h"
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
#include "draw/draw_prim_assembler.h"
#include "draw/draw_vs.h"
#include "draw/draw_llvm.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
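

/**
 * Private state for the LLVM JIT based "middle end" of draw's vertex
 * pipeline: it fetches vertices, runs the generated VS (and optionally
 * TCS/TES/GS) variants, and then either hands the results to the vbuf
 * emit path or to the software pipeline stages.
 */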
struct llvm_middle_end {
   struct draw_pt_middle_end base;
   struct draw_context *draw;

   struct pt_emit *emit;
   struct pt_so_emit *so_emit;
   struct pt_fetch *fetch;
   struct pt_post_vs *post_vs;


   unsigned vertex_data_offset;
   unsigned vertex_size;
   enum pipe_prim_type input_prim;
   unsigned opt;

   struct draw_llvm *llvm;
   struct draw_llvm_variant *current_variant;
};


/** cast wrapper */
static inline struct llvm_middle_end *
llvm_middle_end(struct draw_pt_middle_end *middle)
{
   return (struct llvm_middle_end *) middle;
}


static void
llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
{
   struct draw_context *draw = fpme->draw;
   struct draw_llvm *llvm = fpme->llvm;
   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
   struct draw_gs_llvm_variant_key *key;
   struct draw_gs_llvm_variant *variant = NULL;
   struct draw_gs_llvm_variant_list_item *li;
   struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
   char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];

   key = draw_gs_llvm_make_variant_key(llvm, store);

   /* Search shader's list of variants for the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
      /* found the variant, move to head of global list (for LRU) */
      list_move_to(&variant->list_item_global.list, &llvm->gs_variants_list.list);
   }
   else {
      /* Need to create new variant */

      /* First check if we've created too many variants.  If so, free
       * 3.125% of the LRU to avoid using too much memory.
       */
      if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("Evicting GS: %u gs variants,\t%u total variants\n",
                         shader->variants_cached, llvm->nr_gs_variants);
         }

         /*
          * XXX: should we flush here ?
          */
         struct draw_gs_llvm_variant_list_item *item;
         for (unsigned i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
            if (list_is_empty(&llvm->gs_variants_list.list)) {
               break;
            }
            item = list_last_entry(&llvm->gs_variants_list.list,
                                   struct draw_gs_llvm_variant_list_item, list);
            assert(item);
            assert(item->base);
            draw_gs_llvm_destroy_variant(item->base);
         }
      }

      variant = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key);

      if (variant) {
         list_add(&variant->list_item_local.list, &shader->variants.list);
         list_add(&variant->list_item_global.list, &llvm->gs_variants_list.list);
         llvm->nr_gs_variants++;
         shader->variants_cached++;
      }
   }

   gs->current_variant = variant;
}
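
/*
 * Note: llvm_middle_end_prepare_gs/_tcs/_tes and the VS block in
 * llvm_middle_end_prepare() below all follow the same variant-cache
 * pattern (summarized here for reference, no new behavior implied):
 *
 *    key = draw_*_llvm_make_variant_key(llvm, store);
 *    - search the shader's local variant list by memcmp() on the key;
 *    - on a hit, move the variant to the head of the global LRU list;
 *    - on a miss, if the cache holds DRAW_MAX_SHADER_VARIANTS entries,
 *      destroy 1/32 of them starting from the LRU tail, then compile a
 *      new variant and link it into both the per-shader and global lists.
 */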

static void
llvm_middle_end_prepare_tcs(struct llvm_middle_end *fpme)
{
   struct draw_context *draw = fpme->draw;
   struct draw_llvm *llvm = fpme->llvm;
   struct draw_tess_ctrl_shader *tcs = draw->tcs.tess_ctrl_shader;
   struct draw_tcs_llvm_variant_key *key;
   struct draw_tcs_llvm_variant *variant = NULL;
   struct draw_tcs_llvm_variant_list_item *li;
   struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(tcs);
   char store[DRAW_TCS_LLVM_MAX_VARIANT_KEY_SIZE];
   unsigned i;

   key = draw_tcs_llvm_make_variant_key(llvm, store);

   /* Search shader's list of variants for the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
      /* found the variant, move to head of global list (for LRU) */
      list_move_to(&variant->list_item_global.list, &llvm->tcs_variants_list.list);
   }
   else {
      /* Need to create new variant */

      /* First check if we've created too many variants.  If so, free
       * 3.125% of the LRU to avoid using too much memory.
       */
      if (llvm->nr_tcs_variants >= DRAW_MAX_SHADER_VARIANTS) {
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("Evicting TCS: %u tcs variants,\t%u total variants\n",
                         shader->variants_cached, llvm->nr_tcs_variants);
         }

         /*
          * XXX: should we flush here ?
          */
         for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
            struct draw_tcs_llvm_variant_list_item *item;
            if (list_is_empty(&llvm->tcs_variants_list.list)) {
               break;
            }
            item = list_last_entry(&llvm->tcs_variants_list.list,
                                   struct draw_tcs_llvm_variant_list_item, list);
            assert(item);
            assert(item->base);
            draw_tcs_llvm_destroy_variant(item->base);
         }
      }

      variant = draw_tcs_llvm_create_variant(llvm, 0, key);

      if (variant) {
         list_add(&variant->list_item_local.list, &shader->variants.list);
         list_add(&variant->list_item_global.list, &llvm->tcs_variants_list.list);
         llvm->nr_tcs_variants++;
         shader->variants_cached++;
      }
   }

   tcs->current_variant = variant;
}

static void
llvm_middle_end_prepare_tes(struct llvm_middle_end *fpme)
{
   struct draw_context *draw = fpme->draw;
   struct draw_llvm *llvm = fpme->llvm;
   struct draw_tess_eval_shader *tes = draw->tes.tess_eval_shader;
   struct draw_tes_llvm_variant_key *key;
   struct draw_tes_llvm_variant *variant = NULL;
   struct draw_tes_llvm_variant_list_item *li;
   struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(tes);
   char store[DRAW_TES_LLVM_MAX_VARIANT_KEY_SIZE];
   unsigned i;

   key = draw_tes_llvm_make_variant_key(llvm, store);

   /* Search shader's list of variants for the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
      /* found the variant, move to head of global list (for LRU) */
      list_move_to(&variant->list_item_global.list, &llvm->tes_variants_list.list);
   }
   else {
      /* Need to create new variant */

      /* First check if we've created too many variants.  If so, free
       * 3.125% of the LRU to avoid using too much memory.
       */
      if (llvm->nr_tes_variants >= DRAW_MAX_SHADER_VARIANTS) {
         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("Evicting TES: %u tes variants,\t%u total variants\n",
                         shader->variants_cached, llvm->nr_tes_variants);
         }

         /*
          * XXX: should we flush here ?
          */
         for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
            struct draw_tes_llvm_variant_list_item *item;
            if (list_is_empty(&llvm->tes_variants_list.list)) {
               break;
            }
            item = list_last_entry(&llvm->tes_variants_list.list,
                                   struct draw_tes_llvm_variant_list_item, list);
            assert(item);
            assert(item->base);
            draw_tes_llvm_destroy_variant(item->base);
         }
      }

      variant = draw_tes_llvm_create_variant(llvm, draw_total_tes_outputs(draw), key);

      if (variant) {
         list_add(&variant->list_item_local.list, &shader->variants.list);
         list_add(&variant->list_item_global.list, &llvm->tes_variants_list.list);
         llvm->nr_tes_variants++;
         shader->variants_cached++;
      }
   }

   tes->current_variant = variant;
}

/**
 * Prepare/validate middle part of the vertex pipeline.
 * NOTE: if you change this function, also look at the non-LLVM
 * function fetch_pipeline_prepare() for similar changes.
 */
static void
llvm_middle_end_prepare(struct draw_pt_middle_end *middle,
                        enum pipe_prim_type in_prim,
                        unsigned opt,
                        unsigned *max_vertices)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_context *draw = fpme->draw;
   struct draw_llvm *llvm = fpme->llvm;
   struct draw_vertex_shader *vs = draw->vs.vertex_shader;
   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
   struct draw_tess_ctrl_shader *tcs = draw->tcs.tess_ctrl_shader;
   struct draw_tess_eval_shader *tes = draw->tes.tess_eval_shader;
   const enum pipe_prim_type out_prim =
      gs ? gs->output_primitive : tes ? get_tes_output_prim(tes) :
      u_assembled_prim(in_prim);
   unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
                         out_prim == PIPE_PRIM_POINTS;
   unsigned nr;

   fpme->input_prim = in_prim;
   fpme->opt = opt;

   draw_pt_post_vs_prepare(fpme->post_vs,
                           draw->clip_xy,
                           draw->clip_z,
                           draw->clip_user,
                           point_clip ? draw->guard_band_points_xy :
                                        draw->guard_band_xy,
                           draw->bypass_viewport,
                           draw->rasterizer->clip_halfz,
                           (draw->vs.edgeflag_output ? TRUE : FALSE));

   draw_pt_so_emit_prepare(fpme->so_emit, (gs == NULL && tes == NULL));

   if (!(opt & PT_PIPELINE)) {
      draw_pt_emit_prepare(fpme->emit, out_prim, max_vertices);

      *max_vertices = MAX2(*max_vertices, 4096);
   }
   else {
      /* limit max fetches by limiting max_vertices */
      *max_vertices = 4096;
   }

   /* Get the number of float[4] attributes per vertex.
    * Note: this must be done after draw_pt_emit_prepare() since that
    * can affect the vertex size.
    */
   nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));

   /* Always leave room for the vertex header whether we need it or
    * not.  It's hard to get rid of it in particular because of the
    * viewport code in draw_pt_post_vs.c.
    */
   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
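   /* Illustrative numbers only: with nr == 8 attribute slots this is
    * sizeof(struct vertex_header) + 8 * 4 * sizeof(float), i.e. the header
    * plus 128 bytes of float[4] data per vertex.
    */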

   /* return even number */
   *max_vertices = *max_vertices & ~1;

   /* Find/create the vertex shader variant */
   {
      struct draw_llvm_variant_key *key;
      struct draw_llvm_variant *variant = NULL;
      struct draw_llvm_variant_list_item *li;
      struct llvm_vertex_shader *shader = llvm_vertex_shader(vs);
      char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
      unsigned i;

      key = draw_llvm_make_variant_key(llvm, store);

      /* Search shader's list of variants for the key */
      LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
         if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
            variant = li->base;
            break;
         }
      }

      if (variant) {
         /* found the variant, move to head of global list (for LRU) */
         list_move_to(&variant->list_item_global.list, &llvm->vs_variants_list.list);
      }
      else {
         /* Need to create new variant */

         /* First check if we've created too many variants.  If so, free
          * 3.125% of the LRU to avoid using too much memory.
          */
         if (llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
            if (gallivm_debug & GALLIVM_DEBUG_PERF) {
               debug_printf("Evicting VS: %u vs variants,\t%u total variants\n",
                            shader->variants_cached, llvm->nr_variants);
            }

            /*
             * XXX: should we flush here ?
             */
            for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
               struct draw_llvm_variant_list_item *item;
               if (list_is_empty(&llvm->vs_variants_list.list)) {
                  break;
               }
               item = list_last_entry(&llvm->vs_variants_list.list,
                                      struct draw_llvm_variant_list_item, list);
               assert(item);
               assert(item->base);
               draw_llvm_destroy_variant(item->base);
            }
         }

         variant = draw_llvm_create_variant(llvm, nr, key);

         if (variant) {
            list_add(&variant->list_item_local.list, &shader->variants.list);
            list_add(&variant->list_item_global.list, &llvm->vs_variants_list.list);
            llvm->nr_variants++;
            shader->variants_cached++;
         }
      }

      fpme->current_variant = variant;
   }

   if (gs) {
      llvm_middle_end_prepare_gs(fpme);
   }
   if (tcs) {
      llvm_middle_end_prepare_tcs(fpme);
   }
   if (tes) {
      llvm_middle_end_prepare_tes(fpme);
   }
}

static unsigned
get_num_consts_robust(struct draw_context *draw, unsigned *sizes, unsigned idx)
{
   unsigned const_bytes = sizes[idx];

   if (const_bytes < sizeof(float))
      return 0;

   return DIV_ROUND_UP(const_bytes, draw->constant_buffer_stride);
}
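
/*
 * Example, assuming a 16-byte (vec4) constant_buffer_stride: a 68-byte
 * buffer yields DIV_ROUND_UP(68, 16) == 5 constant slots, while anything
 * smaller than a single float counts as no constants at all.
 */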

/**
 * Bind/update constant buffer pointers, clip planes and viewport dims.
 * These are "light weight" parameters which aren't baked into the
 * generated code.  Updating these items is much cheaper than revalidating
 * and rebuilding the generated pipeline code.
 */
static void
llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
{
   static const float fake_const_buf[4];
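   /* Zero-filled dummy buffer, bound below in place of any constant buffer
    * or SSBO slot that is unset/empty, so the generated code always sees a
    * valid pointer.
    */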
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_context *draw = fpme->draw;
   struct draw_llvm *llvm = fpme->llvm;
   unsigned i;

   for (i = 0; i < ARRAY_SIZE(llvm->jit_context.vs_constants); ++i) {
      /*
       * Rounding this up could be a potential issue, since the generated
       * shader expects 16-byte allocations.  The likely fix is to move to
       * a LOAD intrinsic in the future and remove the vec4 constraint.
       */
      int num_consts = get_num_consts_robust(draw, draw->pt.user.vs_constants_size, i);
      llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
      llvm->jit_context.num_vs_constants[i] = num_consts;
      if (num_consts == 0) {
         llvm->jit_context.vs_constants[i] = fake_const_buf;
      }
   }
   for (i = 0; i < ARRAY_SIZE(llvm->jit_context.vs_ssbos); ++i) {
      int num_ssbos = draw->pt.user.vs_ssbos_size[i];
      llvm->jit_context.vs_ssbos[i] = draw->pt.user.vs_ssbos[i];
      llvm->jit_context.num_vs_ssbos[i] = num_ssbos;
      if (num_ssbos == 0) {
         llvm->jit_context.vs_ssbos[i] = (const uint32_t *)fake_const_buf;
      }
   }

   for (i = 0; i < ARRAY_SIZE(llvm->gs_jit_context.constants); ++i) {
      int num_consts = get_num_consts_robust(draw, draw->pt.user.gs_constants_size, i);
      llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
      llvm->gs_jit_context.num_constants[i] = num_consts;
      if (num_consts == 0) {
         llvm->gs_jit_context.constants[i] = fake_const_buf;
      }
   }
   for (i = 0; i < ARRAY_SIZE(llvm->gs_jit_context.ssbos); ++i) {
      int num_ssbos = draw->pt.user.gs_ssbos_size[i];
      llvm->gs_jit_context.ssbos[i] = draw->pt.user.gs_ssbos[i];
      llvm->gs_jit_context.num_ssbos[i] = num_ssbos;
      if (num_ssbos == 0) {
         llvm->gs_jit_context.ssbos[i] = (const uint32_t *)fake_const_buf;
      }
   }

   for (i = 0; i < ARRAY_SIZE(llvm->tcs_jit_context.constants); ++i) {
      int num_consts = get_num_consts_robust(draw, draw->pt.user.tcs_constants_size, i);
      llvm->tcs_jit_context.constants[i] = draw->pt.user.tcs_constants[i];
      llvm->tcs_jit_context.num_constants[i] = num_consts;
      if (num_consts == 0) {
         llvm->tcs_jit_context.constants[i] = fake_const_buf;
      }
   }
   for (i = 0; i < ARRAY_SIZE(llvm->tcs_jit_context.ssbos); ++i) {
      int num_ssbos = draw->pt.user.tcs_ssbos_size[i];
      llvm->tcs_jit_context.ssbos[i] = draw->pt.user.tcs_ssbos[i];
      llvm->tcs_jit_context.num_ssbos[i] = num_ssbos;
      if (num_ssbos == 0) {
         llvm->tcs_jit_context.ssbos[i] = (const uint32_t *)fake_const_buf;
      }
   }

   for (i = 0; i < ARRAY_SIZE(llvm->tes_jit_context.constants); ++i) {
      int num_consts = get_num_consts_robust(draw, draw->pt.user.tes_constants_size, i);
      llvm->tes_jit_context.constants[i] = draw->pt.user.tes_constants[i];
      llvm->tes_jit_context.num_constants[i] = num_consts;
      if (num_consts == 0) {
         llvm->tes_jit_context.constants[i] = fake_const_buf;
      }
   }
   for (i = 0; i < ARRAY_SIZE(llvm->tes_jit_context.ssbos); ++i) {
      int num_ssbos = draw->pt.user.tes_ssbos_size[i];
      llvm->tes_jit_context.ssbos[i] = draw->pt.user.tes_ssbos[i];
      llvm->tes_jit_context.num_ssbos[i] = num_ssbos;
      if (num_ssbos == 0) {
         llvm->tes_jit_context.ssbos[i] = (const uint32_t *)fake_const_buf;
      }
   }

   llvm->jit_context.planes =
      (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
   llvm->gs_jit_context.planes =
      (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];

   llvm->jit_context.viewports = draw->viewports;
   llvm->gs_jit_context.viewports = draw->viewports;

   llvm->jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
   llvm->gs_jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
   llvm->tcs_jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
   llvm->tes_jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
}


static void
pipeline(struct llvm_middle_end *llvm,
         const struct draw_vertex_info *vert_info,
         const struct draw_prim_info *prim_info)
{
   if (prim_info->linear)
      draw_pipeline_run_linear(llvm->draw,
                               vert_info,
                               prim_info);
   else
      draw_pipeline_run(llvm->draw,
                        vert_info,
                        prim_info);
}


static void
emit(struct pt_emit *emit,
     const struct draw_vertex_info *vert_info,
     const struct draw_prim_info *prim_info)
{
   if (prim_info->linear) {
      draw_pt_emit_linear(emit, vert_info, prim_info);
   }
   else {
      draw_pt_emit(emit, vert_info, prim_info);
   }
}


static void
llvm_pipeline_generic(struct draw_pt_middle_end *middle,
                      const struct draw_fetch_info *fetch_info,
                      const struct draw_prim_info *in_prim_info)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_context *draw = fpme->draw;
   struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
   struct draw_tess_ctrl_shader *tcs_shader = draw->tcs.tess_ctrl_shader;
   struct draw_tess_eval_shader *tes_shader = draw->tes.tess_eval_shader;
   struct draw_prim_info tcs_prim_info;
   struct draw_prim_info tes_prim_info;
   struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS];
   struct draw_vertex_info llvm_vert_info;
   struct draw_vertex_info tcs_vert_info;
   struct draw_vertex_info tes_vert_info;
   struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS];
   struct draw_vertex_info *vert_info;
   struct draw_prim_info ia_prim_info;
   struct draw_vertex_info ia_vert_info;
   const struct draw_prim_info *prim_info = in_prim_info;
   boolean free_prim_info = FALSE;
   unsigned opt = fpme->opt;
   boolean clipped = 0;
   unsigned start_or_maxelt, vid_base;
   const unsigned *elts;
   ushort *tes_elts_out = NULL;

   memset(&gs_vert_info, 0, sizeof(struct draw_vertex_info) * TGSI_MAX_VERTEX_STREAMS);
   assert(fetch_info->count > 0);
   llvm_vert_info.count = fetch_info->count;
   llvm_vert_info.vertex_size = fpme->vertex_size;
   llvm_vert_info.stride = fpme->vertex_size;
   llvm_vert_info.verts = (struct vertex_header *)
      MALLOC(fpme->vertex_size *
             align(fetch_info->count, lp_native_vector_width / 32) +
             DRAW_EXTRA_VERTICES_PADDING);
   if (!llvm_vert_info.verts) {
      assert(0);
      return;
   }

   if (draw->collect_statistics) {
      draw->statistics.ia_vertices += prim_info->count;
      if (prim_info->prim == PIPE_PRIM_PATCHES)
         draw->statistics.ia_primitives += prim_info->count / draw->pt.vertices_per_patch;
      else
         draw->statistics.ia_primitives +=
            u_decomposed_prims_for_vertices(prim_info->prim, prim_info->count);
      draw->statistics.vs_invocations += fetch_info->count;
   }
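
   /* (For reference: u_decomposed_prims_for_vertices() counts primitives
    * after decomposition, e.g. a 6-vertex PIPE_PRIM_TRIANGLE_STRIP counts
    * as 4 triangles, while patches are simply count / vertices_per_patch.)
    */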

   if (fetch_info->linear) {
      start_or_maxelt = fetch_info->start;
      vid_base = draw->start_index;
      elts = NULL;
   }
   else {
      start_or_maxelt = draw->pt.user.eltMax;
      vid_base = draw->pt.user.eltBias;
      elts = fetch_info->elts;
   }
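   /* Note the overloaded JIT arguments: for linear draws start_or_maxelt
    * is the first vertex and vid_base the start index, while for indexed
    * draws they hold eltMax and eltBias and the actual indices are passed
    * in elts.
    */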
   clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context,
                                             llvm_vert_info.verts,
                                             draw->pt.user.vbuffer,
                                             fetch_info->count,
                                             start_or_maxelt,
                                             fpme->vertex_size,
                                             draw->pt.vertex_buffer,
                                             draw->instance_id,
                                             vid_base,
                                             draw->start_instance,
                                             elts, draw->pt.user.drawid,
                                             draw->pt.user.viewid);

   /* Finished with fetch and vs:
    */
   fetch_info = NULL;
   vert_info = &llvm_vert_info;

   if (opt & PT_SHADE) {
      struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
      if (tcs_shader) {
         draw_tess_ctrl_shader_run(tcs_shader,
                                   draw->pt.user.tcs_constants,
                                   draw->pt.user.tcs_constants_size,
                                   vert_info,
                                   prim_info,
                                   &vshader->info,
                                   &tcs_vert_info,
                                   &tcs_prim_info);
         FREE(vert_info->verts);
         vert_info = &tcs_vert_info;
         prim_info = &tcs_prim_info;

      } else if (tes_shader) {
         unsigned num_prims = prim_info->count / draw->pt.vertices_per_patch;
         tcs_prim_info = *prim_info;
         tcs_prim_info.primitive_count = num_prims;
         prim_info = &tcs_prim_info;
      }

      if (tes_shader) {
         draw_tess_eval_shader_run(tes_shader,
                                   draw->pt.user.tes_constants,
                                   draw->pt.user.tes_constants_size,
                                   tcs_shader ? tcs_shader->vertices_out : draw->pt.vertices_per_patch,
                                   vert_info,
                                   prim_info,
                                   tcs_shader ? &tcs_shader->info : &vshader->info,
                                   &tes_vert_info,
                                   &tes_prim_info, &tes_elts_out);

         FREE(vert_info->verts);
         vert_info = &tes_vert_info;
         prim_info = &tes_prim_info;
         free_prim_info = TRUE;

         /*
          * pt emit can only handle ushort number of vertices (see
          * render->allocate_vertices).
          * vsplit guarantees there's never more than 4096, however
          * tessellation can easily blow this up.
          */
         if (vert_info->count > 65535) {
            opt |= PT_PIPELINE;
         }
      }
   }

   if ((opt & PT_SHADE) && gshader) {
      struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
      draw_geometry_shader_run(gshader,
                               draw->pt.user.gs_constants,
                               draw->pt.user.gs_constants_size,
                               vert_info,
                               prim_info,
                               tes_shader ? &tes_shader->info : &vshader->info,
                               gs_vert_info,
                               gs_prim_info);

      FREE(vert_info->verts);
      if (free_prim_info) {
         FREE(prim_info->primitive_lengths);
         FREE(tes_elts_out);
      }
      vert_info = &gs_vert_info[0];
      prim_info = &gs_prim_info[0];
      free_prim_info = FALSE;
      /*
       * pt emit can only handle ushort number of vertices (see
       * render->allocate_vertices).
       * vsplit guarantees there's never more than 4096, however GS can
       * easily blow this up (by a factor of 256 (or even 1024) max).
       */
      if (vert_info->count > 65535) {
         opt |= PT_PIPELINE;
      }
   } else {
      if (!tes_shader && draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
         draw_prim_assembler_run(draw, prim_info, vert_info,
                                 &ia_prim_info, &ia_vert_info);

         if (ia_vert_info.count) {
            FREE(vert_info->verts);
            if (free_prim_info) {
               FREE(prim_info->primitive_lengths);
               FREE(tes_elts_out);
               tes_elts_out = NULL;
            }
            vert_info = &ia_vert_info;
            prim_info = &ia_prim_info;
            free_prim_info = TRUE;
         }
      }
   }

   /* stream output needs to be done before clipping */
   draw_pt_so_emit(fpme->so_emit,
                   gshader ? gshader->num_vertex_streams : 1,
                   vert_info, prim_info);

   if (prim_info->count == 0) {
      debug_printf("GS/IA didn't emit any vertices!\n");
      goto out;
   }

   draw_stats_clipper_primitives(draw, prim_info);

   /*
    * If there's no position output, we need to stop now, or the later
    * stages will try to access a non-existent position output.
    */
   if (draw_current_shader_position_output(draw) != -1) {
      if ((opt & PT_SHADE) && (gshader || tes_shader ||
                               draw->vs.vertex_shader->info.writes_viewport_index)) {
         clipped = draw_pt_post_vs_run(fpme->post_vs, vert_info, prim_info);
      }
      /* "clipped" also includes non-one edgeflag */
      if (clipped) {
         opt |= PT_PIPELINE;
      }

      /* Do we need to run the pipeline?  We will also come here if the
       * vertices were clipped.
       */
      if (opt & PT_PIPELINE) {
         pipeline(fpme, vert_info, prim_info);
      }
      else {
         emit(fpme->emit, vert_info, prim_info);
      }
   }
out:
   FREE(vert_info->verts);
   if (gshader && gshader->num_vertex_streams > 1)
      for (unsigned i = 1; i < gshader->num_vertex_streams; i++)
         FREE(gs_vert_info[i].verts);

   if (free_prim_info) {
      FREE(tes_elts_out);
      FREE(prim_info->primitive_lengths);
   }
}


static inline enum pipe_prim_type
prim_type(enum pipe_prim_type prim, unsigned flags)
{
   if (flags & DRAW_LINE_LOOP_AS_STRIP)
      return PIPE_PRIM_LINE_STRIP;
   else
      return prim;
}
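
/*
 * (DRAW_LINE_LOOP_AS_STRIP is set by the frontend when it has already
 * decomposed a line loop, so the middle end treats the primitive as a
 * plain line strip here.)
 */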


static void
llvm_middle_end_run(struct draw_pt_middle_end *middle,
                    const unsigned *fetch_elts,
                    unsigned fetch_count,
                    const ushort *draw_elts,
                    unsigned draw_count,
                    unsigned prim_flags)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_fetch_info fetch_info;
   struct draw_prim_info prim_info;

   fetch_info.linear = FALSE;
   fetch_info.start = 0;
   fetch_info.elts = fetch_elts;
   fetch_info.count = fetch_count;

   prim_info.linear = FALSE;
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;

   llvm_pipeline_generic(middle, &fetch_info, &prim_info);
}


static void
llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
                           unsigned start,
                           unsigned count,
                           unsigned prim_flags)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_fetch_info fetch_info;
   struct draw_prim_info prim_info;

   fetch_info.linear = TRUE;
   fetch_info.start = start;
   fetch_info.count = count;
   fetch_info.elts = NULL;

   prim_info.linear = TRUE;
   prim_info.start = 0;
   prim_info.count = count;
   prim_info.elts = NULL;
   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &count;

   llvm_pipeline_generic(middle, &fetch_info, &prim_info);
}


static boolean
llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
                                unsigned start,
                                unsigned count,
                                const ushort *draw_elts,
                                unsigned draw_count,
                                unsigned prim_flags)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);
   struct draw_fetch_info fetch_info;
   struct draw_prim_info prim_info;

   fetch_info.linear = TRUE;
   fetch_info.start = start;
   fetch_info.count = count;
   fetch_info.elts = NULL;

   prim_info.linear = FALSE;
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;

   llvm_pipeline_generic(middle, &fetch_info, &prim_info);

   return TRUE;
}


static void
llvm_middle_end_finish(struct draw_pt_middle_end *middle)
{
   /* nothing to do */
}


static void
llvm_middle_end_destroy(struct draw_pt_middle_end *middle)
{
   struct llvm_middle_end *fpme = llvm_middle_end(middle);

   if (fpme->fetch)
      draw_pt_fetch_destroy(fpme->fetch);

   if (fpme->emit)
      draw_pt_emit_destroy(fpme->emit);

   if (fpme->so_emit)
      draw_pt_so_emit_destroy(fpme->so_emit);

   if (fpme->post_vs)
      draw_pt_post_vs_destroy(fpme->post_vs);

   FREE(middle);
}


struct draw_pt_middle_end *
draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
{
   struct llvm_middle_end *fpme = NULL;

   if (!draw->llvm)
      return NULL;

   fpme = CALLOC_STRUCT(llvm_middle_end);
   if (!fpme)
      goto fail;

   fpme->base.prepare = llvm_middle_end_prepare;
   fpme->base.bind_parameters = llvm_middle_end_bind_parameters;
   fpme->base.run = llvm_middle_end_run;
   fpme->base.run_linear = llvm_middle_end_linear_run;
   fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
   fpme->base.finish = llvm_middle_end_finish;
   fpme->base.destroy = llvm_middle_end_destroy;

   fpme->draw = draw;

   fpme->fetch = draw_pt_fetch_create(draw);
   if (!fpme->fetch)
      goto fail;

   fpme->post_vs = draw_pt_post_vs_create(draw);
   if (!fpme->post_vs)
      goto fail;

   fpme->emit = draw_pt_emit_create(draw);
   if (!fpme->emit)
      goto fail;

   fpme->so_emit = draw_pt_so_emit_create(draw);
   if (!fpme->so_emit)
      goto fail;

   fpme->llvm = draw->llvm;
   if (!fpme->llvm)
      goto fail;

   fpme->current_variant = NULL;

   return &fpme->base;

fail:
   if (fpme)
      llvm_middle_end_destroy(&fpme->base);

   return NULL;
}
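
/*
 * Usage sketch (illustrative only; the real call sites live elsewhere in
 * the draw module, e.g. its prim-type setup code):
 *
 *    struct draw_pt_middle_end *me =
 *       draw_pt_fetch_pipeline_or_emit_llvm(draw);  // NULL if !draw->llvm
 *    if (me) {
 *       me->prepare(me, PIPE_PRIM_TRIANGLES, opt, &max_vertices);
 *       me->bind_parameters(me);
 *       me->run_linear(me, 0, vertex_count, 0);
 *       me->finish(me);
 *       me->destroy(me);
 *    }
 */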