1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Tiling engine.
30 *
31 * Builds per-tile display lists and executes them on calls to
32 * lp_setup_flush().
33 */
34
35 #include <limits.h>
36
37 #include "pipe/p_defines.h"
38 #include "util/u_framebuffer.h"
39 #include "util/u_inlines.h"
40 #include "util/u_memory.h"
41 #include "util/u_pack_color.h"
42 #include "util/u_cpu_detect.h"
43 #include "util/u_viewport.h"
44 #include "draw/draw_pipe.h"
45 #include "util/os_time.h"
46 #include "lp_context.h"
47 #include "lp_memory.h"
48 #include "lp_scene.h"
49 #include "lp_texture.h"
50 #include "lp_debug.h"
51 #include "lp_fence.h"
52 #include "lp_query.h"
53 #include "lp_rast.h"
54 #include "lp_setup_context.h"
55 #include "lp_screen.h"
56 #include "lp_state.h"
57 #include "lp_jit.h"
58 #include "frontend/sw_winsys.h"
59
60 #include "draw/draw_context.h"
61 #include "draw/draw_vbuf.h"
62
63
64 static boolean set_scene_state( struct lp_setup_context *, enum setup_state,
65 const char *reason);
66 static boolean try_update_scene_state( struct lp_setup_context *setup );
67
68
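/**
 * Rotate to the next scene in the ring and make it current.  If the
 * rasterizer still holds a fence for that scene, wait for it to complete
 * before reusing the scene, then start binning into it with the current
 * framebuffer state.
 */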
69 static void
70 lp_setup_get_empty_scene(struct lp_setup_context *setup)
71 {
72 assert(setup->scene == NULL);
73
74 setup->scene_idx++;
75 setup->scene_idx %= ARRAY_SIZE(setup->scenes);
76
77 setup->scene = setup->scenes[setup->scene_idx];
78
79 if (setup->scene->fence) {
80 if (LP_DEBUG & DEBUG_SETUP)
81 debug_printf("%s: wait for scene %d\n",
82 __FUNCTION__, setup->scene->fence->id);
83
84 lp_fence_wait(setup->scene->fence);
85 }
86
87 lp_scene_begin_binning(setup->scene, &setup->fb);
88
89 setup->scene->permit_linear_rasterizer = setup->permit_linear_rasterizer;
90 }
91
92
93 static void
94 first_triangle( struct lp_setup_context *setup,
95 const float (*v0)[4],
96 const float (*v1)[4],
97 const float (*v2)[4])
98 {
99 assert(setup->state == SETUP_ACTIVE);
100 lp_setup_choose_triangle( setup );
101 setup->triangle( setup, v0, v1, v2 );
102 }
103
104 static boolean
105 first_rectangle( struct lp_setup_context *setup,
106 const float (*v0)[4],
107 const float (*v1)[4],
108 const float (*v2)[4],
109 const float (*v3)[4],
110 const float (*v4)[4],
111 const float (*v5)[4])
112 {
113 assert(setup->state == SETUP_ACTIVE);
114 lp_setup_choose_rect( setup );
115 return setup->rect( setup, v0, v1, v2, v3, v4, v5 );
116 }
117
118 static void
119 first_line( struct lp_setup_context *setup,
120 const float (*v0)[4],
121 const float (*v1)[4])
122 {
123 assert(setup->state == SETUP_ACTIVE);
124 lp_setup_choose_line( setup );
125 setup->line( setup, v0, v1 );
126 }
127
128 static void
129 first_point( struct lp_setup_context *setup,
130 const float (*v0)[4])
131 {
132 assert(setup->state == SETUP_ACTIVE);
133 lp_setup_choose_point( setup );
134 setup->point( setup, v0 );
135 }
136
137 void
138 lp_setup_reset( struct lp_setup_context *setup )
139 {
140 unsigned i;
141
142 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
143
144 /* Reset derived state */
145 for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
146 setup->constants[i].stored_size = 0;
147 setup->constants[i].stored_data = NULL;
148 }
149
150 setup->fs.stored = NULL;
151 setup->dirty = ~0;
152
153 /* no current bin */
154 setup->scene = NULL;
155
156 /* Reset some state:
157 */
158 memset(&setup->clear, 0, sizeof setup->clear);
159
160 /* Have an explicit "start-binning" call and get rid of this
161 * pointer twiddling?
162 */
163 setup->line = first_line;
164 setup->point = first_point;
165 setup->triangle = first_triangle;
166 setup->rect = first_rectangle;
167 }
168
169
170 /** Rasterize all scene's bins */
171 static void
172 lp_setup_rasterize_scene( struct lp_setup_context *setup )
173 {
174 struct lp_scene *scene = setup->scene;
175 struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
176
177 scene->num_active_queries = setup->active_binned_queries;
178 memcpy(scene->active_queries, setup->active_queries,
179 scene->num_active_queries * sizeof(scene->active_queries[0]));
180
181 lp_scene_end_binning(scene);
182
183 lp_fence_reference(&setup->last_fence, scene->fence);
184
185 if (setup->last_fence)
186 setup->last_fence->issued = TRUE;
187
188 mtx_lock(&screen->rast_mutex);
189
190 /* FIXME: We enqueue the scene and then wait for the rasterizer to finish.
191 * This means we never actually run any vertex work in parallel with
192 * rasterization (not in the same context at least), which is what having
193 * multiple scenes per setup is about - by the time we get a new empty scene,
194 * any old one is already empty again because we waited here for the
195 * raster tasks to be finished. Ideally, we shouldn't need to wait here
196 * and should instead rely on fences elsewhere when waiting is necessary.
197 * Certainly, lp_scene_end_rasterization() would need to be deferred too,
198 * and there are probably other reasons why this doesn't actually work yet.
199 */
200 lp_rast_queue_scene(screen->rast, scene);
201 lp_rast_finish(screen->rast);
202 mtx_unlock(&screen->rast_mutex);
203
204 lp_scene_end_rasterization(setup->scene);
205 lp_setup_reset( setup );
206
207 LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
208 }
209
210
211
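/**
 * Start binning into the current scene: create the scene's fence, store the
 * current state into the scene, and bin any whole-surface clears that were
 * accumulated while in the SETUP_CLEARED state.  Returns FALSE if scene
 * allocation fails.
 */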
212 static boolean
213 begin_binning( struct lp_setup_context *setup )
214 {
215 struct lp_scene *scene = setup->scene;
216 boolean need_zsload = FALSE;
217 boolean ok;
218
219 assert(scene);
220 assert(scene->fence == NULL);
221
222 /* Always create a fence:
223 */
224 scene->fence = lp_fence_create(MAX2(1, setup->num_threads));
225 if (!scene->fence)
226 return FALSE;
227
228 ok = try_update_scene_state(setup);
229 if (!ok)
230 return FALSE;
231
232 if (setup->fb.zsbuf &&
233 ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
234 util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
235 need_zsload = TRUE;
236
237 LP_DBG(DEBUG_SETUP, "%s color clear bufs: %x depth: %s\n", __FUNCTION__,
238 setup->clear.flags >> 2,
239 need_zsload ? "clear": "load");
240
241 if (setup->clear.flags & PIPE_CLEAR_COLOR) {
242 unsigned cbuf;
243 for (cbuf = 0; cbuf < setup->fb.nr_cbufs; cbuf++) {
244 assert(PIPE_CLEAR_COLOR0 == 1 << 2);
245 if (setup->clear.flags & (1 << (2 + cbuf))) {
246 union lp_rast_cmd_arg clearrb_arg;
247 struct lp_rast_clear_rb *cc_scene =
248 (struct lp_rast_clear_rb *)
249 lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
250
251 if (!cc_scene) {
252 return FALSE;
253 }
254
255 cc_scene->cbuf = cbuf;
256 cc_scene->color_val = setup->clear.color_val[cbuf];
257 clearrb_arg.clear_rb = cc_scene;
258
259 if (!lp_scene_bin_everywhere(scene,
260 LP_RAST_OP_CLEAR_COLOR,
261 clearrb_arg))
262 return FALSE;
263 }
264 }
265 }
266
267 if (setup->fb.zsbuf) {
268 if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
269 ok = lp_scene_bin_everywhere( scene,
270 LP_RAST_OP_CLEAR_ZSTENCIL,
271 lp_rast_arg_clearzs(
272 setup->clear.zsvalue,
273 setup->clear.zsmask));
274 if (!ok)
275 return FALSE;
276 }
277 }
278
279 setup->clear.flags = 0;
280 setup->clear.zsmask = 0;
281 setup->clear.zsvalue = 0;
282
283 scene->had_queries = !!setup->active_binned_queries;
284
285 LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
286 return TRUE;
287 }
288
289
290 /* This basically bins and then flushes any outstanding full-screen
291 * clears.
292 *
293 * TODO: fast path for fullscreen clears and no triangles.
294 */
295 static boolean
296 execute_clears( struct lp_setup_context *setup )
297 {
298 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
299
300 return begin_binning( setup );
301 }
302
303 static const char *states[] = {
304 "FLUSHED",
305 "CLEARED",
306 "ACTIVE "
307 };
308
309
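/**
 * Transition the setup context between the FLUSHED, CLEARED and ACTIVE
 * states.  Leaving FLUSHED grabs an empty scene; entering ACTIVE starts
 * binning; entering FLUSHED executes any pending clears and rasterizes the
 * scene.  On failure the context is reset to FLUSHED and FALSE is returned.
 */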
310 static boolean
311 set_scene_state( struct lp_setup_context *setup,
312 enum setup_state new_state,
313 const char *reason)
314 {
315 unsigned old_state = setup->state;
316
317 if (old_state == new_state)
318 return TRUE;
319
320 if (LP_DEBUG & DEBUG_SCENE) {
321 debug_printf("%s old %s new %s%s%s\n",
322 __FUNCTION__,
323 states[old_state],
324 states[new_state],
325 (new_state == SETUP_FLUSHED) ? ": " : "",
326 (new_state == SETUP_FLUSHED) ? reason : "");
327
328 if (new_state == SETUP_FLUSHED && setup->scene)
329 lp_debug_draw_bins_by_cmd_length(setup->scene);
330 }
331
332 /* wait for a free/empty scene
333 */
334 if (old_state == SETUP_FLUSHED)
335 lp_setup_get_empty_scene(setup);
336
337 switch (new_state) {
338 case SETUP_CLEARED:
339 break;
340
341 case SETUP_ACTIVE:
342 if (!begin_binning( setup ))
343 goto fail;
344 break;
345
346 case SETUP_FLUSHED:
347 if (old_state == SETUP_CLEARED)
348 if (!execute_clears( setup ))
349 goto fail;
350
351 lp_setup_rasterize_scene( setup );
352 assert(setup->scene == NULL);
353 break;
354
355 default:
356 assert(0 && "invalid setup state mode");
357 goto fail;
358 }
359
360 setup->state = new_state;
361 return TRUE;
362
363 fail:
364 if (setup->scene) {
365 lp_scene_end_rasterization(setup->scene);
366 setup->scene = NULL;
367 }
368
369 setup->state = SETUP_FLUSHED;
370 lp_setup_reset( setup );
371 return FALSE;
372 }
373
374
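/**
 * Flush everything binned so far and, if requested, return a fence for the
 * last rasterized scene (or a dummy fence if nothing has been rasterized
 * yet, so callers always get a valid handle).
 */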
375 void
376 lp_setup_flush( struct lp_setup_context *setup,
377 struct pipe_fence_handle **fence,
378 const char *reason)
379 {
380 set_scene_state( setup, SETUP_FLUSHED, reason );
381
382 if (fence) {
383 lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
384 if (!*fence)
385 *fence = (struct pipe_fence_handle *)lp_fence_create(0);
386 }
387 }
388
389
390 void
391 lp_setup_bind_framebuffer( struct lp_setup_context *setup,
392 const struct pipe_framebuffer_state *fb )
393 {
394 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
395
396 /* Flush any old scene.
397 */
398 set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
399
400 /*
401 * Ensure the old scene is not reused.
402 */
403 assert(!setup->scene);
404
405 /* Set new state. This will be picked up later when we next need a
406 * scene.
407 */
408 util_copy_framebuffer_state(&setup->fb, fb);
409 setup->framebuffer.x0 = 0;
410 setup->framebuffer.y0 = 0;
411 setup->framebuffer.x1 = fb->width-1;
412 setup->framebuffer.y1 = fb->height-1;
413 setup->dirty |= LP_SETUP_NEW_SCISSOR;
414 }
415
416
417 /*
418 * Try to clear one color buffer of the attached fb, either by binning a clear
419 * command or queuing up the clear for later (when binning is started).
420 */
421 static boolean
422 lp_setup_try_clear_color_buffer(struct lp_setup_context *setup,
423 const union pipe_color_union *color,
424 unsigned cbuf)
425 {
426 union lp_rast_cmd_arg clearrb_arg;
427 union util_color uc;
428 enum pipe_format format = setup->fb.cbufs[cbuf]->format;
429
430 LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
431
432 util_pack_color_union(format, &uc, color);
433
434 if (setup->state == SETUP_ACTIVE) {
435 struct lp_scene *scene = setup->scene;
436
437 /* Add the clear to existing scene. In the unusual case where
438 * both color and depth-stencil are being cleared when there's
439 * already been some rendering, we could discard the currently
440 * binned scene and start again, but I don't see that as being
441 * a common usage.
442 */
443 struct lp_rast_clear_rb *cc_scene =
444 (struct lp_rast_clear_rb *)
445 lp_scene_alloc_aligned(scene, sizeof(struct lp_rast_clear_rb), 8);
446
447 if (!cc_scene) {
448 return FALSE;
449 }
450
451 cc_scene->cbuf = cbuf;
452 cc_scene->color_val = uc;
453 clearrb_arg.clear_rb = cc_scene;
454
455 if (!lp_scene_bin_everywhere(scene,
456 LP_RAST_OP_CLEAR_COLOR,
457 clearrb_arg))
458 return FALSE;
459 }
460 else {
461 /* Put ourselves into the 'pre-clear' state, specifically to try
462 * and accumulate multiple clears to color and depth_stencil
463 * buffers which the app or gallium frontend might issue
464 * separately.
465 */
466 set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
467
468 assert(PIPE_CLEAR_COLOR0 == (1 << 2));
469 setup->clear.flags |= 1 << (cbuf + 2);
470 setup->clear.color_val[cbuf] = uc;
471 }
472
473 return TRUE;
474 }
475
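/*
 * Depth/stencil counterpart of the color-buffer clear above: either bin a
 * clear command into the active scene, or accumulate the value/mask into
 * setup->clear for when binning starts.  For Z24X8/X8Z24 the mask is widened
 * to also cover the "X" bits so the clear can be done without a
 * read-modify-write.
 */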
476 static boolean
477 lp_setup_try_clear_zs(struct lp_setup_context *setup,
478 double depth,
479 unsigned stencil,
480 unsigned flags)
481 {
482 uint64_t zsmask = 0;
483 uint64_t zsvalue = 0;
484 uint32_t zmask32;
485 uint8_t smask8;
486 enum pipe_format format = setup->fb.zsbuf->format;
487
488 LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
489
490 zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
491 smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
492
493 zsvalue = util_pack64_z_stencil(format, depth, stencil);
494
495 zsmask = util_pack64_mask_z_stencil(format, zmask32, smask8);
496
497 zsvalue &= zsmask;
498
499 if (format == PIPE_FORMAT_Z24X8_UNORM ||
500 format == PIPE_FORMAT_X8Z24_UNORM) {
501 /*
502 * Make full mask if there's "X" bits so we can do full
503 * clear (without rmw).
504 */
505 uint32_t zsmask_full = 0;
506 zsmask_full = util_pack_mask_z_stencil(format, ~0, ~0);
507 zsmask |= ~zsmask_full;
508 }
509
510 if (setup->state == SETUP_ACTIVE) {
511 struct lp_scene *scene = setup->scene;
512
513 /* Add the clear to existing scene. In the unusual case where
514 * both color and depth-stencil are being cleared when there's
515 * already been some rendering, we could discard the currently
516 * binned scene and start again, but I don't see that as being
517 * a common usage.
518 */
519 if (!lp_scene_bin_everywhere(scene,
520 LP_RAST_OP_CLEAR_ZSTENCIL,
521 lp_rast_arg_clearzs(zsvalue, zsmask)))
522 return FALSE;
523 }
524 else {
525 /* Put ourselves into the 'pre-clear' state, specifically to try
526 * and accumulate multiple clears to color and depth_stencil
527 * buffers which the app or gallium frontend might issue
528 * separately.
529 */
530 set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
531
532 setup->clear.flags |= flags;
533
534 setup->clear.zsmask |= zsmask;
535 setup->clear.zsvalue =
536 (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
537 }
538
539 return TRUE;
540 }
541
542 void
543 lp_setup_clear( struct lp_setup_context *setup,
544 const union pipe_color_union *color,
545 double depth,
546 unsigned stencil,
547 unsigned flags )
548 {
549 unsigned i;
550
551 /*
552 * Note that any of these (at most 9) clears could fail (but there should
553 * be at most one failure!). Retrying per buffer avoids redoing the clears
554 * that already succeeded (we still clear tiles twice if a clear command
555 * succeeded only partially for one buffer).
556 */
557 if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
558 unsigned flagszs = flags & PIPE_CLEAR_DEPTHSTENCIL;
559 if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) {
560 lp_setup_flush(setup, NULL, __FUNCTION__);
561
562 if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs))
563 assert(0);
564 }
565 }
566
567 if (flags & PIPE_CLEAR_COLOR) {
568 assert(PIPE_CLEAR_COLOR0 == (1 << 2));
569 for (i = 0; i < setup->fb.nr_cbufs; i++) {
570 if ((flags & (1 << (2 + i))) && setup->fb.cbufs[i]) {
571 if (!lp_setup_try_clear_color_buffer(setup, color, i)) {
572 lp_setup_flush(setup, NULL, __FUNCTION__);
573
574 if (!lp_setup_try_clear_color_buffer(setup, color, i))
575 assert(0);
576 }
577 }
578 }
579 }
580 }
581
582
583
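/*
 * The state-setting hooks below mirror rasterizer/CSO state into the setup
 * context.  They mostly just record values, reset the prim setup function
 * pointers where needed, and set dirty bits so the state is re-stored into
 * the next scene by try_update_scene_state().
 */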
584 void
585 lp_setup_set_triangle_state( struct lp_setup_context *setup,
586 unsigned cull_mode,
587 boolean ccw_is_frontface,
588 boolean scissor,
589 boolean half_pixel_center,
590 boolean bottom_edge_rule,
591 boolean multisample)
592 {
593 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
594
595 setup->ccw_is_frontface = ccw_is_frontface;
596 setup->cullmode = cull_mode;
597 setup->triangle = first_triangle;
598 setup->rect = first_rectangle;
599 setup->multisample = multisample;
600 setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f;
601 setup->bottom_edge_rule = bottom_edge_rule;
602
603 if (setup->scissor_test != scissor) {
604 setup->dirty |= LP_SETUP_NEW_SCISSOR;
605 setup->scissor_test = scissor;
606 }
607 }
608
609 void
610 lp_setup_set_line_state( struct lp_setup_context *setup,
611 float line_width,
612 boolean line_rectangular)
613 {
614 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
615
616 setup->line_width = line_width;
617 setup->rectangular_lines = line_rectangular;
618 }
619
620 void
621 lp_setup_set_point_state( struct lp_setup_context *setup,
622 float point_size,
623 boolean point_tri_clip,
624 boolean point_size_per_vertex,
625 uint sprite_coord_enable,
626 uint sprite_coord_origin,
627 boolean point_quad_rasterization)
628 {
629 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
630
631 setup->point_size = point_size;
632 setup->sprite_coord_enable = sprite_coord_enable;
633 setup->sprite_coord_origin = sprite_coord_origin;
634 setup->point_tri_clip = point_tri_clip;
635 setup->point_size_per_vertex = point_size_per_vertex;
636 setup->legacy_points = !point_quad_rasterization;
637 }
638
639 void
640 lp_setup_set_setup_variant( struct lp_setup_context *setup,
641 const struct lp_setup_variant *variant)
642 {
643 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
644
645 setup->setup.variant = variant;
646 }
647
648 void
649 lp_setup_set_fs_variant( struct lp_setup_context *setup,
650 struct lp_fragment_shader_variant *variant)
651 {
652 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__,
653 variant);
654
655 setup->fs.current.variant = variant;
656 setup->dirty |= LP_SETUP_NEW_FS;
657 }
658
659 void
660 lp_setup_set_fs_constants(struct lp_setup_context *setup,
661 unsigned num,
662 struct pipe_constant_buffer *buffers)
663 {
664 unsigned i;
665
666 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
667
668 assert(num <= ARRAY_SIZE(setup->constants));
669
670 for (i = 0; i < num; ++i) {
671 util_copy_constant_buffer(&setup->constants[i].current, &buffers[i], false);
672 }
673 for (; i < ARRAY_SIZE(setup->constants); i++) {
674 util_copy_constant_buffer(&setup->constants[i].current, NULL, false);
675 }
676 setup->dirty |= LP_SETUP_NEW_CONSTANTS;
677 }
678
679 void
680 lp_setup_set_fs_ssbos(struct lp_setup_context *setup,
681 unsigned num,
682 struct pipe_shader_buffer *buffers)
683 {
684 unsigned i;
685
686 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
687
688 assert(num <= ARRAY_SIZE(setup->ssbos));
689
690 for (i = 0; i < num; ++i) {
691 util_copy_shader_buffer(&setup->ssbos[i].current, &buffers[i]);
692 }
693 for (; i < ARRAY_SIZE(setup->ssbos); i++) {
694 util_copy_shader_buffer(&setup->ssbos[i].current, NULL);
695 }
696 setup->dirty |= LP_SETUP_NEW_SSBOS;
697 }
698
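/*
 * Bind fragment shader image views: keep a copy of each view and fill in the
 * corresponding lp_jit_image record (base pointer, dimensions, strides),
 * folding in the mip level and first_layer offsets for texture resources, or
 * the offset/size for buffer resources.
 */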
699 void
700 lp_setup_set_fs_images(struct lp_setup_context *setup,
701 unsigned num,
702 struct pipe_image_view *images)
703 {
704 unsigned i;
705
706 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) images);
707
708 assert(num <= ARRAY_SIZE(setup->images));
709
710 for (i = 0; i < num; ++i) {
711 struct pipe_image_view *image = &images[i];
712 util_copy_image_view(&setup->images[i].current, &images[i]);
713
714 struct pipe_resource *res = image->resource;
715 struct llvmpipe_resource *lp_res = llvmpipe_resource(res);
716 struct lp_jit_image *jit_image;
717
718 jit_image = &setup->fs.current.jit_context.images[i];
719 if (!lp_res)
720 continue;
721 if (!lp_res->dt) {
722 /* regular texture - setup array of mipmap level offsets */
723 if (llvmpipe_resource_is_texture(res)) {
724 jit_image->base = lp_res->tex_data;
725 } else
726 jit_image->base = lp_res->data;
727
728 jit_image->width = res->width0;
729 jit_image->height = res->height0;
730 jit_image->depth = res->depth0;
731 jit_image->num_samples = res->nr_samples;
732
733 if (llvmpipe_resource_is_texture(res)) {
734 uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
735 const uint32_t bw = util_format_get_blockwidth(image->resource->format);
736 const uint32_t bh = util_format_get_blockheight(image->resource->format);
737
738 jit_image->width = DIV_ROUND_UP(jit_image->width, bw);
739 jit_image->height = DIV_ROUND_UP(jit_image->height, bh);
740 jit_image->width = u_minify(jit_image->width, image->u.tex.level);
741 jit_image->height = u_minify(jit_image->height, image->u.tex.level);
742
743 if (res->target == PIPE_TEXTURE_1D_ARRAY ||
744 res->target == PIPE_TEXTURE_2D_ARRAY ||
745 res->target == PIPE_TEXTURE_3D ||
746 res->target == PIPE_TEXTURE_CUBE ||
747 res->target == PIPE_TEXTURE_CUBE_ARRAY) {
748 /*
749 * For array textures, we don't have a first_layer field; instead
750 * adjust last_layer (stored as depth) plus the mip level offsets
751 * (the layout is mip-first, so we can't just adjust the base pointer).
752 * XXX For mip levels, could do something similar.
753 */
754 jit_image->depth = image->u.tex.last_layer - image->u.tex.first_layer + 1;
755 mip_offset += image->u.tex.first_layer * lp_res->img_stride[image->u.tex.level];
756 } else
757 jit_image->depth = u_minify(jit_image->depth, image->u.tex.level);
758
759 jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
760 jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
761 jit_image->sample_stride = lp_res->sample_stride;
762 jit_image->base = (uint8_t *)jit_image->base + mip_offset;
763 }
764 else {
765 unsigned view_blocksize = util_format_get_blocksize(image->format);
766 jit_image->width = image->u.buf.size / view_blocksize;
767 jit_image->base = (uint8_t *)jit_image->base + image->u.buf.offset;
768 }
769 }
770 }
771 for (; i < ARRAY_SIZE(setup->images); i++) {
772 util_copy_image_view(&setup->images[i].current, NULL);
773 }
774 setup->dirty |= LP_SETUP_NEW_FS;
775 }
776
777 void
778 lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
779 float alpha_ref_value )
780 {
781 LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value);
782
783 if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) {
784 setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value;
785 setup->dirty |= LP_SETUP_NEW_FS;
786 }
787 }
788
789 void
790 lp_setup_set_stencil_ref_values( struct lp_setup_context *setup,
791 const ubyte refs[2] )
792 {
793 LP_DBG(DEBUG_SETUP, "%s %d %d\n", __FUNCTION__, refs[0], refs[1]);
794
795 if (setup->fs.current.jit_context.stencil_ref_front != refs[0] ||
796 setup->fs.current.jit_context.stencil_ref_back != refs[1]) {
797 setup->fs.current.jit_context.stencil_ref_front = refs[0];
798 setup->fs.current.jit_context.stencil_ref_back = refs[1];
799 setup->dirty |= LP_SETUP_NEW_FS;
800 }
801 }
802
803 void
804 lp_setup_set_blend_color( struct lp_setup_context *setup,
805 const struct pipe_blend_color *blend_color )
806 {
807 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
808
809 assert(blend_color);
810
811 if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) {
812 memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color);
813 setup->dirty |= LP_SETUP_NEW_BLEND_COLOR;
814 }
815 }
816
817
818 void
819 lp_setup_set_scissors( struct lp_setup_context *setup,
820 const struct pipe_scissor_state *scissors )
821 {
822 unsigned i;
823 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
824
825 assert(scissors);
826
827 for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) {
828 setup->scissors[i].x0 = scissors[i].minx;
829 setup->scissors[i].x1 = scissors[i].maxx-1;
830 setup->scissors[i].y0 = scissors[i].miny;
831 setup->scissors[i].y1 = scissors[i].maxy-1;
832 }
833 setup->dirty |= LP_SETUP_NEW_SCISSOR;
834 }
835
836 void
837 lp_setup_set_sample_mask(struct lp_setup_context *setup,
838 uint32_t sample_mask)
839 {
840 if (setup->fs.current.jit_context.sample_mask != sample_mask) {
841 setup->fs.current.jit_context.sample_mask = sample_mask;
842 setup->dirty |= LP_SETUP_NEW_FS;
843 }
844 }
845
846 void
847 lp_setup_set_flatshade_first(struct lp_setup_context *setup,
848 boolean flatshade_first)
849 {
850 setup->flatshade_first = flatshade_first;
851 }
852
853 void
854 lp_setup_set_rasterizer_discard(struct lp_setup_context *setup,
855 boolean rasterizer_discard)
856 {
857 if (setup->rasterizer_discard != rasterizer_discard) {
858 setup->rasterizer_discard = rasterizer_discard;
859 setup->line = first_line;
860 setup->point = first_point;
861 setup->triangle = first_triangle;
862 setup->rect = first_rectangle;
863 }
864 }
865
866 void
867 lp_setup_set_vertex_info(struct lp_setup_context *setup,
868 struct vertex_info *vertex_info)
869 {
870 /* XXX: just silently holding onto the pointer:
871 */
872 setup->vertex_info = vertex_info;
873 }
874
875
876 void
877 lp_setup_set_linear_mode( struct lp_setup_context *setup,
878 boolean mode )
879 {
880 /* The linear rasterizer requires sse2 both at compile and runtime,
881 * in particular for the code in lp_rast_linear_fallback.c. This
882 * is more than ten-year-old technology, so it's a reasonable
883 * baseline.
884 */
885 #if defined(PIPE_ARCH_SSE)
886 setup->permit_linear_rasterizer = (mode &&
887 util_get_cpu_caps()->has_sse2);
888 #else
889 setup->permit_linear_rasterizer = FALSE;
890 #endif
891 }
892
893
894 /**
895 * Called during state validation when LP_NEW_VIEWPORT is set.
896 */
897 void
898 lp_setup_set_viewports(struct lp_setup_context *setup,
899 unsigned num_viewports,
900 const struct pipe_viewport_state *viewports)
901 {
902 struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
903 float half_height, x0, y0;
904 unsigned i;
905
906 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
907
908 assert(num_viewports <= PIPE_MAX_VIEWPORTS);
909 assert(viewports);
910
911 /*
912 * Linear rasterizer path for scissor/viewport intersection.
913 *
914 * Calculate "scissor" rect from the (first) viewport.
915 * Just like the stored scissor rects, this needs inclusive coords.
916 * For rounding, assume half pixel center (d3d9 should not end up
917 * with fractional viewports) - quite obviously for msaa we'd need
918 * fractional values here (and elsewhere for the point bounding box).
919 *
920 * See: lp_setup.c::try_update_scene_state
921 */
922 half_height = fabsf(viewports[0].scale[1]);
923 x0 = viewports[0].translate[0] - viewports[0].scale[0];
924 y0 = viewports[0].translate[1] - half_height;
925 setup->vpwh.x0 = (int)(x0 + 0.5f);
926 setup->vpwh.x1 = (int)(viewports[0].scale[0] * 2.0f + x0 - 0.5f);
927 setup->vpwh.y0 = (int)(y0 + 0.5f);
928 setup->vpwh.y1 = (int)(half_height * 2.0f + y0 - 0.5f);
929 setup->dirty |= LP_SETUP_NEW_SCISSOR;
930
931 /*
932 * For use in lp_state_fs.c, propagate the viewport values for all viewports.
933 */
934 for (i = 0; i < num_viewports; i++) {
935 float min_depth;
936 float max_depth;
937 util_viewport_zmin_zmax(&viewports[i], lp->rasterizer->clip_halfz,
938 &min_depth, &max_depth);
939
940 if (setup->viewports[i].min_depth != min_depth ||
941 setup->viewports[i].max_depth != max_depth) {
942 setup->viewports[i].min_depth = min_depth;
943 setup->viewports[i].max_depth = max_depth;
944 setup->dirty |= LP_SETUP_NEW_VIEWPORTS;
945 }
946 }
947 }
948
949
950 /**
951 * Called directly by llvmpipe_set_sampler_views
952 */
953 void
954 lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
955 unsigned num,
956 struct pipe_sampler_view **views)
957 {
958 unsigned i, max_tex_num;
959
960 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
961
962 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
963
964 max_tex_num = MAX2(num, setup->fs.current_tex_num);
965
966 for (i = 0; i < max_tex_num; i++) {
967 struct pipe_sampler_view *view = i < num ? views[i] : NULL;
968
969 /* We are going to overwrite/unref the current texture further below. If
970 * set, make sure to unmap its resource to avoid leaking the previous
971 * mapping. */
972 if (setup->fs.current_tex[i])
973 llvmpipe_resource_unmap(setup->fs.current_tex[i], 0, 0);
974
975 if (view) {
976 struct pipe_resource *res = view->texture;
977 struct llvmpipe_resource *lp_tex = llvmpipe_resource(res);
978 struct lp_jit_texture *jit_tex;
979 jit_tex = &setup->fs.current.jit_context.textures[i];
980
981 /* We're referencing the texture's internal data, so save a
982 * reference to it.
983 */
984 pipe_resource_reference(&setup->fs.current_tex[i], res);
985
986 if (!lp_tex->dt) {
987 /* regular texture - setup array of mipmap level offsets */
988 int j;
989 unsigned first_level = 0;
990 unsigned last_level = 0;
991
992 if (llvmpipe_resource_is_texture(res)) {
993 first_level = view->u.tex.first_level;
994 last_level = view->u.tex.last_level;
995 assert(first_level <= last_level);
996 assert(last_level <= res->last_level);
997 jit_tex->base = lp_tex->tex_data;
998 }
999 else {
1000 jit_tex->base = lp_tex->data;
1001 }
1002
1003 if (LP_PERF & PERF_TEX_MEM) {
1004 /* use dummy tile memory */
1005 jit_tex->base = lp_dummy_tile;
1006 jit_tex->width = TILE_SIZE/8;
1007 jit_tex->height = TILE_SIZE/8;
1008 jit_tex->depth = 1;
1009 jit_tex->first_level = 0;
1010 jit_tex->last_level = 0;
1011 jit_tex->mip_offsets[0] = 0;
1012 jit_tex->row_stride[0] = 0;
1013 jit_tex->img_stride[0] = 0;
1014 jit_tex->num_samples = 0;
1015 jit_tex->sample_stride = 0;
1016 }
1017 else {
1018 jit_tex->width = res->width0;
1019 jit_tex->height = res->height0;
1020 jit_tex->depth = res->depth0;
1021 jit_tex->first_level = first_level;
1022 jit_tex->last_level = last_level;
1023 jit_tex->num_samples = res->nr_samples;
1024 jit_tex->sample_stride = 0;
1025
1026 if (llvmpipe_resource_is_texture(res)) {
1027 for (j = first_level; j <= last_level; j++) {
1028 jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j];
1029 jit_tex->row_stride[j] = lp_tex->row_stride[j];
1030 jit_tex->img_stride[j] = lp_tex->img_stride[j];
1031 }
1032
1033 jit_tex->sample_stride = lp_tex->sample_stride;
1034
1035 if (res->target == PIPE_TEXTURE_1D_ARRAY ||
1036 res->target == PIPE_TEXTURE_2D_ARRAY ||
1037 res->target == PIPE_TEXTURE_CUBE ||
1038 res->target == PIPE_TEXTURE_CUBE_ARRAY) {
1039 /*
1040 * For array textures, we don't have a first_layer field; instead
1041 * adjust last_layer (stored as depth) plus the mip level offsets
1042 * (the layout is mip-first, so we can't just adjust the base pointer).
1043 * XXX For mip levels, could do something similar.
1044 */
1045 jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
1046 for (j = first_level; j <= last_level; j++) {
1047 jit_tex->mip_offsets[j] += view->u.tex.first_layer *
1048 lp_tex->img_stride[j];
1049 }
1050 if (view->target == PIPE_TEXTURE_CUBE ||
1051 view->target == PIPE_TEXTURE_CUBE_ARRAY) {
1052 assert(jit_tex->depth % 6 == 0);
1053 }
1054 assert(view->u.tex.first_layer <= view->u.tex.last_layer);
1055 assert(view->u.tex.last_layer < res->array_size);
1056 }
1057 }
1058 else {
1059 /*
1060 * For buffers, we don't have "offset", instead adjust
1061 * the size (stored as width) plus the base pointer.
1062 */
1063 unsigned view_blocksize = util_format_get_blocksize(view->format);
1064 /* probably don't really need to fill that out */
1065 jit_tex->mip_offsets[0] = 0;
1066 jit_tex->row_stride[0] = 0;
1067 jit_tex->img_stride[0] = 0;
1068
1069 /* everything specified in number of elements here. */
1070 jit_tex->width = view->u.buf.size / view_blocksize;
1071 jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset;
1072 /* XXX Unsure if we need to sanitize parameters? */
1073 assert(view->u.buf.offset + view->u.buf.size <= res->width0);
1074 }
1075 }
1076 }
1077 else {
1078 /* display target texture/surface */
1079 jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ);
1080 jit_tex->row_stride[0] = lp_tex->row_stride[0];
1081 jit_tex->img_stride[0] = lp_tex->img_stride[0];
1082 jit_tex->mip_offsets[0] = 0;
1083 jit_tex->width = res->width0;
1084 jit_tex->height = res->height0;
1085 jit_tex->depth = res->depth0;
1086 jit_tex->first_level = jit_tex->last_level = 0;
1087 jit_tex->num_samples = res->nr_samples;
1088 jit_tex->sample_stride = 0;
1089 assert(jit_tex->base);
1090 }
1091 }
1092 else {
1093 pipe_resource_reference(&setup->fs.current_tex[i], NULL);
1094 }
1095 }
1096 setup->fs.current_tex_num = num;
1097
1098 setup->dirty |= LP_SETUP_NEW_FS;
1099 }
1100
1101 /**
1102 * Called during state validation when LP_NEW_SAMPLER is set.
1103 */
1104 void
1105 lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
1106 unsigned num,
1107 struct pipe_sampler_state **samplers)
1108 {
1109 unsigned i;
1110
1111 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
1112
1113 assert(num <= PIPE_MAX_SAMPLERS);
1114
1115 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
1116 const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;
1117
1118 if (sampler) {
1119 struct lp_jit_sampler *jit_sam;
1120 jit_sam = &setup->fs.current.jit_context.samplers[i];
1121
1122 jit_sam->min_lod = sampler->min_lod;
1123 jit_sam->max_lod = sampler->max_lod;
1124 jit_sam->lod_bias = sampler->lod_bias;
1125 jit_sam->max_aniso = sampler->max_anisotropy;
1126 COPY_4V(jit_sam->border_color, sampler->border_color.f);
1127 }
1128 }
1129
1130 setup->dirty |= LP_SETUP_NEW_FS;
1131 }
1132
1133
1134
1135
1136 /**
1137 * Is the given texture referenced by any scene?
1138 * Note: we have to check all scenes including any scenes currently
1139 * being rendered and the current scene being built.
1140 */
1141 unsigned
1142 lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
1143 const struct pipe_resource *texture )
1144 {
1145 unsigned i;
1146
1147 /* check the render targets */
1148 for (i = 0; i < setup->fb.nr_cbufs; i++) {
1149 if (setup->fb.cbufs[i] && setup->fb.cbufs[i]->texture == texture)
1150 return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
1151 }
1152 if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) {
1153 return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
1154 }
1155
1156 /* check textures referenced by the scene */
1157 for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
1158 if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
1159 return LP_REFERENCED_FOR_READ;
1160 }
1161 }
1162
1163 for (i = 0; i < ARRAY_SIZE(setup->ssbos); i++) {
1164 if (setup->ssbos[i].current.buffer == texture)
1165 return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
1166 }
1167
1168 for (i = 0; i < ARRAY_SIZE(setup->images); i++) {
1169 if (setup->images[i].current.resource == texture)
1170 return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
1171 }
1172
1173 return LP_UNREFERENCED;
1174 }
1175
1176
1177 /**
1178 * Called by vbuf code when we're about to draw something.
1179 *
1180 * This function stores all dirty state in the current scene's display list
1181 * memory, via lp_scene_alloc(). We cannot pass pointers to mutable state to
1182 * the JIT functions, as the JIT functions will be called later on, most likely
1183 * on a different thread.
1184 *
1185 * When processing dirty state it is imperative that we don't refer to any
1186 * pointers previously allocated with lp_scene_alloc() in this function (or any
1187 * function) as they may belong to a scene freed since then.
1188 */
1189 static boolean
1190 try_update_scene_state( struct lp_setup_context *setup )
1191 {
1192 static const float fake_const_buf[4];
1193 boolean new_scene = (setup->fs.stored == NULL);
1194 struct lp_scene *scene = setup->scene;
1195 unsigned i;
1196
1197 assert(scene);
1198
1199 if (setup->dirty & LP_SETUP_NEW_VIEWPORTS) {
1200 /*
1201 * Record new depth range state for changes due to viewport updates.
1202 *
1203 * TODO: Collapse the existing viewport and depth range information
1204 * into one structure, for access by JIT.
1205 */
1206 struct lp_jit_viewport *stored;
1207
1208 stored = (struct lp_jit_viewport *)
1209 lp_scene_alloc(scene, sizeof setup->viewports);
1210
1211 if (!stored) {
1212 assert(!new_scene);
1213 return FALSE;
1214 }
1215
1216 memcpy(stored, setup->viewports, sizeof setup->viewports);
1217
1218 setup->fs.current.jit_context.viewports = stored;
1219 setup->dirty |= LP_SETUP_NEW_FS;
1220 }
1221
1222 if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
1223 uint8_t *stored;
1224 float* fstored;
1225 unsigned i, j;
1226 unsigned size;
1227
1228 /* Alloc u8_blend_color (16 x i8) and f_blend_color (4 or 8 x f32) */
1229 size = 4 * 16 * sizeof(uint8_t);
1230 size += (LP_MAX_VECTOR_LENGTH / 4) * sizeof(float);
1231 stored = lp_scene_alloc_aligned(scene, size, LP_MIN_VECTOR_ALIGN);
1232
1233 if (!stored) {
1234 assert(!new_scene);
1235 return FALSE;
1236 }
1237
1238 /* Store floating point colour */
1239 fstored = (float*)(stored + 4*16);
1240 for (i = 0; i < (LP_MAX_VECTOR_LENGTH / 4); ++i) {
1241 fstored[i] = setup->blend_color.current.color[i % 4];
1242 }
1243
1244 /* smear each blend color component across 16 ubyte elements */
1245 for (i = 0; i < 4; ++i) {
1246 uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
1247 for (j = 0; j < 16; ++j)
1248 stored[i*16 + j] = c;
1249 }
1250
1251 setup->blend_color.stored = stored;
1252 setup->fs.current.jit_context.u8_blend_color = stored;
1253 setup->fs.current.jit_context.f_blend_color = fstored;
1254 setup->dirty |= LP_SETUP_NEW_FS;
1255 }
1256
1257 struct llvmpipe_context *llvmpipe = llvmpipe_context(setup->pipe);
1258 if (llvmpipe->dirty & LP_NEW_FS_CONSTANTS)
1259 lp_setup_set_fs_constants(llvmpipe->setup,
1260 ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_FRAGMENT]),
1261 llvmpipe->constants[PIPE_SHADER_FRAGMENT]);
1262
1263 if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
1264 for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
1265 struct pipe_resource *buffer = setup->constants[i].current.buffer;
1266 const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
1267 LP_MAX_TGSI_CONST_BUFFER_SIZE);
1268 const ubyte *current_data = NULL;
1269 int num_constants;
1270
1271 STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE);
1272
1273 if (buffer) {
1274 /* resource buffer */
1275 current_data = (ubyte *) llvmpipe_resource_data(buffer);
1276 }
1277 else if (setup->constants[i].current.user_buffer) {
1278 /* user-space buffer */
1279 current_data = (ubyte *) setup->constants[i].current.user_buffer;
1280 }
1281
1282 if (current_data && current_size >= sizeof(float)) {
1283 current_data += setup->constants[i].current.buffer_offset;
1284
1285 /* TODO: copy only the actually used constants? */
1286
1287 if (setup->constants[i].stored_size != current_size ||
1288 !setup->constants[i].stored_data ||
1289 memcmp(setup->constants[i].stored_data,
1290 current_data,
1291 current_size) != 0) {
1292 void *stored;
1293
1294 stored = lp_scene_alloc(scene, current_size);
1295 if (!stored) {
1296 assert(!new_scene);
1297 return FALSE;
1298 }
1299
1300 memcpy(stored,
1301 current_data,
1302 current_size);
1303 setup->constants[i].stored_size = current_size;
1304 setup->constants[i].stored_data = stored;
1305 }
1306 setup->fs.current.jit_context.constants[i] =
1307 setup->constants[i].stored_data;
1308 }
1309 else {
1310 setup->constants[i].stored_size = 0;
1311 setup->constants[i].stored_data = NULL;
1312 setup->fs.current.jit_context.constants[i] = fake_const_buf;
1313 }
1314
1315 num_constants =
1316 DIV_ROUND_UP(setup->constants[i].stored_size, lp_get_constant_buffer_stride(scene->pipe->screen));
1317 setup->fs.current.jit_context.num_constants[i] = num_constants;
1318 setup->dirty |= LP_SETUP_NEW_FS;
1319 }
1320 }
1321
1322 if (setup->dirty & LP_SETUP_NEW_SSBOS) {
1323 for (i = 0; i < ARRAY_SIZE(setup->ssbos); ++i) {
1324 struct pipe_resource *buffer = setup->ssbos[i].current.buffer;
1325 const ubyte *current_data = NULL;
1326
1327 if (!buffer)
1328 continue;
1329 /* resource buffer */
1330 current_data = (ubyte *) llvmpipe_resource_data(buffer);
1331 if (current_data) {
1332 current_data += setup->ssbos[i].current.buffer_offset;
1333
1334 setup->fs.current.jit_context.ssbos[i] = (const uint32_t *)current_data;
1335 setup->fs.current.jit_context.num_ssbos[i] = setup->ssbos[i].current.buffer_size;
1336 } else {
1337 setup->fs.current.jit_context.ssbos[i] = NULL;
1338 setup->fs.current.jit_context.num_ssbos[i] = 0;
1339 }
1340 setup->dirty |= LP_SETUP_NEW_FS;
1341 }
1342 }
1343 if (setup->dirty & LP_SETUP_NEW_FS) {
1344 if (!setup->fs.stored ||
1345 memcmp(setup->fs.stored,
1346 &setup->fs.current,
1347 sizeof setup->fs.current) != 0)
1348 {
1349 struct lp_rast_state *stored;
1350
1351 /* The fs state that's been stored in the scene is different from
1352 * the new, current state. So allocate a new lp_rast_state object
1353 * and append it to the bin's setup data buffer.
1354 */
1355 stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored);
1356 if (!stored) {
1357 assert(!new_scene);
1358 return FALSE;
1359 }
1360
1361 memcpy(&stored->jit_context,
1362 &setup->fs.current.jit_context,
1363 sizeof setup->fs.current.jit_context);
1364 stored->jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table();
1365 stored->variant = setup->fs.current.variant;
1366
1367 if (!lp_scene_add_frag_shader_reference(scene,
1368 setup->fs.current.variant))
1369 return FALSE;
1370 setup->fs.stored = stored;
1371
1372 /* The scene now references the textures in the rasterization
1373 * state record. Note that now.
1374 */
1375 for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
1376 if (setup->fs.current_tex[i]) {
1377 if (!lp_scene_add_resource_reference(scene,
1378 setup->fs.current_tex[i],
1379 new_scene)) {
1380 assert(!new_scene);
1381 return FALSE;
1382 }
1383 }
1384 }
1385 }
1386 }
1387
1388 if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
1389 unsigned i;
1390
1391 for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) {
1392 setup->draw_regions[i] = setup->framebuffer;
1393 if (setup->scissor_test) {
1394 u_rect_possible_intersection(&setup->scissors[i],
1395 &setup->draw_regions[i]);
1396 }
1397 }
1398 if (setup->permit_linear_rasterizer) {
1399 /* NOTE: this only takes first vp into account. */
1400 boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer,
1401 sizeof(setup->framebuffer));
1402 assert(setup->viewport_index_slot < 0);
1403 if (need_vp_scissoring) {
1404 u_rect_possible_intersection(&setup->vpwh,
1405 &setup->draw_regions[0]);
1406 }
1407 }
1408 else if (setup->point_tri_clip) {
1409 /*
1410 * for d3d-style point clipping, we're going to need
1411 * the fake vp scissor too. Hence do the intersection with vp,
1412 * but don't indicate this. As above this will only work for first vp
1413 * which should be ok because we instruct draw to only skip point
1414 * clipping when there's only one viewport (this works because d3d10
1415 * points are always single pixel).
1416 * (Also note that if we have permit_linear_rasterizer this will
1417 * cause large points to always get vp scissored, regardless the
1418 * point_tri_clip setting.)
1419 */
1420 boolean need_vp_scissoring = !!memcmp(&setup->vpwh, &setup->framebuffer,
1421 sizeof(setup->framebuffer));
1422 if (need_vp_scissoring) {
1423 u_rect_possible_intersection(&setup->vpwh,
1424 &setup->draw_regions[0]);
1425 }
1426 }
1427 }
1428
1429 setup->dirty = 0;
1430
1431 assert(setup->fs.stored);
1432 return TRUE;
1433 }
1434
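/**
 * Validate driver-derived state and, if update_scene is set, make sure there
 * is an active scene whose stored state matches the current state.  If the
 * scene runs out of memory, it is flushed and restarted once.
 */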
1435 boolean
1436 lp_setup_update_state( struct lp_setup_context *setup,
1437 boolean update_scene )
1438 {
1439 /* Some of the 'draw' pipeline stages may have changed some driver state.
1440 * Make sure we've processed those state changes before anything else.
1441 *
1442 * XXX this is the only place where llvmpipe_context is used in the
1443 * setup code. This may get refactored/changed...
1444 */
1445 {
1446 struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
1447 if (lp->dirty) {
1448 llvmpipe_update_derived(lp);
1449 }
1450
1451 if (lp->setup->dirty) {
1452 llvmpipe_update_setup(lp);
1453 }
1454
1455 assert(setup->setup.variant);
1456
1457 /* Will probably need to move this somewhere else, just need
1458 * to know about vertex shader point size attribute.
1459 */
1460 setup->psize_slot = lp->psize_slot;
1461 setup->viewport_index_slot = lp->viewport_index_slot;
1462 setup->layer_slot = lp->layer_slot;
1463 setup->face_slot = lp->face_slot;
1464
1465 assert(lp->dirty == 0);
1466
1467 assert(lp->setup_variant.key.size ==
1468 setup->setup.variant->key.size);
1469
1470 assert(memcmp(&lp->setup_variant.key,
1471 &setup->setup.variant->key,
1472 setup->setup.variant->key.size) == 0);
1473 }
1474
1475 if (update_scene && setup->state != SETUP_ACTIVE) {
1476 if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ))
1477 return FALSE;
1478 }
1479
1480 /* Only call into update_scene_state() if we already have a
1481 * scene:
1482 */
1483 if (update_scene && setup->scene) {
1484 assert(setup->state == SETUP_ACTIVE);
1485
1486 if (try_update_scene_state(setup))
1487 return TRUE;
1488
1489 /* Update failed, try to restart the scene.
1490 *
1491 * Cannot call lp_setup_flush_and_restart() directly here
1492 * because of potential recursion.
1493 */
1494 if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
1495 return FALSE;
1496
1497 if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__))
1498 return FALSE;
1499
1500 if (!setup->scene)
1501 return FALSE;
1502
1503 return try_update_scene_state(setup);
1504 }
1505
1506 return TRUE;
1507 }
1508
1509
1510
1511 /* Only caller is lp_setup_vbuf_destroy()
1512 */
1513 void
1514 lp_setup_destroy( struct lp_setup_context *setup )
1515 {
1516 uint i;
1517
1518 lp_setup_reset( setup );
1519
1520 util_unreference_framebuffer_state(&setup->fb);
1521
1522 for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
1523 struct pipe_resource **res_ptr = &setup->fs.current_tex[i];
1524 if (*res_ptr)
1525 llvmpipe_resource_unmap(*res_ptr, 0, 0);
1526 pipe_resource_reference(res_ptr, NULL);
1527 }
1528
1529 for (i = 0; i < ARRAY_SIZE(setup->constants); i++) {
1530 pipe_resource_reference(&setup->constants[i].current.buffer, NULL);
1531 }
1532
1533 for (i = 0; i < ARRAY_SIZE(setup->ssbos); i++) {
1534 pipe_resource_reference(&setup->ssbos[i].current.buffer, NULL);
1535 }
1536
1537 /* free the scenes in the 'empty' queue */
1538 for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
1539 struct lp_scene *scene = setup->scenes[i];
1540
1541 if (scene->fence)
1542 lp_fence_wait(scene->fence);
1543
1544 lp_scene_destroy(scene);
1545 }
1546
1547 lp_fence_reference(&setup->last_fence, NULL);
1548
1549 FREE( setup );
1550 }
1551
1552
1553 /**
1554 * Create a new primitive tiling engine. Plug it into the backend of
1555 * the draw module. Currently also creates a rasterizer to use with
1556 * it.
1557 */
1558 struct lp_setup_context *
1559 lp_setup_create( struct pipe_context *pipe,
1560 struct draw_context *draw )
1561 {
1562 struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
1563 struct lp_setup_context *setup;
1564 unsigned i;
1565
1566 setup = CALLOC_STRUCT(lp_setup_context);
1567 if (!setup) {
1568 goto no_setup;
1569 }
1570
1571 lp_setup_init_vbuf(setup);
1572
1573 /* Used only in update_state():
1574 */
1575 setup->pipe = pipe;
1576
1577
1578 setup->num_threads = screen->num_threads;
1579 setup->vbuf = draw_vbuf_stage(draw, &setup->base);
1580 if (!setup->vbuf) {
1581 goto no_vbuf;
1582 }
1583
1584 draw_set_rasterize_stage(draw, setup->vbuf);
1585 draw_set_render(draw, &setup->base);
1586
1587 /* create some empty scenes */
1588 for (i = 0; i < MAX_SCENES; i++) {
1589 setup->scenes[i] = lp_scene_create( pipe );
1590 if (!setup->scenes[i]) {
1591 goto no_scenes;
1592 }
1593 }
1594
1595 setup->triangle = first_triangle;
1596 setup->line = first_line;
1597 setup->point = first_point;
1598
1599 setup->dirty = ~0;
1600
1601 /* Initialize empty default fb correctly, so the rect is empty */
1602 setup->framebuffer.x1 = -1;
1603 setup->framebuffer.y1 = -1;
1604
1605 return setup;
1606
1607 no_scenes:
1608 for (i = 0; i < MAX_SCENES; i++) {
1609 if (setup->scenes[i]) {
1610 lp_scene_destroy(setup->scenes[i]);
1611 }
1612 }
1613
1614 setup->vbuf->destroy(setup->vbuf);
1615 no_vbuf:
1616 FREE(setup);
1617 no_setup:
1618 return NULL;
1619 }
1620
1621
1622 /**
1623 * Put a BeginQuery command into all bins.
1624 */
1625 void
1626 lp_setup_begin_query(struct lp_setup_context *setup,
1627 struct llvmpipe_query *pq)
1628 {
1629 set_scene_state(setup, SETUP_ACTIVE, "begin_query");
1630
1631 if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
1632 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
1633 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
1634 pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
1635 pq->type == PIPE_QUERY_TIME_ELAPSED))
1636 return;
1637
1638 /* init the query to its beginning state */
1639 assert(setup->active_binned_queries < LP_MAX_ACTIVE_BINNED_QUERIES);
1640 /* if we would exceed the list size, just ignore the query */
1641 if (setup->active_binned_queries >= LP_MAX_ACTIVE_BINNED_QUERIES) {
1642 return;
1643 }
1644 assert(setup->active_queries[setup->active_binned_queries] == NULL);
1645 setup->active_queries[setup->active_binned_queries] = pq;
1646 setup->active_binned_queries++;
1647
1648 assert(setup->scene);
1649 if (setup->scene) {
1650 if (!lp_scene_bin_everywhere(setup->scene,
1651 LP_RAST_OP_BEGIN_QUERY,
1652 lp_rast_arg_query(pq))) {
1653
1654 if (!lp_setup_flush_and_restart(setup))
1655 return;
1656
1657 if (!lp_scene_bin_everywhere(setup->scene,
1658 LP_RAST_OP_BEGIN_QUERY,
1659 lp_rast_arg_query(pq))) {
1660 return;
1661 }
1662 }
1663 setup->scene->had_queries |= TRUE;
1664 }
1665 }
1666
1667
1668 /**
1669 * Put an EndQuery command into all bins.
1670 */
1671 void
1672 lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
1673 {
1674 set_scene_state(setup, SETUP_ACTIVE, "end_query");
1675
1676 assert(setup->scene);
1677 if (setup->scene) {
1678 /* pq->fence should be the fence of the *last* scene which
1679 * contributed to the query result.
1680 */
1681 lp_fence_reference(&pq->fence, setup->scene->fence);
1682
1683 if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
1684 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
1685 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
1686 pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
1687 pq->type == PIPE_QUERY_TIMESTAMP ||
1688 pq->type == PIPE_QUERY_TIME_ELAPSED) {
1689 if (pq->type == PIPE_QUERY_TIMESTAMP &&
1690 !(setup->scene->tiles_x | setup->scene->tiles_y)) {
1691 /*
1692 * If there's a zero width/height framebuffer, there are no bins and
1693 * hence no rast task is ever run. So fill in something here instead.
1694 */
1695 pq->end[0] = os_time_get_nano();
1696 }
1697
1698 if (!lp_scene_bin_everywhere(setup->scene,
1699 LP_RAST_OP_END_QUERY,
1700 lp_rast_arg_query(pq))) {
1701 if (!lp_setup_flush_and_restart(setup))
1702 goto fail;
1703
1704 if (!lp_scene_bin_everywhere(setup->scene,
1705 LP_RAST_OP_END_QUERY,
1706 lp_rast_arg_query(pq))) {
1707 goto fail;
1708 }
1709 }
1710 setup->scene->had_queries |= TRUE;
1711 }
1712 }
1713 else {
1714 lp_fence_reference(&pq->fence, setup->last_fence);
1715 }
1716
1717 fail:
1718 /* Need to do this now, not earlier, since the query still needs to be
1719 * marked as active in case binning it would cause a flush.
1720 */
1721 if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
1722 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
1723 pq->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
1724 pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
1725 pq->type == PIPE_QUERY_TIME_ELAPSED) {
1726 unsigned i;
1727
1728 /* remove from active binned query list */
1729 for (i = 0; i < setup->active_binned_queries; i++) {
1730 if (setup->active_queries[i] == pq)
1731 break;
1732 }
1733 assert(i < setup->active_binned_queries);
1734 if (i == setup->active_binned_queries)
1735 return;
1736 setup->active_binned_queries--;
1737 setup->active_queries[i] = setup->active_queries[setup->active_binned_queries];
1738 setup->active_queries[setup->active_binned_queries] = NULL;
1739 }
1740 }
1741
1742
1743 boolean
1744 lp_setup_flush_and_restart(struct lp_setup_context *setup)
1745 {
1746 if (0) debug_printf("%s\n", __FUNCTION__);
1747
1748 assert(setup->state == SETUP_ACTIVE);
1749
1750 if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
1751 return FALSE;
1752
1753 if (!lp_setup_update_state(setup, TRUE))
1754 return FALSE;
1755
1756 return TRUE;
1757 }
1758
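/*
 * Append up to four extra edge-function planes that restrict rasterization
 * to the given scissor rectangle; s_planes[] selects which of the four edges
 * actually need a plane.  Values use the rasterizer's fixed-point format
 * with 8 fractional bits (hence the << 8); the 127/adj terms bias the planes
 * so edge pixels are handled inclusively, with adj widening the rect by
 * roughly half a pixel when multisampling.
 */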
1759 void
1760 lp_setup_add_scissor_planes(const struct u_rect *scissor,
1761 struct lp_rast_plane *plane_s,
1762 boolean s_planes[4], bool multisample)
1763 {
1764 /*
1765 * When rasterizing scissored tris, use the intersection of the
1766 * triangle bounding box and the scissor rect to generate the
1767 * scissor planes.
1768 *
1769 * This permits us to cut off the triangle "tails" that are present
1770 * in the intermediate recursive levels caused when two of the
1771 * triangle's edges don't diverge quickly enough to trivially reject
1772 * exterior blocks from the triangle.
1773 *
1774 * It's not really clear if it's worth worrying about these tails,
1775 * but since we generate the planes for each scissored tri, it's
1776 * free to trim them in this case.
1777 *
1778 * Note that otherwise, the scissor planes only vary in 'C' value,
1779 * and even then only on state-changes. Could alternatively store
1780 * these planes elsewhere.
1781 * (Or only store the c value together with a bit indicating which
1782 * scissor edge this is, so rasterization would treat them differently
1783 * (easier to evaluate) to ordinary planes.)
1784 */
1785 int adj = multisample ? 127 : 0;
1786 if (s_planes[0]) {
1787 int x0 = scissor->x0 - 1;
1788 plane_s->dcdx = ~0U << 8;
1789 plane_s->dcdy = 0;
1790 plane_s->c = x0 << 8;
1791 plane_s->c += adj;
1792 plane_s->c = -plane_s->c; /* flip sign */
1793 plane_s->eo = 1 << 8;
1794 plane_s++;
1795 }
1796 if (s_planes[1]) {
1797 int x1 = scissor->x1;
1798 plane_s->dcdx = 1 << 8;
1799 plane_s->dcdy = 0;
1800 plane_s->c = x1 << 8;
1801 plane_s->c += 127 + adj;
1802 plane_s->eo = 0 << 8;
1803 plane_s++;
1804 }
1805 if (s_planes[2]) {
1806 int y0 = scissor->y0 - 1;
1807 plane_s->dcdx = 0;
1808 plane_s->dcdy = 1 << 8;
1809 plane_s->c = y0 << 8;
1810 plane_s->c += adj;
1811 plane_s->c = -plane_s->c; /* flip sign */
1812 plane_s->eo = 1 << 8;
1813 plane_s++;
1814 }
1815 if (s_planes[3]) {
1816 int y1 = scissor->y1;
1817 plane_s->dcdx = 0;
1818 plane_s->dcdy = ~0U << 8;
1819 plane_s->c = y1 << 8;
1820 plane_s->c += 127 + adj;
1821 plane_s->eo = 0;
1822 plane_s++;
1823 }
1824 }
1825