• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * Setup/binning code for screen-aligned quads.
30  */
31 
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_rast.h"
37 #include "lp_state_fs.h"
38 #include "lp_state_setup.h"
39 
40 
41 #define NUM_CHANNELS 4
42 
43 #define UNDETERMINED_BLIT  -1
44 
45 
46 static inline int
subpixel_snap(float a)47 subpixel_snap(float a)
48 {
49    return util_iround(FIXED_ONE * a);
50 }
51 
52 
53 static inline float
fixed_to_float(int a)54 fixed_to_float(int a)
55 {
56    return a * (1.0f / FIXED_ONE);
57 }
58 
59 
60 /**
61  * Alloc space for a new rectangle plus the input.a0/dadx/dady arrays
62  * immediately after it.
63  * The memory is allocated from the per-scene pool, not per-tile.
64  * \param size  returns number of bytes allocated
65  * \param nr_inputs  number of fragment shader inputs
66  * \return pointer to rectangle space
67  */
68 struct lp_rast_rectangle *
lp_setup_alloc_rectangle(struct lp_scene * scene,unsigned nr_inputs)69 lp_setup_alloc_rectangle(struct lp_scene *scene, unsigned nr_inputs)
70 {
71    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
72    struct lp_rast_rectangle *rect;
73    unsigned bytes = sizeof(*rect) + (3 * input_array_sz);
74    rect = lp_scene_alloc_aligned(scene, bytes, 16);
75    if (rect == NULL)
76       return NULL;
77 
78    rect->inputs.stride = input_array_sz;
79 
80    return rect;
81 }
82 
83 
84 /**
85  * The rectangle covers the whole tile- shade whole tile.
86  * XXX no rectangle/triangle dependencies in this file - share it with
87  * the same code in lp_setup_tri.c
88  * \param tx, ty  the tile position in tiles, not pixels
89  */
90 boolean
lp_setup_whole_tile(struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs,int tx,int ty,boolean opaque)91 lp_setup_whole_tile(struct lp_setup_context *setup,
92                     const struct lp_rast_shader_inputs *inputs,
93                     int tx, int ty, boolean opaque)
94 {
95    struct lp_scene *scene = setup->scene;
96 
97    LP_COUNT(nr_fully_covered_64);
98 
99    /* if variant is opaque and scissor doesn't effect the tile */
100    if (opaque) {
101       /* Several things prevent this optimization from working:
102        * - For layered rendering we can't determine if this covers the same
103        * layer as previous rendering (or in case of clears those actually
104        * always cover all layers so optimization is impossible). Need to use
105        * fb_max_layer and not setup->layer_slot to determine this since even
106        * if there's currently no slot assigned previous rendering could have
107        * used one.
108        * - If there were any Begin/End query commands in the scene then those
109        * would get removed which would be very wrong. Furthermore, if queries
110        * were just active we also can't do the optimization since to get
111        * accurate query results we unfortunately need to execute the rendering
112        * commands.
113        */
114       if (!scene->fb.zsbuf && scene->fb_max_layer == 0 &&
115           !scene->had_queries) {
116          /*
117           * All previous rendering will be overwritten so reset the bin.
118           */
119          lp_scene_bin_reset(scene, tx, ty);
120       }
121 
122       if (inputs->is_blit) {
123          LP_COUNT(nr_blit_64);
124          return lp_scene_bin_cmd_with_state(scene, tx, ty,
125                                             setup->fs.stored,
126                                             LP_RAST_OP_BLIT,
127                                             lp_rast_arg_inputs(inputs));
128       } else {
129          LP_COUNT(nr_shade_opaque_64);
130          return lp_scene_bin_cmd_with_state(scene, tx, ty,
131                                             setup->fs.stored,
132                                             LP_RAST_OP_SHADE_TILE_OPAQUE,
133                                             lp_rast_arg_inputs(inputs));
134       }
135    } else {
136       LP_COUNT(nr_shade_64);
137       return lp_scene_bin_cmd_with_state(scene, tx, ty,
138                                          setup->fs.stored,
139                                          LP_RAST_OP_SHADE_TILE,
140                                          lp_rast_arg_inputs(inputs));
141    }
142 }
143 
144 
145 boolean
lp_setup_is_blit(const struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs)146 lp_setup_is_blit(const struct lp_setup_context *setup,
147                  const struct lp_rast_shader_inputs *inputs)
148 {
149    const struct lp_fragment_shader_variant *variant =
150       setup->fs.current.variant;
151 
152    if (variant->blit) {
153       /*
154        * Detect blits.
155        */
156       const struct lp_jit_texture *texture =
157          &setup->fs.current.jit_context.textures[0];
158 
159       /* XXX: dadx vs dady confusion below?
160        */
161       const float dsdx = GET_DADX(inputs)[1][0] * texture->width;
162       const float dsdy = GET_DADX(inputs)[1][1] * texture->width;
163       const float dtdx = GET_DADY(inputs)[1][0] * texture->height;
164       const float dtdy = GET_DADY(inputs)[1][1] * texture->height;
165 
166       /*
167        * We don't need to check s0/t0 tolerances
168        * as we establish as pre-condition that there is no
169        * texture filtering.
170        */
171 
172       ASSERTED struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
173       assert(samp0);
174       assert(samp0->sampler_state.min_img_filter == PIPE_TEX_FILTER_NEAREST);
175       assert(samp0->sampler_state.mag_img_filter == PIPE_TEX_FILTER_NEAREST);
176 
177       /*
178        * Check for 1:1 match of texels to dest pixels
179        */
180 
181       if (util_is_approx(dsdx, 1.0f, 1.0f/LP_MAX_WIDTH) &&
182           util_is_approx(dsdy, 0.0f, 1.0f/LP_MAX_HEIGHT) &&
183           util_is_approx(dtdx, 0.0f, 1.0f/LP_MAX_WIDTH) &&
184           util_is_approx(dtdy, 1.0f, 1.0f/LP_MAX_HEIGHT)) {
185          return true;
186       } else {
187 #if 0
188          debug_printf("dsdx = %f\n", dsdx);
189          debug_printf("dsdy = %f\n", dsdy);
190          debug_printf("dtdx = %f\n", dtdx);
191          debug_printf("dtdy = %f\n", dtdy);
192          debug_printf("\n");
193 #endif
194          return FALSE;
195       }
196    }
197 
198    return FALSE;
199 }
200 
201 
202 static inline void
partial(struct lp_setup_context * setup,const struct lp_rast_rectangle * rect,boolean opaque,unsigned ix,unsigned iy,unsigned mask)203 partial(struct lp_setup_context *setup,
204         const struct lp_rast_rectangle *rect,
205         boolean opaque,
206         unsigned ix, unsigned iy,
207         unsigned mask) // RECT_PLANE_x bits
208 {
209    if (mask == 0) {
210       assert(rect->box.x0 <= ix * TILE_SIZE);
211       assert(rect->box.y0 <= iy * TILE_SIZE);
212       assert(rect->box.x1 >= (ix+1) * TILE_SIZE - 1);
213       assert(rect->box.y1 >= (iy+1) * TILE_SIZE - 1);
214 
215       lp_setup_whole_tile(setup, &rect->inputs, ix, iy, opaque);
216    } else {
217       LP_COUNT(nr_partially_covered_64);
218       lp_scene_bin_cmd_with_state(setup->scene,
219                                   ix, iy,
220                                   setup->fs.stored,
221                                   LP_RAST_OP_RECTANGLE,
222                                   lp_rast_arg_rectangle(rect));
223    }
224 }
225 
226 
227 /**
228  * Setup/bin a screen-aligned rect.
229  * We need three corner vertices in order to correctly setup
230  * interpolated parameters.  We *could* get away with just the
231  * diagonal vertices but it'd cause ugliness elsewhere.
232  *
233  *   + -------v0
234  *   |        |
235  *  v2 ------ v1
236  *
237  * By an unfortunate mixup between GL and D3D coordinate spaces, half
238  * of this file talks about clockwise rectangles (which were CCW in GL
239  * coordinate space), while the other half prefers to work with D3D
240  * CCW rectangles.
241  */
242 static boolean
try_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean frontfacing)243 try_rect_cw(struct lp_setup_context *setup,
244             const float (*v0)[4],
245             const float (*v1)[4],
246             const float (*v2)[4],
247             boolean frontfacing)
248 {
249    const struct lp_fragment_shader_variant *variant =
250       setup->fs.current.variant;
251    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
252    struct lp_scene *scene = setup->scene;
253 
254    /* x/y positions in fixed point */
255    int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset);
256    int x1 = subpixel_snap(v1[0][0] - setup->pixel_offset);
257    int x2 = subpixel_snap(v2[0][0] - setup->pixel_offset);
258    int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset);
259    int y1 = subpixel_snap(v1[0][1] - setup->pixel_offset);
260    int y2 = subpixel_snap(v2[0][1] - setup->pixel_offset);
261 
262    LP_COUNT(nr_rects);
263 
264    /* Cull clockwise rects without overflowing.
265     */
266    const boolean cw = (x2 < x1) ^ (y0 < y2);
267    if (cw) {
268       LP_COUNT(nr_culled_rects);
269       return TRUE;
270    }
271 
272    const float (*pv)[4];
273    if (setup->flatshade_first) {
274       pv = v0;
275    } else {
276       pv = v2;
277    }
278 
279    unsigned viewport_index = 0;
280    if (setup->viewport_index_slot > 0) {
281       unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
282       viewport_index = lp_clamp_viewport_idx(*udata);
283    }
284 
285    unsigned layer = 0;
286    if (setup->layer_slot > 0) {
287       layer = *(unsigned*)pv[setup->layer_slot];
288       layer = MIN2(layer, scene->fb_max_layer);
289    }
290 
291    /* Bounding rectangle (in pixels) */
292    struct u_rect bbox;
293    {
294       /* Yes this is necessary to accurately calculate bounding boxes
295        * with the two fill-conventions we support.  GL (normally) ends
296        * up needing a bottom-left fill convention, which requires
297        * slightly different rounding.
298        */
299       int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
300 
301       bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
302       bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
303       bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
304       bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
305 
306       /* Inclusive coordinates:
307        */
308       bbox.x1--;
309       bbox.y1--;
310    }
311 
312    if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
313       if (0) debug_printf("no intersection\n");
314       LP_COUNT(nr_culled_rects);
315       return TRUE;
316    }
317 
318    u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);
319 
320    struct lp_rast_rectangle *rect =
321       lp_setup_alloc_rectangle(scene, key->num_inputs);
322    if (!rect)
323       return FALSE;
324 
325 #ifdef DEBUG
326    rect->v[0][0] = v0[0][0];
327    rect->v[0][1] = v0[0][1];
328    rect->v[1][0] = v1[0][0];
329    rect->v[1][1] = v1[0][1];
330 #endif
331 
332    rect->box.x0 = bbox.x0;
333    rect->box.x1 = bbox.x1;
334    rect->box.y0 = bbox.y0;
335    rect->box.y1 = bbox.y1;
336 
337    /* Setup parameter interpolants:
338     */
339    setup->setup.variant->jit_function(v0,
340                                       v1,
341                                       v2,
342                                       frontfacing,
343                                       GET_A0(&rect->inputs),
344                                       GET_DADX(&rect->inputs),
345                                       GET_DADY(&rect->inputs),
346                                       &setup->setup.variant->key);
347 
348    rect->inputs.frontfacing = frontfacing;
349    rect->inputs.disable = FALSE;
350    rect->inputs.is_blit = lp_setup_is_blit(setup, &rect->inputs);
351    rect->inputs.layer = layer;
352    rect->inputs.viewport_index = viewport_index;
353    rect->inputs.view_index = setup->view_index;
354 
355    return lp_setup_bin_rectangle(setup, rect, variant->opaque);
356 }
357 
358 
359 boolean
lp_setup_bin_rectangle(struct lp_setup_context * setup,struct lp_rast_rectangle * rect,boolean opaque)360 lp_setup_bin_rectangle(struct lp_setup_context *setup,
361                        struct lp_rast_rectangle *rect,
362                        boolean opaque)
363 {
364    struct lp_scene *scene = setup->scene;
365    unsigned left_mask = 0;
366    unsigned right_mask = 0;
367    unsigned top_mask = 0;
368    unsigned bottom_mask = 0;
369 
370    /*
371     * All fields of 'rect' are now set.  The remaining code here is
372     * concerned with binning.
373     */
374 
375    /* Convert to inclusive tile coordinates:
376     */
377    const unsigned ix0 = rect->box.x0 / TILE_SIZE;
378    const unsigned iy0 = rect->box.y0 / TILE_SIZE;
379    const unsigned ix1 = rect->box.x1 / TILE_SIZE;
380    const unsigned iy1 = rect->box.y1 / TILE_SIZE;
381 
382    /*
383     * Clamp to framebuffer size
384     */
385    assert(ix0 == MAX2(ix0, 0));
386    assert(iy0 == MAX2(iy0, 0));
387    assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
388    assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
389 
390    if (ix0 * TILE_SIZE != rect->box.x0)
391       left_mask = RECT_PLANE_LEFT;
392 
393    if (ix1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.x1)
394       right_mask  = RECT_PLANE_RIGHT;
395 
396    if (iy0 * TILE_SIZE != rect->box.y0)
397       top_mask    = RECT_PLANE_TOP;
398 
399    if (iy1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.y1)
400       bottom_mask = RECT_PLANE_BOTTOM;
401 
402    /* Determine which tile(s) intersect the rectangle's bounding box
403     */
404    if (iy0 == iy1 && ix0 == ix1) {
405       partial(setup, rect, opaque, ix0, iy0,
406               (left_mask | right_mask | top_mask | bottom_mask));
407    } else if (ix0 == ix1) {
408       unsigned mask = left_mask | right_mask;
409       partial(setup, rect, opaque, ix0, iy0, mask | top_mask);
410       for (unsigned i = iy0 + 1; i < iy1; i++)
411          partial(setup, rect, opaque, ix0, i, mask);
412       partial(setup, rect, opaque, ix0, iy1, mask | bottom_mask);
413    } else if (iy0 == iy1) {
414       unsigned mask = top_mask | bottom_mask;
415       partial(setup, rect, opaque, ix0, iy0, mask | left_mask);
416       for (unsigned i = ix0 + 1; i < ix1; i++)
417          partial(setup, rect, opaque, i, iy0, mask);
418       partial(setup, rect, opaque, ix1, iy0, mask | right_mask);
419    } else {
420       partial(setup, rect, opaque, ix0, iy0, left_mask  | top_mask);
421       partial(setup, rect, opaque, ix0, iy1, left_mask  | bottom_mask);
422       partial(setup, rect, opaque, ix1, iy0, right_mask | top_mask);
423       partial(setup, rect, opaque, ix1, iy1, right_mask | bottom_mask);
424 
425       /* Top/Bottom fringes
426        */
427       for (unsigned i = ix0 + 1; i < ix1; i++) {
428          partial(setup, rect, opaque, i, iy0, top_mask);
429          partial(setup, rect, opaque, i, iy1, bottom_mask);
430       }
431 
432       /* Left/Right fringes
433        */
434       for (unsigned i = iy0 + 1; i < iy1; i++) {
435          partial(setup, rect, opaque, ix0, i, left_mask);
436          partial(setup, rect, opaque, ix1, i, right_mask);
437       }
438 
439       /* Full interior tiles
440        */
441       for (unsigned j = iy0 + 1; j < iy1; j++) {
442          for (unsigned i = ix0 + 1; i < ix1; i++) {
443             lp_setup_whole_tile(setup, &rect->inputs, i, j, opaque);
444          }
445       }
446    }
447 
448    /* Catch any out-of-memory which occurred during binning.  Do this
449     * once here rather than checking all the return values throughout.
450     */
451    if (lp_scene_is_oom(scene)) {
452       /* Disable rasterization of this partially-binned rectangle.
453        * We'll flush this scene and re-bin the entire rectangle:
454        */
455       rect->inputs.disable = TRUE;
456       return FALSE;
457    }
458 
459    return TRUE;
460 }
461 
462 
463 void
lp_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean frontfacing)464 lp_rect_cw(struct lp_setup_context *setup,
465            const float (*v0)[4],
466            const float (*v1)[4],
467            const float (*v2)[4],
468            boolean frontfacing)
469 {
470    if (!try_rect_cw(setup, v0, v1, v2, frontfacing)) {
471       if (!lp_setup_flush_and_restart(setup))
472          return;
473 
474       if (!try_rect_cw(setup, v0, v1, v2, frontfacing))
475          return;
476    }
477 }
478 
479 
480 /**
481  * Take the six vertices for two triangles and try to determine if they
482  * form a screen-aligned quad/rectangle.  If so, draw the rect directly,
483  * else, draw as two regular triangles.
484  */
485 static boolean
do_rect_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4],boolean front)486 do_rect_ccw(struct lp_setup_context *setup,
487             const float (*v0)[4],
488             const float (*v1)[4],
489             const float (*v2)[4],
490             const float (*v3)[4],
491             const float (*v4)[4],
492             const float (*v5)[4],
493             boolean front)
494 {
495    const float (*rv0)[4], (*rv1)[4], (*rv2)[4], (*rv3)[4];  /* rect verts */
496 
497 #define SAME_POS(A, B)   (A[0][0] == B[0][0] && \
498                           A[0][1] == B[0][1] && \
499                           A[0][2] == B[0][2] && \
500                           A[0][3] == B[0][3])
501 
502    /* Only need to consider CCW orientations.  There are nine ways
503     * that two counter-clockwise triangles can join up:
504     */
505    if (SAME_POS(v0, v3)) {
506       if (SAME_POS(v2, v4)) {
507          /*
508           *    v5   v4/v2
509           *     +-----+
510           *     |   / |
511           *     |  /  |
512           *     | /   |
513           *     +-----+
514           *   v3/v0   v1
515           */
516          rv0 = v5;
517          rv1 = v0;
518          rv2 = v1;
519          rv3 = v2;
520       } else if (SAME_POS(v1, v5)) {
521          /*
522           *    v4   v3/v0
523           *     +-----+
524           *     |   / |
525           *     |  /  |
526           *     | /   |
527           *     +-----+
528           *   v5/v1   v2
529           */
530          rv0 = v4;
531          rv1 = v1;
532          rv2 = v2;
533          rv3 = v0;
534       } else {
535          goto emit_triangles;
536       }
537    } else if (SAME_POS(v0, v5)) {
538       if (SAME_POS(v2, v3)) {
539          /*
540           *    v4   v3/v2
541           *     +-----+
542           *     |   / |
543           *     |  /  |
544           *     | /   |
545           *     +-----+
546           *   v5/v0   v1
547           */
548          rv0 = v4;
549          rv1 = v0;
550          rv2 = v1;
551          rv3 = v2;
552       } else if (SAME_POS(v1, v4)) {
553          /*
554           *    v3   v5/v0
555           *     +-----+
556           *     |   / |
557           *     |  /  |
558           *     | /   |
559           *     +-----+
560           *   v4/v1   v2
561           */
562          rv0 = v3;
563          rv1 = v1;
564          rv2 = v2;
565          rv3 = v0;
566       } else {
567          goto emit_triangles;
568       }
569    } else if (SAME_POS(v0, v4)) {
570       if (SAME_POS(v2, v5)) {
571          /*
572           *    v3   v5/v2
573           *     +-----+
574           *     |   / |
575           *     |  /  |
576           *     | /   |
577           *     +-----+
578           *   v4/v0   v1
579           */
580          rv0 = v3;
581          rv1 = v0;
582          rv2 = v1;
583          rv3 = v2;
584       } else if (SAME_POS(v1, v3)) {
585          /*
586           *    v5   v4/v0
587           *     +-----+
588           *     |   / |
589           *     |  /  |
590           *     | /   |
591           *     +-----+
592           *   v3/v1   v2
593           */
594          rv0 = v5;
595          rv1 = v1;
596          rv2 = v2;
597          rv3 = v0;
598       } else {
599          goto emit_triangles;
600       }
601    } else if (SAME_POS(v2, v3)) {
602       if (SAME_POS(v1, v4)) {
603          /*
604           *    v5   v4/v1
605           *     +-----+
606           *     |   / |
607           *     |  /  |
608           *     | /   |
609           *     +-----+
610           *   v3/v2   v0
611           */
612          rv0 = v5;
613          rv1 = v2;
614          rv2 = v0;
615          rv3 = v1;
616       } else {
617          goto emit_triangles;
618       }
619    } else if (SAME_POS(v2, v5)) {
620       if (SAME_POS(v1, v3)) {
621          /*
622           *    v4   v3/v1
623           *     +-----+
624           *     |   / |
625           *     |  /  |
626           *     | /   |
627           *     +-----+
628           *   v5/v2   v0
629           */
630          rv0 = v4;
631          rv1 = v2;
632          rv2 = v0;
633          rv3 = v1;
634       } else {
635          goto emit_triangles;
636       }
637    } else if (SAME_POS(v2, v4)) {
638       if (SAME_POS(v1, v5)) {
639          /*
640           *    v3   v5/v1
641           *     +-----+
642           *     |   / |
643           *     |  /  |
644           *     | /   |
645           *     +-----+
646           *   v4/v2   v0
647           */
648          rv0 = v3;
649          rv1 = v2;
650          rv2 = v0;
651          rv3 = v1;
652       } else {
653          goto emit_triangles;
654       }
655    } else {
656       goto emit_triangles;
657    }
658 
659 #define SAME_X(A, B)   (A[0][0] == B[0][0])
660 #define SAME_Y(A, B)   (A[0][1] == B[0][1])
661 
662    /* The vertices are now counter clockwise, as such:
663     *
664     *  rv0 -------rv3
665     *    |        |
666     *  rv1 ------ rv2
667     *
668     * To render as a rectangle,
669     *   * The X values should be the same at v0, v1 and v2, v3.
670     *   * The Y values should be the same at v0, v3 and v1, v2.
671     */
672    if (SAME_Y(rv0, rv1)) {
673       const float (*tmp)[4];
674       tmp = rv0;
675       rv0 = rv1;
676       rv1 = rv2;
677       rv2 = rv3;
678       rv3 = tmp;
679    }
680 
681    if (SAME_X(rv0, rv1) && SAME_X(rv2, rv3) &&
682        SAME_Y(rv0, rv3) && SAME_Y(rv1, rv2)) {
683       const struct lp_setup_variant_key *key = &setup->setup.variant->key;
684       const unsigned n = key->num_inputs;
685       unsigned i, j;
686 
687       /* We have a rectangle.  Check that the other attributes are
688        * coplanar.
689        */
690       for (i = 0; i < n; i++) {
691          for (j = 0; j < 4; j++) {
692             if (key->inputs[i].usage_mask & (1<<j)) {
693                unsigned k = key->inputs[i].src_index;
694                float dxdx1, dxdx2, dxdy1, dxdy2;
695                dxdx1 = rv0[k][j] - rv3[k][j];
696                dxdx2 = rv1[k][j] - rv2[k][j];
697                dxdy1 = rv0[k][j] - rv1[k][j];
698                dxdy2 = rv3[k][j] - rv2[k][j];
699                if (dxdx1 != dxdx2 ||
700                    dxdy1 != dxdy2) {
701                   goto emit_triangles;
702                }
703             }
704          }
705       }
706 
707       /* Note we're changing to clockwise here.  Fix this by reworking
708        * lp_rect_cw to expect/operate on ccw rects.  Note that
709        * function was previously misnamed.
710        */
711       lp_rect_cw(setup, rv0, rv2, rv1, front);
712       return TRUE;
713    } else {
714       /* setup->quad(setup, rv0, rv1, rv2, rv3); */
715    }
716 
717 emit_triangles:
718    return FALSE;
719 }
720 
721 
/** Orientation of a triangle as classified by winding(). */
enum winding {
   WINDING_NONE = 0,   /* zero determinant */
   WINDING_CCW,        /* negative determinant */
   WINDING_CW          /* positive determinant */
};


/**
 * Classify a triangle by the sign of the z component of the cross
 * product of its edge vectors (v0 - v2) and (v1 - v2), using only the
 * x/y components of attribute 0 of each vertex.
 *
 * \return WINDING_CCW for a negative determinant, WINDING_CW for a
 *         positive one, WINDING_NONE otherwise
 */
static inline enum winding
winding(const float (*v0)[4],
        const float (*v1)[4],
        const float (*v2)[4])
{
   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = v0[0][0] - v2[0][0];
   const float ey = v0[0][1] - v2[0][1];
   const float fx = v1[0][0] - v2[0][0];
   const float fy = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   const float det = ex * fy - ey * fx;

   if (det < 0.0f)
      return WINDING_CCW;
   else if (det > 0.0f)
      return WINDING_CW;
   else
      return WINDING_NONE;
}
750 
751 
752 static boolean
setup_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])753 setup_rect_cw(struct lp_setup_context *setup,
754               const float (*v0)[4],
755               const float (*v1)[4],
756               const float (*v2)[4],
757               const float (*v3)[4],
758               const float (*v4)[4],
759               const float (*v5)[4])
760 {
761    enum winding winding0 = winding(v0, v1, v2);
762    enum winding winding1 = winding(v3, v4, v5);
763 
764    if (winding0 == WINDING_CW &&
765        winding1 == WINDING_CW) {
766       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
767    } else if (winding0 == WINDING_CW) {
768       setup->triangle(setup, v0, v1, v2);
769       return TRUE;
770    } else if (winding1 == WINDING_CW) {
771       setup->triangle(setup, v3, v4, v5);
772       return TRUE;
773    } else {
774       return TRUE;
775    }
776 }
777 
778 
779 static boolean
setup_rect_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])780 setup_rect_ccw(struct lp_setup_context *setup,
781                const float (*v0)[4],
782                const float (*v1)[4],
783                const float (*v2)[4],
784                const float (*v3)[4],
785                const float (*v4)[4],
786                const float (*v5)[4])
787 {
788    enum winding winding0 = winding(v0, v1, v2);
789    enum winding winding1 = winding(v3, v4, v5);
790 
791    if (winding0 == WINDING_CCW &&
792        winding1 == WINDING_CCW) {
793       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
794    } else if (winding0 == WINDING_CCW) {
795       setup->triangle(setup, v0, v1, v2);
796       return TRUE;
797    } else if (winding1 == WINDING_CCW) {
798       return FALSE;
799       setup->triangle(setup, v3, v4, v5);
800       return TRUE;
801    } else {
802       return TRUE;
803    }
804 }
805 
806 
807 static boolean
setup_rect_noop(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])808 setup_rect_noop(struct lp_setup_context *setup,
809                 const float (*v0)[4],
810                 const float (*v1)[4],
811                 const float (*v2)[4],
812                 const float (*v3)[4],
813                 const float (*v4)[4],
814                 const float (*v5)[4])
815 {
816    return TRUE;
817 }
818 
819 
820 static boolean
setup_rect_both(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])821 setup_rect_both(struct lp_setup_context *setup,
822                 const float (*v0)[4],
823                 const float (*v1)[4],
824                 const float (*v2)[4],
825                 const float (*v3)[4],
826                 const float (*v4)[4],
827                 const float (*v5)[4])
828 {
829    enum winding winding0 = winding(v0, v1, v2);
830    enum winding winding1 = winding(v3, v4, v5);
831 
832    if (winding0 != winding1) {
833       /* If we knew that the "front" parameter wasn't going to be
834        * referenced, could rearrange one of the two triangles such
835        * that they were both CCW.  Aero actually does send mixed
836        * CW/CCW rectangles under some circumstances, but we catch them
837        * explicitly.
838        */
839       return FALSE;
840    } else if (winding0 == WINDING_CCW) {
841       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
842    } else if (winding0 == WINDING_CW) {
843       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
844    } else {
845       return TRUE;
846    }
847 }
848 
849 
850 void
lp_setup_choose_rect(struct lp_setup_context * setup)851 lp_setup_choose_rect(struct lp_setup_context *setup)
852 {
853    if (setup->rasterizer_discard) {
854       setup->rect = setup_rect_noop;
855       return;
856    }
857 
858    switch (setup->cullmode) {
859    case PIPE_FACE_NONE:
860       setup->rect = setup_rect_both;
861       break;
862    case PIPE_FACE_BACK:
863       setup->rect = setup->ccw_is_frontface ? setup_rect_ccw : setup_rect_cw;
864       break;
865    case PIPE_FACE_FRONT:
866       setup->rect = setup->ccw_is_frontface ? setup_rect_cw : setup_rect_ccw;
867       break;
868    default:
869       setup->rect = setup_rect_noop;
870       break;
871    }
872 }
873