• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /*
29  * Rasterization for binned rectangles within a tile
30  */
31 
32 #include <limits.h>
33 #include "util/u_math.h"
34 #include "lp_debug.h"
35 #include "lp_perf.h"
36 #include "lp_rast_priv.h"
37 
38 
/* Our 16-pixel stamps are laid out as:
 *
 *    0  1  2  3
 *    4  5  6  7
 *    8  9  10 11
 *    12 13 14 15
 *
 * Bit N of a stamp mask corresponds to pixel N in this layout.
 * Define bitmasks for each row and column:
 */
#define COLUMN0 ((1<<0)|(1<<4)|(1<<8) |(1<<12))
#define COLUMN1 ((1<<1)|(1<<5)|(1<<9) |(1<<13))
#define COLUMN2 ((1<<2)|(1<<6)|(1<<10)|(1<<14))
#define COLUMN3 ((1<<3)|(1<<7)|(1<<11)|(1<<15))

#define ROW0 ((1<<0) |(1<<1) |(1<<2) |(1<<3))
#define ROW1 ((1<<4) |(1<<5) |(1<<6) |(1<<7))
#define ROW2 ((1<<8) |(1<<9) |(1<<10)|(1<<11))
#define ROW3 ((1<<12)|(1<<13)|(1<<14)|(1<<15))

/* Width and height of a stamp, in pixels. */
#define STAMP_SIZE 4

/*
 * The four tables below are read-only lookup data, hence const.  Each is
 * indexed by the position of a rectangle edge inside its stamp
 * (coordinate & (STAMP_SIZE - 1)).
 */

/* Entry i keeps column i and every column to its right. */
static const unsigned left_mask_tab[STAMP_SIZE] = {
   COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
   COLUMN1 | COLUMN2 | COLUMN3,
   COLUMN2 | COLUMN3,
   COLUMN3,
};

/* Entry i keeps column i and every column to its left. */
static const unsigned right_mask_tab[STAMP_SIZE] = {
   COLUMN0,
   COLUMN0 | COLUMN1,
   COLUMN0 | COLUMN1 | COLUMN2,
   COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
};

/* Entry i keeps row i and every row below it. */
static const unsigned top_mask_tab[STAMP_SIZE] = {
   ROW0 | ROW1 | ROW2 | ROW3,
   ROW1 | ROW2 | ROW3,
   ROW2 | ROW3,
   ROW3,
};

/* Entry i keeps row i and every row above it. */
static const unsigned bottom_mask_tab[STAMP_SIZE] = {
   ROW0,
   ROW0 | ROW1,
   ROW0 | ROW1 | ROW2,
   ROW0 | ROW1 | ROW2 | ROW3,
};
87 
88 
89 /**
90  * Shade all pixels in a 4x4 block.  The fragment code omits the
91  * triangle in/out tests.
92  * \param x, y location of 4x4 block in window coords
93  */
94 static void
shade_quads_all(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned x,unsigned y)95 shade_quads_all( struct lp_rasterizer_task *task,
96                  const struct lp_rast_shader_inputs *inputs,
97                  unsigned x, unsigned y )
98 {
99    const struct lp_scene *scene = task->scene;
100    const struct lp_rast_state *state = task->state;
101    struct lp_fragment_shader_variant *variant = state->variant;
102    uint8_t *color = scene->cbufs[0].map;
103    unsigned stride = scene->cbufs[0].stride;
104    uint8_t *cbufs[1];
105    unsigned strides[1];
106 
107    color += x * 4;
108    color += y * stride;
109    cbufs[0] = color;
110    strides[0] = stride;
111 
112    assert(!variant->key.depth.enabled);
113 
114    /* run shader on 4x4 block */
115    BEGIN_JIT_CALL(state, task);
116    variant->jit_function[RAST_WHOLE]( &state->jit_context,
117                                       x, y,
118                                       1,
119                                       (const float (*)[4])GET_A0(inputs),
120                                       (const float (*)[4])GET_DADX(inputs),
121                                       (const float (*)[4])GET_DADY(inputs),
122                                       cbufs,
123                                       NULL,
124                                       0xffff,
125                                       &task->thread_data,
126                                       strides, 0, 0, 0 );
127    END_JIT_CALL();
128 }
129 
130 static void
shade_quads_mask(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned x,unsigned y,unsigned mask)131 shade_quads_mask(struct lp_rasterizer_task *task,
132                  const struct lp_rast_shader_inputs *inputs,
133                  unsigned x, unsigned y,
134                  unsigned mask)
135 {
136    const struct lp_rast_state *state = task->state;
137    struct lp_fragment_shader_variant *variant = state->variant;
138    const struct lp_scene *scene = task->scene;
139    uint8_t *color = scene->cbufs[0].map;
140    unsigned stride = scene->cbufs[0].stride;
141    uint8_t *cbufs[1];
142    unsigned strides[1];
143 
144    color += x * 4;
145    color += y * stride;
146    cbufs[0] = color;
147    strides[0] = stride;
148 
149    assert(!variant->key.depth.enabled);
150 
151    /* Propagate non-interpolated raster state */
152    task->thread_data.raster_state.viewport_index = inputs->viewport_index;
153 
154    /* run shader on 4x4 block */
155    BEGIN_JIT_CALL(state, task);
156    variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
157                                          x, y,
158                                          1,
159                                          (const float (*)[4])GET_A0(inputs),
160                                          (const float (*)[4])GET_DADX(inputs),
161                                          (const float (*)[4])GET_DADY(inputs),
162                                          cbufs,
163                                          NULL,
164                                          mask,
165                                          &task->thread_data,
166                                          strides, 0, 0, 0);
167    END_JIT_CALL();
168 }
169 
170 /* Shade a 4x4 stamp completely within the rectangle.
171  */
172 static inline void
full(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned ix,unsigned iy)173 full(struct lp_rasterizer_task *task,
174      const struct lp_rast_shader_inputs *inputs,
175      unsigned ix, unsigned iy)
176 {
177    shade_quads_all(task,
178                    inputs,
179                    ix * STAMP_SIZE,
180                    iy * STAMP_SIZE);
181 }
182 
183 /* Shade a 4x4 stamp which may be partially outside the rectangle,
184  * according to the mask parameter.
185  */
186 static inline void
partial(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned ix,unsigned iy,unsigned mask)187 partial(struct lp_rasterizer_task *task,
188         const struct lp_rast_shader_inputs *inputs,
189         unsigned ix, unsigned iy,
190         unsigned mask)
191 {
192    /* Unfortunately we can end up generating full blocks on this path,
193     * need to catch them.
194     */
195    if (mask == 0xffff)
196       full(task, inputs, ix, iy);
197    else {
198       assert(mask);
199       shade_quads_mask(task,
200                        inputs,
201                        ix * STAMP_SIZE,
202                        iy * STAMP_SIZE,
203                        mask);
204    }
205 }
206 
207 
208 /**
209  * Run the full SoA shader.
210  */
211 void
lp_rast_linear_rect_fallback(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,const struct u_rect * box)212 lp_rast_linear_rect_fallback(struct lp_rasterizer_task *task,
213                              const struct lp_rast_shader_inputs *inputs,
214                              const struct u_rect *box)
215 {
216    unsigned ix0, ix1, iy0, iy1;
217    unsigned left_mask;
218    unsigned right_mask;
219    unsigned top_mask;
220    unsigned bottom_mask;
221    unsigned i,j;
222 
223    /* The interior of the rectangle (if there is one) will be
224     * rasterized as full 4x4 stamps.
225     *
226     * At each edge of the rectangle, however, there will be a fringe
227     * of partial blocks where the edge lands somewhere in the middle
228     * of a 4x4-pixel stamp.
229     *
230     * For each edge, precalculate a mask of the pixels inside that
231     * edge for the first 4x4-pixel stamp.
232     *
233     * Note that at the corners, and for narrow rectangles, an
234     * individual stamp may have two or more edges active.  We'll deal
235     * with that below by combining these masks as appropriate.
236     */
237    left_mask   = left_mask_tab   [box->x0 & (STAMP_SIZE - 1)];
238    right_mask  = right_mask_tab  [box->x1 & (STAMP_SIZE - 1)];
239    top_mask    = top_mask_tab    [box->y0 & (STAMP_SIZE - 1)];
240    bottom_mask = bottom_mask_tab [box->y1 & (STAMP_SIZE - 1)];
241 
242    ix0 = box->x0 / STAMP_SIZE;
243    ix1 = box->x1 / STAMP_SIZE;
244    iy0 = box->y0 / STAMP_SIZE;
245    iy1 = box->y1 / STAMP_SIZE;
246 
247    /* Various special cases.
248     */
249    if (ix0 == ix1 && iy0 == iy1) {
250       /* Rectangle is contained within a single 4x4-pixel stamp:
251        */
252       partial(task, inputs, ix0, iy0,
253               (left_mask & right_mask &
254                top_mask & bottom_mask));
255    }
256    else if (ix0 == ix1) {
257       /* Left and right edges fall on the same 4-pixel-wide column:
258        */
259       unsigned mask = left_mask & right_mask;
260       partial(task, inputs, ix0, iy0, mask & top_mask);
261       for (i = iy0 + 1; i < iy1; i++)
262          partial(task, inputs, ix0, i, mask);
263       partial(task, inputs, ix0, iy1, mask & bottom_mask);
264    }
265    else if (iy0 == iy1) {
266       /* Top and bottom edges fall on the same 4-pixel-wide row:
267        */
268       unsigned mask = top_mask & bottom_mask;
269       partial(task, inputs, ix0, iy0, mask & left_mask);
270       for (i = ix0 + 1; i < ix1; i++)
271          partial(task, inputs, i, iy0, mask);
272       partial(task, inputs, ix1, iy0, mask & right_mask);
273    }
274    else {
275       /* Each pair of edges falls in a separate 4-pixel-wide
276        * row/column.
277        */
278       partial(task, inputs, ix0, iy0, left_mask  & top_mask);
279       partial(task, inputs, ix0, iy1, left_mask  & bottom_mask);
280       partial(task, inputs, ix1, iy0, right_mask & top_mask);
281       partial(task, inputs, ix1, iy1, right_mask & bottom_mask);
282 
283       for (i = ix0 + 1; i < ix1; i++)
284          partial(task, inputs, i, iy0, top_mask);
285 
286       for (i = ix0 + 1; i < ix1; i++)
287          partial(task, inputs, i, iy1, bottom_mask);
288 
289       for (i = iy0 + 1; i < iy1; i++)
290          partial(task, inputs, ix0, i, left_mask);
291 
292       for (i = iy0 + 1; i < iy1; i++)
293          partial(task, inputs, ix1, i, right_mask);
294 
295       /* Full interior blocks
296        */
297       for (j = iy0 + 1; j < iy1; j++) {
298          for (i = ix0 + 1; i < ix1; i++) {
299             full(task, inputs, i, j);
300          }
301       }
302    }
303 }
304