1 /**************************************************************************
2 *
3 * Copyright 2007-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Rasterization for binned rectangles within a tile
30 */
31
32 #include <limits.h>
33 #include "util/u_math.h"
34 #include "lp_debug.h"
35 #include "lp_perf.h"
36 #include "lp_rast_priv.h"
37
38
/* Our 16-pixel stamps are laid out as:
 *
 *    0  1  2  3
 *    4  5  6  7
 *    8  9 10 11
 *   12 13 14 15
 *
 * Bit N of a stamp mask corresponds to pixel N of this layout.
 * Define bitmasks for each row and column in this layout:
 */
#define COLUMN0 ((1<<0)|(1<<4)|(1<<8) |(1<<12))
#define COLUMN1 ((1<<1)|(1<<5)|(1<<9) |(1<<13))
#define COLUMN2 ((1<<2)|(1<<6)|(1<<10)|(1<<14))
#define COLUMN3 ((1<<3)|(1<<7)|(1<<11)|(1<<15))

#define ROW0 ((1<<0) |(1<<1) |(1<<2) |(1<<3))
#define ROW1 ((1<<4) |(1<<5) |(1<<6) |(1<<7))
#define ROW2 ((1<<8) |(1<<9) |(1<<10)|(1<<11))
#define ROW3 ((1<<12)|(1<<13)|(1<<14)|(1<<15))

/* Width and height of a stamp, in pixels. */
#define STAMP_SIZE 4

/*
 * Edge mask lookup tables.  Each table is indexed by the position of a
 * rectangle edge inside its stamp (coordinate & (STAMP_SIZE - 1)) and
 * yields the pixels of that stamp lying on the inner side of the edge,
 * the edge pixel itself included.  The tables are read-only.
 */

/* left_mask_tab[i]: pixels in column i and every column to its right. */
static const unsigned left_mask_tab[STAMP_SIZE] = {
   COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
   COLUMN1 | COLUMN2 | COLUMN3,
   COLUMN2 | COLUMN3,
   COLUMN3,
};

/* right_mask_tab[i]: pixels in column i and every column to its left. */
static const unsigned right_mask_tab[STAMP_SIZE] = {
   COLUMN0,
   COLUMN0 | COLUMN1,
   COLUMN0 | COLUMN1 | COLUMN2,
   COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
};

/* top_mask_tab[i]: pixels in row i and every row below it. */
static const unsigned top_mask_tab[STAMP_SIZE] = {
   ROW0 | ROW1 | ROW2 | ROW3,
   ROW1 | ROW2 | ROW3,
   ROW2 | ROW3,
   ROW3,
};

/* bottom_mask_tab[i]: pixels in row i and every row above it. */
static const unsigned bottom_mask_tab[STAMP_SIZE] = {
   ROW0,
   ROW0 | ROW1,
   ROW0 | ROW1 | ROW2,
   ROW0 | ROW1 | ROW2 | ROW3,
};
87
88
89 /**
90 * Shade all pixels in a 4x4 block. The fragment code omits the
91 * triangle in/out tests.
92 * \param x, y location of 4x4 block in window coords
93 */
94 static void
shade_quads_all(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned x,unsigned y)95 shade_quads_all( struct lp_rasterizer_task *task,
96 const struct lp_rast_shader_inputs *inputs,
97 unsigned x, unsigned y )
98 {
99 const struct lp_scene *scene = task->scene;
100 const struct lp_rast_state *state = task->state;
101 struct lp_fragment_shader_variant *variant = state->variant;
102 uint8_t *color = scene->cbufs[0].map;
103 unsigned stride = scene->cbufs[0].stride;
104 uint8_t *cbufs[1];
105 unsigned strides[1];
106
107 color += x * 4;
108 color += y * stride;
109 cbufs[0] = color;
110 strides[0] = stride;
111
112 assert(!variant->key.depth.enabled);
113
114 /* run shader on 4x4 block */
115 BEGIN_JIT_CALL(state, task);
116 variant->jit_function[RAST_WHOLE]( &state->jit_context,
117 x, y,
118 1,
119 (const float (*)[4])GET_A0(inputs),
120 (const float (*)[4])GET_DADX(inputs),
121 (const float (*)[4])GET_DADY(inputs),
122 cbufs,
123 NULL,
124 0xffff,
125 &task->thread_data,
126 strides, 0, 0, 0 );
127 END_JIT_CALL();
128 }
129
130 static void
shade_quads_mask(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned x,unsigned y,unsigned mask)131 shade_quads_mask(struct lp_rasterizer_task *task,
132 const struct lp_rast_shader_inputs *inputs,
133 unsigned x, unsigned y,
134 unsigned mask)
135 {
136 const struct lp_rast_state *state = task->state;
137 struct lp_fragment_shader_variant *variant = state->variant;
138 const struct lp_scene *scene = task->scene;
139 uint8_t *color = scene->cbufs[0].map;
140 unsigned stride = scene->cbufs[0].stride;
141 uint8_t *cbufs[1];
142 unsigned strides[1];
143
144 color += x * 4;
145 color += y * stride;
146 cbufs[0] = color;
147 strides[0] = stride;
148
149 assert(!variant->key.depth.enabled);
150
151 /* Propagate non-interpolated raster state */
152 task->thread_data.raster_state.viewport_index = inputs->viewport_index;
153
154 /* run shader on 4x4 block */
155 BEGIN_JIT_CALL(state, task);
156 variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
157 x, y,
158 1,
159 (const float (*)[4])GET_A0(inputs),
160 (const float (*)[4])GET_DADX(inputs),
161 (const float (*)[4])GET_DADY(inputs),
162 cbufs,
163 NULL,
164 mask,
165 &task->thread_data,
166 strides, 0, 0, 0);
167 END_JIT_CALL();
168 }
169
170 /* Shade a 4x4 stamp completely within the rectangle.
171 */
172 static inline void
full(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned ix,unsigned iy)173 full(struct lp_rasterizer_task *task,
174 const struct lp_rast_shader_inputs *inputs,
175 unsigned ix, unsigned iy)
176 {
177 shade_quads_all(task,
178 inputs,
179 ix * STAMP_SIZE,
180 iy * STAMP_SIZE);
181 }
182
183 /* Shade a 4x4 stamp which may be partially outside the rectangle,
184 * according to the mask parameter.
185 */
186 static inline void
partial(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,unsigned ix,unsigned iy,unsigned mask)187 partial(struct lp_rasterizer_task *task,
188 const struct lp_rast_shader_inputs *inputs,
189 unsigned ix, unsigned iy,
190 unsigned mask)
191 {
192 /* Unfortunately we can end up generating full blocks on this path,
193 * need to catch them.
194 */
195 if (mask == 0xffff)
196 full(task, inputs, ix, iy);
197 else {
198 assert(mask);
199 shade_quads_mask(task,
200 inputs,
201 ix * STAMP_SIZE,
202 iy * STAMP_SIZE,
203 mask);
204 }
205 }
206
207
208 /**
209 * Run the full SoA shader.
210 */
211 void
lp_rast_linear_rect_fallback(struct lp_rasterizer_task * task,const struct lp_rast_shader_inputs * inputs,const struct u_rect * box)212 lp_rast_linear_rect_fallback(struct lp_rasterizer_task *task,
213 const struct lp_rast_shader_inputs *inputs,
214 const struct u_rect *box)
215 {
216 unsigned ix0, ix1, iy0, iy1;
217 unsigned left_mask;
218 unsigned right_mask;
219 unsigned top_mask;
220 unsigned bottom_mask;
221 unsigned i,j;
222
223 /* The interior of the rectangle (if there is one) will be
224 * rasterized as full 4x4 stamps.
225 *
226 * At each edge of the rectangle, however, there will be a fringe
227 * of partial blocks where the edge lands somewhere in the middle
228 * of a 4x4-pixel stamp.
229 *
230 * For each edge, precalculate a mask of the pixels inside that
231 * edge for the first 4x4-pixel stamp.
232 *
233 * Note that at the corners, and for narrow rectangles, an
234 * individual stamp may have two or more edges active. We'll deal
235 * with that below by combining these masks as appropriate.
236 */
237 left_mask = left_mask_tab [box->x0 & (STAMP_SIZE - 1)];
238 right_mask = right_mask_tab [box->x1 & (STAMP_SIZE - 1)];
239 top_mask = top_mask_tab [box->y0 & (STAMP_SIZE - 1)];
240 bottom_mask = bottom_mask_tab [box->y1 & (STAMP_SIZE - 1)];
241
242 ix0 = box->x0 / STAMP_SIZE;
243 ix1 = box->x1 / STAMP_SIZE;
244 iy0 = box->y0 / STAMP_SIZE;
245 iy1 = box->y1 / STAMP_SIZE;
246
247 /* Various special cases.
248 */
249 if (ix0 == ix1 && iy0 == iy1) {
250 /* Rectangle is contained within a single 4x4-pixel stamp:
251 */
252 partial(task, inputs, ix0, iy0,
253 (left_mask & right_mask &
254 top_mask & bottom_mask));
255 }
256 else if (ix0 == ix1) {
257 /* Left and right edges fall on the same 4-pixel-wide column:
258 */
259 unsigned mask = left_mask & right_mask;
260 partial(task, inputs, ix0, iy0, mask & top_mask);
261 for (i = iy0 + 1; i < iy1; i++)
262 partial(task, inputs, ix0, i, mask);
263 partial(task, inputs, ix0, iy1, mask & bottom_mask);
264 }
265 else if (iy0 == iy1) {
266 /* Top and bottom edges fall on the same 4-pixel-wide row:
267 */
268 unsigned mask = top_mask & bottom_mask;
269 partial(task, inputs, ix0, iy0, mask & left_mask);
270 for (i = ix0 + 1; i < ix1; i++)
271 partial(task, inputs, i, iy0, mask);
272 partial(task, inputs, ix1, iy0, mask & right_mask);
273 }
274 else {
275 /* Each pair of edges falls in a separate 4-pixel-wide
276 * row/column.
277 */
278 partial(task, inputs, ix0, iy0, left_mask & top_mask);
279 partial(task, inputs, ix0, iy1, left_mask & bottom_mask);
280 partial(task, inputs, ix1, iy0, right_mask & top_mask);
281 partial(task, inputs, ix1, iy1, right_mask & bottom_mask);
282
283 for (i = ix0 + 1; i < ix1; i++)
284 partial(task, inputs, i, iy0, top_mask);
285
286 for (i = ix0 + 1; i < ix1; i++)
287 partial(task, inputs, i, iy1, bottom_mask);
288
289 for (i = iy0 + 1; i < iy1; i++)
290 partial(task, inputs, ix0, i, left_mask);
291
292 for (i = iy0 + 1; i < iy1; i++)
293 partial(task, inputs, ix1, i, right_mask);
294
295 /* Full interior blocks
296 */
297 for (j = iy0 + 1; j < iy1; j++) {
298 for (i = ix0 + 1; i < ix1; i++) {
299 full(task, inputs, i, j);
300 }
301 }
302 }
303 }
304