1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * Copyright 2010 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * \brief Quad depth / stencil testing
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/u_format.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "sp_context.h"
39 #include "sp_quad.h"
40 #include "sp_quad_pipe.h"
41 #include "sp_tile_cache.h"
42 #include "sp_state.h" /* for sp_fragment_shader */
43
44
/**
 * Working state shared by the depth/stencil helpers in this file while a
 * batch of quads is tested against one cached depth/stencil tile.
 */
struct depth_data {
   struct pipe_surface *ps;         /**< depth/stencil surface being tested against */
   enum pipe_format format;         /**< format of \c ps; selects the packing used by the helpers */
   unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
   unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
   ubyte stencilVals[TGSI_QUAD_SIZE];  /**< stencil values fetched from (and written back to) the buffer */
   boolean use_shader_stencil_refs; /**< if set, shader_stencil_refs[] overrides the state's reference value */
   ubyte shader_stencil_refs[TGSI_QUAD_SIZE];  /**< per-pixel reference values copied from the quad's stencil output */
   struct softpipe_cached_tile *tile;  /**< cached tile that is read and updated */
   float minval, maxval;            /**< range quad depth is clamped to when \c clamp is set */
   bool clamp;                      /**< clamp quad depth to [minval, maxval] before conversion */
};
57
58
59
60 static void
get_depth_stencil_values(struct depth_data * data,const struct quad_header * quad)61 get_depth_stencil_values( struct depth_data *data,
62 const struct quad_header *quad )
63 {
64 unsigned j;
65 const struct softpipe_cached_tile *tile = data->tile;
66
67 switch (data->format) {
68 case PIPE_FORMAT_Z16_UNORM:
69 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
70 int x = quad->input.x0 % TILE_SIZE + (j & 1);
71 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
72 data->bzzzz[j] = tile->data.depth16[y][x];
73 }
74 break;
75 case PIPE_FORMAT_Z32_UNORM:
76 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77 int x = quad->input.x0 % TILE_SIZE + (j & 1);
78 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
79 data->bzzzz[j] = tile->data.depth32[y][x];
80 }
81 break;
82 case PIPE_FORMAT_Z24X8_UNORM:
83 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
84 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
85 int x = quad->input.x0 % TILE_SIZE + (j & 1);
86 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
87 data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
88 data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
89 }
90 break;
91 case PIPE_FORMAT_X8Z24_UNORM:
92 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
93 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94 int x = quad->input.x0 % TILE_SIZE + (j & 1);
95 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
96 data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
97 data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
98 }
99 break;
100 case PIPE_FORMAT_S8_UINT:
101 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
102 int x = quad->input.x0 % TILE_SIZE + (j & 1);
103 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
104 data->bzzzz[j] = 0;
105 data->stencilVals[j] = tile->data.stencil8[y][x];
106 }
107 break;
108 case PIPE_FORMAT_Z32_FLOAT:
109 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
110 int x = quad->input.x0 % TILE_SIZE + (j & 1);
111 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
112 data->bzzzz[j] = tile->data.depth32[y][x];
113 }
114 break;
115 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
116 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
117 int x = quad->input.x0 % TILE_SIZE + (j & 1);
118 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
119 data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
120 data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
121 }
122 break;
123 default:
124 assert(0);
125 }
126 }
127
128
129 /**
130 * If the shader has not been run, interpolate the depth values
131 * ourselves.
132 */
133 static void
interpolate_quad_depth(struct quad_header * quad)134 interpolate_quad_depth( struct quad_header *quad )
135 {
136 const float fx = (float) quad->input.x0;
137 const float fy = (float) quad->input.y0;
138 const float dzdx = quad->posCoef->dadx[2];
139 const float dzdy = quad->posCoef->dady[2];
140 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
141
142 quad->output.depth[0] = z0;
143 quad->output.depth[1] = z0 + dzdx;
144 quad->output.depth[2] = z0 + dzdy;
145 quad->output.depth[3] = z0 + dzdx + dzdy;
146 }
147
148
149 /**
150 * Compute the depth_data::qzzzz[] values from the float fragment Z values.
151 */
152 static void
convert_quad_depth(struct depth_data * data,const struct quad_header * quad)153 convert_quad_depth( struct depth_data *data,
154 const struct quad_header *quad )
155 {
156 unsigned j;
157 float dvals[TGSI_QUAD_SIZE];
158
159 /* Convert quad's float depth values to int depth values (qzzzz).
160 * If the Z buffer stores integer values, we _have_ to do the depth
161 * compares with integers (not floats). Otherwise, the float->int->float
162 * conversion of Z values (which isn't an identity function) will cause
163 * Z-fighting errors.
164 */
165 if (data->clamp) {
166 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
167 dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
168 }
169 } else {
170 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
171 dvals[j] = quad->output.depth[j];
172 }
173 }
174
175 switch (data->format) {
176 case PIPE_FORMAT_Z16_UNORM:
177 {
178 float scale = 65535.0;
179
180 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
181 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
182 }
183 }
184 break;
185 case PIPE_FORMAT_Z32_UNORM:
186 {
187 double scale = (double) (uint) ~0UL;
188
189 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
190 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
191 }
192 }
193 break;
194 case PIPE_FORMAT_Z24X8_UNORM:
195 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
196 {
197 float scale = (float) ((1 << 24) - 1);
198
199 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
200 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
201 }
202 }
203 break;
204 case PIPE_FORMAT_X8Z24_UNORM:
205 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
206 {
207 float scale = (float) ((1 << 24) - 1);
208
209 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
210 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
211 }
212 }
213 break;
214 case PIPE_FORMAT_Z32_FLOAT:
215 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
216 {
217 union fi fui;
218
219 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
220 fui.f = dvals[j];
221 data->qzzzz[j] = fui.ui;
222 }
223 }
224 break;
225 default:
226 assert(0);
227 }
228 }
229
230
231 /**
232 * Compute the depth_data::shader_stencil_refs[] values from the float
233 * fragment stencil values.
234 */
235 static void
convert_quad_stencil(struct depth_data * data,const struct quad_header * quad)236 convert_quad_stencil( struct depth_data *data,
237 const struct quad_header *quad )
238 {
239 unsigned j;
240
241 data->use_shader_stencil_refs = TRUE;
242 /* Copy quads stencil values
243 */
244 switch (data->format) {
245 case PIPE_FORMAT_Z24X8_UNORM:
246 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
247 case PIPE_FORMAT_X8Z24_UNORM:
248 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
249 case PIPE_FORMAT_S8_UINT:
250 case PIPE_FORMAT_Z32_FLOAT:
251 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
252 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
253 data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
254 }
255 break;
256 default:
257 assert(0);
258 }
259 }
260
261
262 /**
263 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
264 */
265 static void
write_depth_stencil_values(struct depth_data * data,struct quad_header * quad)266 write_depth_stencil_values( struct depth_data *data,
267 struct quad_header *quad )
268 {
269 struct softpipe_cached_tile *tile = data->tile;
270 unsigned j;
271
272 /* put updated Z values back into cached tile */
273 switch (data->format) {
274 case PIPE_FORMAT_Z16_UNORM:
275 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
276 int x = quad->input.x0 % TILE_SIZE + (j & 1);
277 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
278 tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
279 }
280 break;
281 case PIPE_FORMAT_Z24X8_UNORM:
282 case PIPE_FORMAT_Z32_UNORM:
283 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
284 int x = quad->input.x0 % TILE_SIZE + (j & 1);
285 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
286 tile->data.depth32[y][x] = data->bzzzz[j];
287 }
288 break;
289 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
290 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
291 int x = quad->input.x0 % TILE_SIZE + (j & 1);
292 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
293 tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
294 }
295 break;
296 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
297 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
298 int x = quad->input.x0 % TILE_SIZE + (j & 1);
299 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
300 tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
301 }
302 break;
303 case PIPE_FORMAT_X8Z24_UNORM:
304 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
305 int x = quad->input.x0 % TILE_SIZE + (j & 1);
306 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
307 tile->data.depth32[y][x] = data->bzzzz[j] << 8;
308 }
309 break;
310 case PIPE_FORMAT_S8_UINT:
311 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
312 int x = quad->input.x0 % TILE_SIZE + (j & 1);
313 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
314 tile->data.stencil8[y][x] = data->stencilVals[j];
315 }
316 break;
317 case PIPE_FORMAT_Z32_FLOAT:
318 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
319 int x = quad->input.x0 % TILE_SIZE + (j & 1);
320 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
321 tile->data.depth32[y][x] = data->bzzzz[j];
322 }
323 break;
324 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
325 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
326 int x = quad->input.x0 % TILE_SIZE + (j & 1);
327 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
328 tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
329 }
330 break;
331 default:
332 assert(0);
333 }
334 }
335
336
337
/** Largest stencil value: only 8-bit stencil buffers are supported. */
#define STENCIL_MAX 0xFF
340
341
342 /**
343 * Do the basic stencil test (compare stencil buffer values against the
344 * reference value.
345 *
346 * \param data->stencilVals the stencil values from the stencil buffer
347 * \param func the stencil func (PIPE_FUNC_x)
348 * \param ref the stencil reference value
349 * \param valMask the stencil value mask indicating which bits of the stencil
350 * values and ref value are to be used.
351 * \return mask indicating which pixels passed the stencil test
352 */
353 static unsigned
do_stencil_test(struct depth_data * data,unsigned func,unsigned ref,unsigned valMask)354 do_stencil_test(struct depth_data *data,
355 unsigned func,
356 unsigned ref, unsigned valMask)
357 {
358 unsigned passMask = 0x0;
359 unsigned j;
360 ubyte refs[TGSI_QUAD_SIZE];
361
362 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
363 if (data->use_shader_stencil_refs)
364 refs[j] = data->shader_stencil_refs[j] & valMask;
365 else
366 refs[j] = ref & valMask;
367 }
368
369 switch (func) {
370 case PIPE_FUNC_NEVER:
371 /* passMask = 0x0 */
372 break;
373 case PIPE_FUNC_LESS:
374 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375 if (refs[j] < (data->stencilVals[j] & valMask)) {
376 passMask |= (1 << j);
377 }
378 }
379 break;
380 case PIPE_FUNC_EQUAL:
381 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
382 if (refs[j] == (data->stencilVals[j] & valMask)) {
383 passMask |= (1 << j);
384 }
385 }
386 break;
387 case PIPE_FUNC_LEQUAL:
388 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389 if (refs[j] <= (data->stencilVals[j] & valMask)) {
390 passMask |= (1 << j);
391 }
392 }
393 break;
394 case PIPE_FUNC_GREATER:
395 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
396 if (refs[j] > (data->stencilVals[j] & valMask)) {
397 passMask |= (1 << j);
398 }
399 }
400 break;
401 case PIPE_FUNC_NOTEQUAL:
402 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403 if (refs[j] != (data->stencilVals[j] & valMask)) {
404 passMask |= (1 << j);
405 }
406 }
407 break;
408 case PIPE_FUNC_GEQUAL:
409 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
410 if (refs[j] >= (data->stencilVals[j] & valMask)) {
411 passMask |= (1 << j);
412 }
413 }
414 break;
415 case PIPE_FUNC_ALWAYS:
416 passMask = MASK_ALL;
417 break;
418 default:
419 assert(0);
420 }
421
422 return passMask;
423 }
424
425
426 /**
427 * Apply the stencil operator to stencil values.
428 *
429 * \param data->stencilVals the stencil buffer values (read and written)
430 * \param mask indicates which pixels to update
431 * \param op the stencil operator (PIPE_STENCIL_OP_x)
432 * \param ref the stencil reference value
433 * \param wrtMask writemask controlling which bits are changed in the
434 * stencil values
435 */
436 static void
apply_stencil_op(struct depth_data * data,unsigned mask,unsigned op,ubyte ref,ubyte wrtMask)437 apply_stencil_op(struct depth_data *data,
438 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
439 {
440 unsigned j;
441 ubyte newstencil[TGSI_QUAD_SIZE];
442 ubyte refs[TGSI_QUAD_SIZE];
443
444 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445 newstencil[j] = data->stencilVals[j];
446 if (data->use_shader_stencil_refs)
447 refs[j] = data->shader_stencil_refs[j];
448 else
449 refs[j] = ref;
450 }
451
452 switch (op) {
453 case PIPE_STENCIL_OP_KEEP:
454 /* no-op */
455 break;
456 case PIPE_STENCIL_OP_ZERO:
457 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
458 if (mask & (1 << j)) {
459 newstencil[j] = 0;
460 }
461 }
462 break;
463 case PIPE_STENCIL_OP_REPLACE:
464 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
465 if (mask & (1 << j)) {
466 newstencil[j] = refs[j];
467 }
468 }
469 break;
470 case PIPE_STENCIL_OP_INCR:
471 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
472 if (mask & (1 << j)) {
473 if (data->stencilVals[j] < STENCIL_MAX) {
474 newstencil[j] = data->stencilVals[j] + 1;
475 }
476 }
477 }
478 break;
479 case PIPE_STENCIL_OP_DECR:
480 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
481 if (mask & (1 << j)) {
482 if (data->stencilVals[j] > 0) {
483 newstencil[j] = data->stencilVals[j] - 1;
484 }
485 }
486 }
487 break;
488 case PIPE_STENCIL_OP_INCR_WRAP:
489 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
490 if (mask & (1 << j)) {
491 newstencil[j] = data->stencilVals[j] + 1;
492 }
493 }
494 break;
495 case PIPE_STENCIL_OP_DECR_WRAP:
496 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
497 if (mask & (1 << j)) {
498 newstencil[j] = data->stencilVals[j] - 1;
499 }
500 }
501 break;
502 case PIPE_STENCIL_OP_INVERT:
503 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
504 if (mask & (1 << j)) {
505 newstencil[j] = ~data->stencilVals[j];
506 }
507 }
508 break;
509 default:
510 assert(0);
511 }
512
513 /*
514 * update the stencil values
515 */
516 if (wrtMask != STENCIL_MAX) {
517 /* apply bit-wise stencil buffer writemask */
518 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
519 data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
520 }
521 }
522 else {
523 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
524 data->stencilVals[j] = newstencil[j];
525 }
526 }
527 }
528
529
530
531 /**
532 * To increase efficiency, we should probably have multiple versions
533 * of this function that are specifically for Z16, Z32 and FP Z buffers.
534 * Try to effectively do that with codegen...
535 */
536 static boolean
depth_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)537 depth_test_quad(struct quad_stage *qs,
538 struct depth_data *data,
539 struct quad_header *quad)
540 {
541 struct softpipe_context *softpipe = qs->softpipe;
542 unsigned zmask = 0;
543 unsigned j;
544
545 switch (softpipe->depth_stencil->depth.func) {
546 case PIPE_FUNC_NEVER:
547 /* zmask = 0 */
548 break;
549 case PIPE_FUNC_LESS:
550 /* Note this is pretty much a single sse or cell instruction.
551 * Like this: quad->mask &= (quad->outputs.depth < zzzz);
552 */
553 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
554 if (data->qzzzz[j] < data->bzzzz[j])
555 zmask |= 1 << j;
556 }
557 break;
558 case PIPE_FUNC_EQUAL:
559 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
560 if (data->qzzzz[j] == data->bzzzz[j])
561 zmask |= 1 << j;
562 }
563 break;
564 case PIPE_FUNC_LEQUAL:
565 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
566 if (data->qzzzz[j] <= data->bzzzz[j])
567 zmask |= (1 << j);
568 }
569 break;
570 case PIPE_FUNC_GREATER:
571 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
572 if (data->qzzzz[j] > data->bzzzz[j])
573 zmask |= (1 << j);
574 }
575 break;
576 case PIPE_FUNC_NOTEQUAL:
577 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
578 if (data->qzzzz[j] != data->bzzzz[j])
579 zmask |= (1 << j);
580 }
581 break;
582 case PIPE_FUNC_GEQUAL:
583 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
584 if (data->qzzzz[j] >= data->bzzzz[j])
585 zmask |= (1 << j);
586 }
587 break;
588 case PIPE_FUNC_ALWAYS:
589 zmask = MASK_ALL;
590 break;
591 default:
592 assert(0);
593 }
594
595 quad->inout.mask &= zmask;
596 if (quad->inout.mask == 0)
597 return FALSE;
598
599 /* Update our internal copy only if writemask set. Even if
600 * depth.writemask is FALSE, may still need to write out buffer
601 * data due to stencil changes.
602 */
603 if (softpipe->depth_stencil->depth.writemask) {
604 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
605 if (quad->inout.mask & (1 << j)) {
606 data->bzzzz[j] = data->qzzzz[j];
607 }
608 }
609 }
610
611 return TRUE;
612 }
613
614
615
616 /**
617 * Do stencil (and depth) testing. Stenciling depends on the outcome of
618 * depth testing.
619 */
620 static void
depth_stencil_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)621 depth_stencil_test_quad(struct quad_stage *qs,
622 struct depth_data *data,
623 struct quad_header *quad)
624 {
625 struct softpipe_context *softpipe = qs->softpipe;
626 unsigned func, zFailOp, zPassOp, failOp;
627 ubyte ref, wrtMask, valMask;
628 uint face = quad->input.facing;
629
630 if (!softpipe->depth_stencil->stencil[1].enabled) {
631 /* single-sided stencil test, use front (face=0) state */
632 face = 0;
633 }
634
635 /* 0 = front-face, 1 = back-face */
636 assert(face == 0 || face == 1);
637
638 /* choose front or back face function, operator, etc */
639 /* XXX we could do these initializations once per primitive */
640 func = softpipe->depth_stencil->stencil[face].func;
641 failOp = softpipe->depth_stencil->stencil[face].fail_op;
642 zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
643 zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
644 ref = softpipe->stencil_ref.ref_value[face];
645 wrtMask = softpipe->depth_stencil->stencil[face].writemask;
646 valMask = softpipe->depth_stencil->stencil[face].valuemask;
647
648 /* do the stencil test first */
649 {
650 unsigned passMask, failMask;
651 passMask = do_stencil_test(data, func, ref, valMask);
652 failMask = quad->inout.mask & ~passMask;
653 quad->inout.mask &= passMask;
654
655 if (failOp != PIPE_STENCIL_OP_KEEP) {
656 apply_stencil_op(data, failMask, failOp, ref, wrtMask);
657 }
658 }
659
660 if (quad->inout.mask) {
661 /* now the pixels that passed the stencil test are depth tested */
662 if (softpipe->depth_stencil->depth.enabled) {
663 const unsigned origMask = quad->inout.mask;
664
665 depth_test_quad(qs, data, quad); /* quad->mask is updated */
666
667 /* update stencil buffer values according to z pass/fail result */
668 if (zFailOp != PIPE_STENCIL_OP_KEEP) {
669 const unsigned zFailMask = origMask & ~quad->inout.mask;
670 apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
671 }
672
673 if (zPassOp != PIPE_STENCIL_OP_KEEP) {
674 const unsigned zPassMask = origMask & quad->inout.mask;
675 apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
676 }
677 }
678 else {
679 /* no depth test, apply Zpass operator to stencil buffer values */
680 apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
681 }
682 }
683 }
684
685
686 #define ALPHATEST( FUNC, COMP ) \
687 static unsigned \
688 alpha_test_quads_##FUNC( struct quad_stage *qs, \
689 struct quad_header *quads[], \
690 unsigned nr ) \
691 { \
692 const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \
693 const uint cbuf = 0; /* only output[0].alpha is tested */ \
694 unsigned pass_nr = 0; \
695 unsigned i; \
696 \
697 for (i = 0; i < nr; i++) { \
698 const float *aaaa = quads[i]->output.color[cbuf][3]; \
699 unsigned passMask = 0; \
700 \
701 if (aaaa[0] COMP ref) passMask |= (1 << 0); \
702 if (aaaa[1] COMP ref) passMask |= (1 << 1); \
703 if (aaaa[2] COMP ref) passMask |= (1 << 2); \
704 if (aaaa[3] COMP ref) passMask |= (1 << 3); \
705 \
706 quads[i]->inout.mask &= passMask; \
707 \
708 if (quads[i]->inout.mask) \
709 quads[pass_nr++] = quads[i]; \
710 } \
711 \
712 return pass_nr; \
713 }
714
715
716 ALPHATEST( LESS, < )
717 ALPHATEST( EQUAL, == )
718 ALPHATEST( LEQUAL, <= )
719 ALPHATEST( GREATER, > )
720 ALPHATEST( NOTEQUAL, != )
721 ALPHATEST( GEQUAL, >= )
722
723
724 /* XXX: Incorporate into shader using KILL_IF.
725 */
726 static unsigned
alpha_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)727 alpha_test_quads(struct quad_stage *qs,
728 struct quad_header *quads[],
729 unsigned nr)
730 {
731 switch (qs->softpipe->depth_stencil->alpha.func) {
732 case PIPE_FUNC_LESS:
733 return alpha_test_quads_LESS( qs, quads, nr );
734 case PIPE_FUNC_EQUAL:
735 return alpha_test_quads_EQUAL( qs, quads, nr );
736 case PIPE_FUNC_LEQUAL:
737 return alpha_test_quads_LEQUAL( qs, quads, nr );
738 case PIPE_FUNC_GREATER:
739 return alpha_test_quads_GREATER( qs, quads, nr );
740 case PIPE_FUNC_NOTEQUAL:
741 return alpha_test_quads_NOTEQUAL( qs, quads, nr );
742 case PIPE_FUNC_GEQUAL:
743 return alpha_test_quads_GEQUAL( qs, quads, nr );
744 case PIPE_FUNC_ALWAYS:
745 return nr;
746 case PIPE_FUNC_NEVER:
747 default:
748 return 0;
749 }
750 }
751
752
/**
 * Number of set bits in a 4-bit quad coverage mask, indexed by the mask.
 * Read-only lookup table, hence const.
 */
static const unsigned mask_count[16] =
{
   0, 1, 1, 2,   /* 0x0 .. 0x3 */
   1, 2, 2, 3,   /* 0x4 .. 0x7 */
   1, 2, 2, 3,   /* 0x8 .. 0xb */
   2, 3, 3, 4,   /* 0xc .. 0xf */
};
772
773
774
775 /**
776 * General depth/stencil test function. Used when there's no fast-path.
777 */
778 static void
depth_test_quads_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)779 depth_test_quads_fallback(struct quad_stage *qs,
780 struct quad_header *quads[],
781 unsigned nr)
782 {
783 unsigned i, pass = 0;
784 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
785 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
786 boolean shader_stencil_ref = fsInfo->writes_stencil;
787 struct depth_data data;
788 unsigned vp_idx = quads[0]->input.viewport_index;
789
790 data.use_shader_stencil_refs = FALSE;
791
792 if (qs->softpipe->depth_stencil->alpha.enabled) {
793 nr = alpha_test_quads(qs, quads, nr);
794 }
795
796 if (qs->softpipe->framebuffer.zsbuf &&
797 (qs->softpipe->depth_stencil->depth.enabled ||
798 qs->softpipe->depth_stencil->stencil[0].enabled)) {
799 float near_val, far_val;
800
801 data.ps = qs->softpipe->framebuffer.zsbuf;
802 data.format = data.ps->format;
803 data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
804 quads[0]->input.x0,
805 quads[0]->input.y0, quads[0]->input.layer);
806 data.clamp = !qs->softpipe->rasterizer->depth_clip;
807
808 near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
809 far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
810 data.minval = MIN2(near_val, far_val);
811 data.maxval = MAX2(near_val, far_val);
812
813 for (i = 0; i < nr; i++) {
814 get_depth_stencil_values(&data, quads[i]);
815
816 if (qs->softpipe->depth_stencil->depth.enabled) {
817 if (interp_depth)
818 interpolate_quad_depth(quads[i]);
819
820 convert_quad_depth(&data, quads[i]);
821 }
822
823 if (qs->softpipe->depth_stencil->stencil[0].enabled) {
824 if (shader_stencil_ref)
825 convert_quad_stencil(&data, quads[i]);
826
827 depth_stencil_test_quad(qs, &data, quads[i]);
828 write_depth_stencil_values(&data, quads[i]);
829 }
830 else {
831 if (!depth_test_quad(qs, &data, quads[i]))
832 continue;
833
834 if (qs->softpipe->depth_stencil->depth.writemask)
835 write_depth_stencil_values(&data, quads[i]);
836 }
837
838 quads[pass++] = quads[i];
839 }
840
841 nr = pass;
842 }
843
844 if (qs->softpipe->active_query_count) {
845 for (i = 0; i < nr; i++)
846 qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
847 }
848
849 if (nr)
850 qs->next->run(qs->next, quads, nr);
851 }
852
853
/**
 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
 *
 * sp_quad_depth_test_tmp.h is included once per depth function.  Each
 * inclusion is parameterized by NAME (the function that choose_depth_test()
 * later installs as qs->run) and by either OPERATOR (the comparison) or
 * ALWAYS.
 *
 * NOTE(review): NAME/OPERATOR are redefined here without an #undef, so the
 * template header presumably #undefs its parameters itself -- confirm
 * against sp_quad_depth_test_tmp.h.
 */

#define NAME depth_interp_z16_less_write
#define OPERATOR <
#include "sp_quad_depth_test_tmp.h"

#define NAME depth_interp_z16_equal_write
#define OPERATOR ==
#include "sp_quad_depth_test_tmp.h"

#define NAME depth_interp_z16_lequal_write
#define OPERATOR <=
#include "sp_quad_depth_test_tmp.h"

#define NAME depth_interp_z16_greater_write
#define OPERATOR >
#include "sp_quad_depth_test_tmp.h"

#define NAME depth_interp_z16_notequal_write
#define OPERATOR !=
#include "sp_quad_depth_test_tmp.h"

#define NAME depth_interp_z16_gequal_write
#define OPERATOR >=
#include "sp_quad_depth_test_tmp.h"

/* no comparison at all: every pixel passes */
#define NAME depth_interp_z16_always_write
#define ALWAYS 1
#include "sp_quad_depth_test_tmp.h"
885
886
887
888 static void
depth_noop(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)889 depth_noop(struct quad_stage *qs,
890 struct quad_header *quads[],
891 unsigned nr)
892 {
893 qs->next->run(qs->next, quads, nr);
894 }
895
896
897
898 static void
choose_depth_test(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)899 choose_depth_test(struct quad_stage *qs,
900 struct quad_header *quads[],
901 unsigned nr)
902 {
903 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
904
905 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
906
907 boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
908
909 boolean depth = qs->softpipe->depth_stencil->depth.enabled;
910
911 unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
912
913 boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
914
915 boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
916
917 boolean occlusion = qs->softpipe->active_query_count;
918
919 boolean clipped = !qs->softpipe->rasterizer->depth_clip;
920
921 if(!qs->softpipe->framebuffer.zsbuf)
922 depth = depthwrite = stencil = FALSE;
923
924 /* default */
925 qs->run = depth_test_quads_fallback;
926
927 /* look for special cases */
928 if (!alpha &&
929 !depth &&
930 !occlusion &&
931 !clipped &&
932 !stencil) {
933 qs->run = depth_noop;
934 }
935 else if (!alpha &&
936 interp_depth &&
937 depth &&
938 depthwrite &&
939 !occlusion &&
940 !clipped &&
941 !stencil)
942 {
943 if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
944 switch (depthfunc) {
945 case PIPE_FUNC_NEVER:
946 qs->run = depth_test_quads_fallback;
947 break;
948 case PIPE_FUNC_LESS:
949 qs->run = depth_interp_z16_less_write;
950 break;
951 case PIPE_FUNC_EQUAL:
952 qs->run = depth_interp_z16_equal_write;
953 break;
954 case PIPE_FUNC_LEQUAL:
955 qs->run = depth_interp_z16_lequal_write;
956 break;
957 case PIPE_FUNC_GREATER:
958 qs->run = depth_interp_z16_greater_write;
959 break;
960 case PIPE_FUNC_NOTEQUAL:
961 qs->run = depth_interp_z16_notequal_write;
962 break;
963 case PIPE_FUNC_GEQUAL:
964 qs->run = depth_interp_z16_gequal_write;
965 break;
966 case PIPE_FUNC_ALWAYS:
967 qs->run = depth_interp_z16_always_write;
968 break;
969 default:
970 qs->run = depth_test_quads_fallback;
971 break;
972 }
973 }
974 }
975
976 /* next quad/fragment stage */
977 qs->run( qs, quads, nr );
978 }
979
980
981
982 static void
depth_test_begin(struct quad_stage * qs)983 depth_test_begin(struct quad_stage *qs)
984 {
985 qs->run = choose_depth_test;
986 qs->next->begin(qs->next);
987 }
988
989
/**
 * Stage destroy hook: release the stage object itself.
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
995
996
997 struct quad_stage *
sp_quad_depth_test_stage(struct softpipe_context * softpipe)998 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
999 {
1000 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1001
1002 stage->softpipe = softpipe;
1003 stage->begin = depth_test_begin;
1004 stage->run = choose_depth_test;
1005 stage->destroy = depth_test_destroy;
1006
1007 return stage;
1008 }
1009