1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * Copyright 2010 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * \brief Quad depth / stencil testing
31 */
32
33 #include "pipe/p_defines.h"
34 #include "util/format/u_format.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "sp_context.h"
39 #include "sp_quad.h"
40 #include "sp_quad_pipe.h"
41 #include "sp_tile_cache.h"
42 #include "sp_state.h" /* for sp_fragment_shader */
43
44
/**
 * Scratch state shared by the depth/stencil helpers in this file while a
 * batch of quads is being tested.
 */
struct depth_data {
   struct pipe_surface *ps;   /**< depth/stencil surface (framebuffer zsbuf) */
   enum pipe_format format;   /**< format of \c ps; selects the per-format code paths */
   unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
   unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
   ubyte stencilVals[TGSI_QUAD_SIZE];   /**< stencil values read from / written back to the buffer */
   boolean use_shader_stencil_refs;   /**< set to TRUE by convert_quad_stencil() */
   ubyte shader_stencil_refs[TGSI_QUAD_SIZE];   /**< per-pixel stencil refs taken from the quad's output */
   struct softpipe_cached_tile *tile;   /**< cached tile the Z/stencil values are read from and written to */
   float minval, maxval;   /**< clamp range for quad depth values, used when \c clamp is set */
   bool clamp;   /**< clamp quad depth to [minval, maxval] before converting it */
};
57
58
59
60 static void
get_depth_stencil_values(struct depth_data * data,const struct quad_header * quad)61 get_depth_stencil_values( struct depth_data *data,
62 const struct quad_header *quad )
63 {
64 unsigned j;
65 const struct softpipe_cached_tile *tile = data->tile;
66
67 switch (data->format) {
68 case PIPE_FORMAT_Z16_UNORM:
69 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
70 int x = quad->input.x0 % TILE_SIZE + (j & 1);
71 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
72 data->bzzzz[j] = tile->data.depth16[y][x];
73 }
74 break;
75 case PIPE_FORMAT_Z32_UNORM:
76 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77 int x = quad->input.x0 % TILE_SIZE + (j & 1);
78 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
79 data->bzzzz[j] = tile->data.depth32[y][x];
80 }
81 break;
82 case PIPE_FORMAT_Z24X8_UNORM:
83 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
84 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
85 int x = quad->input.x0 % TILE_SIZE + (j & 1);
86 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
87 data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
88 data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
89 }
90 break;
91 case PIPE_FORMAT_X8Z24_UNORM:
92 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
93 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94 int x = quad->input.x0 % TILE_SIZE + (j & 1);
95 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
96 data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
97 data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
98 }
99 break;
100 case PIPE_FORMAT_S8_UINT:
101 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
102 int x = quad->input.x0 % TILE_SIZE + (j & 1);
103 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
104 data->bzzzz[j] = 0;
105 data->stencilVals[j] = tile->data.stencil8[y][x];
106 }
107 break;
108 case PIPE_FORMAT_Z32_FLOAT:
109 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
110 int x = quad->input.x0 % TILE_SIZE + (j & 1);
111 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
112 data->bzzzz[j] = tile->data.depth32[y][x];
113 }
114 break;
115 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
116 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
117 int x = quad->input.x0 % TILE_SIZE + (j & 1);
118 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
119 data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
120 data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
121 }
122 break;
123 default:
124 assert(0);
125 }
126 }
127
128
129 /**
130 * If the shader has not been run, interpolate the depth values
131 * ourselves.
132 */
133 static void
interpolate_quad_depth(struct quad_header * quad)134 interpolate_quad_depth( struct quad_header *quad )
135 {
136 const float fx = (float) quad->input.x0;
137 const float fy = (float) quad->input.y0;
138 const float dzdx = quad->posCoef->dadx[2];
139 const float dzdy = quad->posCoef->dady[2];
140 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
141
142 quad->output.depth[0] = z0;
143 quad->output.depth[1] = z0 + dzdx;
144 quad->output.depth[2] = z0 + dzdy;
145 quad->output.depth[3] = z0 + dzdx + dzdy;
146 }
147
148
149 /**
150 * Compute the depth_data::qzzzz[] values from the float fragment Z values.
151 */
152 static void
convert_quad_depth(struct depth_data * data,const struct quad_header * quad)153 convert_quad_depth( struct depth_data *data,
154 const struct quad_header *quad )
155 {
156 unsigned j;
157 float dvals[TGSI_QUAD_SIZE];
158
159 /* Convert quad's float depth values to int depth values (qzzzz).
160 * If the Z buffer stores integer values, we _have_ to do the depth
161 * compares with integers (not floats). Otherwise, the float->int->float
162 * conversion of Z values (which isn't an identity function) will cause
163 * Z-fighting errors.
164 */
165 if (data->clamp) {
166 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
167 dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
168 }
169 } else {
170 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
171 dvals[j] = quad->output.depth[j];
172 }
173 }
174
175 switch (data->format) {
176 case PIPE_FORMAT_Z16_UNORM:
177 {
178 float scale = 65535.0;
179
180 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
181 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
182 }
183 }
184 break;
185 case PIPE_FORMAT_Z32_UNORM:
186 {
187 double scale = (double) (uint) ~0UL;
188
189 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
190 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
191 }
192 }
193 break;
194 case PIPE_FORMAT_Z24X8_UNORM:
195 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
196 {
197 float scale = (float) ((1 << 24) - 1);
198
199 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
200 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
201 }
202 }
203 break;
204 case PIPE_FORMAT_X8Z24_UNORM:
205 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
206 {
207 float scale = (float) ((1 << 24) - 1);
208
209 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
210 data->qzzzz[j] = (unsigned) (dvals[j] * scale);
211 }
212 }
213 break;
214 case PIPE_FORMAT_Z32_FLOAT:
215 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
216 {
217 union fi fui;
218
219 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
220 fui.f = dvals[j];
221 data->qzzzz[j] = fui.ui;
222 }
223 }
224 break;
225 default:
226 assert(0);
227 }
228 }
229
230
231 /**
232 * Compute the depth_data::shader_stencil_refs[] values from the float
233 * fragment stencil values.
234 */
235 static void
convert_quad_stencil(struct depth_data * data,const struct quad_header * quad)236 convert_quad_stencil( struct depth_data *data,
237 const struct quad_header *quad )
238 {
239 unsigned j;
240
241 data->use_shader_stencil_refs = TRUE;
242 /* Copy quads stencil values
243 */
244 switch (data->format) {
245 case PIPE_FORMAT_Z24X8_UNORM:
246 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
247 case PIPE_FORMAT_X8Z24_UNORM:
248 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
249 case PIPE_FORMAT_S8_UINT:
250 case PIPE_FORMAT_Z32_FLOAT:
251 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
252 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
253 data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
254 }
255 break;
256 default:
257 assert(0);
258 }
259 }
260
261
262 /**
263 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
264 */
265 static void
write_depth_stencil_values(struct depth_data * data,struct quad_header * quad)266 write_depth_stencil_values( struct depth_data *data,
267 struct quad_header *quad )
268 {
269 struct softpipe_cached_tile *tile = data->tile;
270 unsigned j;
271
272 /* put updated Z values back into cached tile */
273 switch (data->format) {
274 case PIPE_FORMAT_Z16_UNORM:
275 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
276 int x = quad->input.x0 % TILE_SIZE + (j & 1);
277 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
278 tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
279 }
280 break;
281 case PIPE_FORMAT_Z24X8_UNORM:
282 case PIPE_FORMAT_Z32_UNORM:
283 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
284 int x = quad->input.x0 % TILE_SIZE + (j & 1);
285 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
286 tile->data.depth32[y][x] = data->bzzzz[j];
287 }
288 break;
289 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
290 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
291 int x = quad->input.x0 % TILE_SIZE + (j & 1);
292 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
293 tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
294 }
295 break;
296 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
297 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
298 int x = quad->input.x0 % TILE_SIZE + (j & 1);
299 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
300 tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
301 }
302 break;
303 case PIPE_FORMAT_X8Z24_UNORM:
304 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
305 int x = quad->input.x0 % TILE_SIZE + (j & 1);
306 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
307 tile->data.depth32[y][x] = data->bzzzz[j] << 8;
308 }
309 break;
310 case PIPE_FORMAT_S8_UINT:
311 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
312 int x = quad->input.x0 % TILE_SIZE + (j & 1);
313 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
314 tile->data.stencil8[y][x] = data->stencilVals[j];
315 }
316 break;
317 case PIPE_FORMAT_Z32_FLOAT:
318 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
319 int x = quad->input.x0 % TILE_SIZE + (j & 1);
320 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
321 tile->data.depth32[y][x] = data->bzzzz[j];
322 }
323 break;
324 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
325 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
326 int x = quad->input.x0 % TILE_SIZE + (j & 1);
327 int y = quad->input.y0 % TILE_SIZE + (j >> 1);
328 tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
329 }
330 break;
331 default:
332 assert(0);
333 }
334 }
335
336
337
/**
 * Largest stencil value.  Only 8-bit stencil supported.
 *
 * Used as the saturation limit for PIPE_STENCIL_OP_INCR and as the
 * "every bit writable" value when checking the stencil writemask.
 */
#define STENCIL_MAX 0xff
340
341
342 /**
343 * Do the basic stencil test (compare stencil buffer values against the
344 * reference value.
345 *
346 * \param data->stencilVals the stencil values from the stencil buffer
347 * \param func the stencil func (PIPE_FUNC_x)
348 * \param ref the stencil reference value
349 * \param valMask the stencil value mask indicating which bits of the stencil
350 * values and ref value are to be used.
351 * \return mask indicating which pixels passed the stencil test
352 */
353 static unsigned
do_stencil_test(struct depth_data * data,unsigned func,unsigned ref,unsigned valMask)354 do_stencil_test(struct depth_data *data,
355 unsigned func,
356 unsigned ref, unsigned valMask)
357 {
358 unsigned passMask = 0x0;
359 unsigned j;
360 ubyte refs[TGSI_QUAD_SIZE];
361
362 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
363 if (data->use_shader_stencil_refs)
364 refs[j] = data->shader_stencil_refs[j] & valMask;
365 else
366 refs[j] = ref & valMask;
367 }
368
369 switch (func) {
370 case PIPE_FUNC_NEVER:
371 /* passMask = 0x0 */
372 break;
373 case PIPE_FUNC_LESS:
374 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375 if (refs[j] < (data->stencilVals[j] & valMask)) {
376 passMask |= (1 << j);
377 }
378 }
379 break;
380 case PIPE_FUNC_EQUAL:
381 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
382 if (refs[j] == (data->stencilVals[j] & valMask)) {
383 passMask |= (1 << j);
384 }
385 }
386 break;
387 case PIPE_FUNC_LEQUAL:
388 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389 if (refs[j] <= (data->stencilVals[j] & valMask)) {
390 passMask |= (1 << j);
391 }
392 }
393 break;
394 case PIPE_FUNC_GREATER:
395 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
396 if (refs[j] > (data->stencilVals[j] & valMask)) {
397 passMask |= (1 << j);
398 }
399 }
400 break;
401 case PIPE_FUNC_NOTEQUAL:
402 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403 if (refs[j] != (data->stencilVals[j] & valMask)) {
404 passMask |= (1 << j);
405 }
406 }
407 break;
408 case PIPE_FUNC_GEQUAL:
409 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
410 if (refs[j] >= (data->stencilVals[j] & valMask)) {
411 passMask |= (1 << j);
412 }
413 }
414 break;
415 case PIPE_FUNC_ALWAYS:
416 passMask = MASK_ALL;
417 break;
418 default:
419 assert(0);
420 }
421
422 return passMask;
423 }
424
425
426 /**
427 * Apply the stencil operator to stencil values.
428 *
429 * \param data->stencilVals the stencil buffer values (read and written)
430 * \param mask indicates which pixels to update
431 * \param op the stencil operator (PIPE_STENCIL_OP_x)
432 * \param ref the stencil reference value
433 * \param wrtMask writemask controlling which bits are changed in the
434 * stencil values
435 */
436 static void
apply_stencil_op(struct depth_data * data,unsigned mask,unsigned op,ubyte ref,ubyte wrtMask)437 apply_stencil_op(struct depth_data *data,
438 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
439 {
440 unsigned j;
441 ubyte newstencil[TGSI_QUAD_SIZE];
442 ubyte refs[TGSI_QUAD_SIZE];
443
444 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445 newstencil[j] = data->stencilVals[j];
446 if (data->use_shader_stencil_refs)
447 refs[j] = data->shader_stencil_refs[j];
448 else
449 refs[j] = ref;
450 }
451
452 switch (op) {
453 case PIPE_STENCIL_OP_KEEP:
454 /* no-op */
455 break;
456 case PIPE_STENCIL_OP_ZERO:
457 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
458 if (mask & (1 << j)) {
459 newstencil[j] = 0;
460 }
461 }
462 break;
463 case PIPE_STENCIL_OP_REPLACE:
464 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
465 if (mask & (1 << j)) {
466 newstencil[j] = refs[j];
467 }
468 }
469 break;
470 case PIPE_STENCIL_OP_INCR:
471 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
472 if (mask & (1 << j)) {
473 if (data->stencilVals[j] < STENCIL_MAX) {
474 newstencil[j] = data->stencilVals[j] + 1;
475 }
476 }
477 }
478 break;
479 case PIPE_STENCIL_OP_DECR:
480 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
481 if (mask & (1 << j)) {
482 if (data->stencilVals[j] > 0) {
483 newstencil[j] = data->stencilVals[j] - 1;
484 }
485 }
486 }
487 break;
488 case PIPE_STENCIL_OP_INCR_WRAP:
489 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
490 if (mask & (1 << j)) {
491 newstencil[j] = data->stencilVals[j] + 1;
492 }
493 }
494 break;
495 case PIPE_STENCIL_OP_DECR_WRAP:
496 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
497 if (mask & (1 << j)) {
498 newstencil[j] = data->stencilVals[j] - 1;
499 }
500 }
501 break;
502 case PIPE_STENCIL_OP_INVERT:
503 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
504 if (mask & (1 << j)) {
505 newstencil[j] = ~data->stencilVals[j];
506 }
507 }
508 break;
509 default:
510 assert(0);
511 }
512
513 /*
514 * update the stencil values
515 */
516 if (wrtMask != STENCIL_MAX) {
517 /* apply bit-wise stencil buffer writemask */
518 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
519 data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
520 }
521 }
522 else {
523 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
524 data->stencilVals[j] = newstencil[j];
525 }
526 }
527 }
528
529
530
531 /**
532 * To increase efficiency, we should probably have multiple versions
533 * of this function that are specifically for Z16, Z32 and FP Z buffers.
534 * Try to effectively do that with codegen...
535 */
536 static boolean
depth_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)537 depth_test_quad(struct quad_stage *qs,
538 struct depth_data *data,
539 struct quad_header *quad)
540 {
541 struct softpipe_context *softpipe = qs->softpipe;
542 unsigned zmask = 0;
543 unsigned j;
544
545 #define DEPTHTEST(l, op, r) do { \
546 if (data->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || \
547 data->format == PIPE_FORMAT_Z32_FLOAT) { \
548 for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
549 if (((float *)l)[j] op ((float *)r)[j]) \
550 zmask |= (1 << j); \
551 } \
552 } else { \
553 for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
554 if (l[j] op r[j]) \
555 zmask |= (1 << j); \
556 } \
557 } \
558 } while (0)
559
560 switch (softpipe->depth_stencil->depth_func) {
561 case PIPE_FUNC_NEVER:
562 /* zmask = 0 */
563 break;
564 case PIPE_FUNC_LESS:
565 /* Note this is pretty much a single sse or cell instruction.
566 * Like this: quad->mask &= (quad->outputs.depth < zzzz);
567 */
568 DEPTHTEST(data->qzzzz, <, data->bzzzz);
569 break;
570 case PIPE_FUNC_EQUAL:
571 DEPTHTEST(data->qzzzz, ==, data->bzzzz);
572 break;
573 case PIPE_FUNC_LEQUAL:
574 DEPTHTEST(data->qzzzz, <=, data->bzzzz);
575 break;
576 case PIPE_FUNC_GREATER:
577 DEPTHTEST(data->qzzzz, >, data->bzzzz);
578 break;
579 case PIPE_FUNC_NOTEQUAL:
580 DEPTHTEST(data->qzzzz, !=, data->bzzzz);
581 break;
582 case PIPE_FUNC_GEQUAL:
583 DEPTHTEST(data->qzzzz, >=, data->bzzzz);
584 break;
585 case PIPE_FUNC_ALWAYS:
586 zmask = MASK_ALL;
587 break;
588 default:
589 assert(0);
590 }
591
592 quad->inout.mask &= zmask;
593 if (quad->inout.mask == 0)
594 return FALSE;
595
596 /* Update our internal copy only if writemask set. Even if
597 * depth.writemask is FALSE, may still need to write out buffer
598 * data due to stencil changes.
599 */
600 if (softpipe->depth_stencil->depth_writemask) {
601 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
602 if (quad->inout.mask & (1 << j)) {
603 data->bzzzz[j] = data->qzzzz[j];
604 }
605 }
606 }
607
608 return TRUE;
609 }
610
611
612
613 /**
614 * Do stencil (and depth) testing. Stenciling depends on the outcome of
615 * depth testing.
616 */
617 static void
depth_stencil_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)618 depth_stencil_test_quad(struct quad_stage *qs,
619 struct depth_data *data,
620 struct quad_header *quad)
621 {
622 struct softpipe_context *softpipe = qs->softpipe;
623 unsigned func, zFailOp, zPassOp, failOp;
624 ubyte ref, wrtMask, valMask;
625 uint face = quad->input.facing;
626
627 if (!softpipe->depth_stencil->stencil[1].enabled) {
628 /* single-sided stencil test, use front (face=0) state */
629 face = 0;
630 }
631
632 /* 0 = front-face, 1 = back-face */
633 assert(face == 0 || face == 1);
634
635 /* choose front or back face function, operator, etc */
636 /* XXX we could do these initializations once per primitive */
637 func = softpipe->depth_stencil->stencil[face].func;
638 failOp = softpipe->depth_stencil->stencil[face].fail_op;
639 zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
640 zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
641 ref = softpipe->stencil_ref.ref_value[face];
642 wrtMask = softpipe->depth_stencil->stencil[face].writemask;
643 valMask = softpipe->depth_stencil->stencil[face].valuemask;
644
645 /* do the stencil test first */
646 {
647 unsigned passMask, failMask;
648 passMask = do_stencil_test(data, func, ref, valMask);
649 failMask = quad->inout.mask & ~passMask;
650 quad->inout.mask &= passMask;
651
652 if (failOp != PIPE_STENCIL_OP_KEEP) {
653 apply_stencil_op(data, failMask, failOp, ref, wrtMask);
654 }
655 }
656
657 if (quad->inout.mask) {
658 /* now the pixels that passed the stencil test are depth tested */
659 if (softpipe->depth_stencil->depth_enabled) {
660 const unsigned origMask = quad->inout.mask;
661
662 depth_test_quad(qs, data, quad); /* quad->mask is updated */
663
664 /* update stencil buffer values according to z pass/fail result */
665 if (zFailOp != PIPE_STENCIL_OP_KEEP) {
666 const unsigned zFailMask = origMask & ~quad->inout.mask;
667 apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
668 }
669
670 if (zPassOp != PIPE_STENCIL_OP_KEEP) {
671 const unsigned zPassMask = origMask & quad->inout.mask;
672 apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
673 }
674 }
675 else {
676 /* no depth test, apply Zpass operator to stencil buffer values */
677 apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
678 }
679 }
680 }
681
682
683 #define ALPHATEST( FUNC, COMP ) \
684 static unsigned \
685 alpha_test_quads_##FUNC( struct quad_stage *qs, \
686 struct quad_header *quads[], \
687 unsigned nr ) \
688 { \
689 const float ref = qs->softpipe->depth_stencil->alpha_ref_value; \
690 const uint cbuf = 0; /* only output[0].alpha is tested */ \
691 unsigned pass_nr = 0; \
692 unsigned i; \
693 \
694 for (i = 0; i < nr; i++) { \
695 const float *aaaa = quads[i]->output.color[cbuf][3]; \
696 unsigned passMask = 0; \
697 \
698 if (aaaa[0] COMP ref) passMask |= (1 << 0); \
699 if (aaaa[1] COMP ref) passMask |= (1 << 1); \
700 if (aaaa[2] COMP ref) passMask |= (1 << 2); \
701 if (aaaa[3] COMP ref) passMask |= (1 << 3); \
702 \
703 quads[i]->inout.mask &= passMask; \
704 \
705 if (quads[i]->inout.mask) \
706 quads[pass_nr++] = quads[i]; \
707 } \
708 \
709 return pass_nr; \
710 }
711
712
713 ALPHATEST( LESS, < )
714 ALPHATEST( EQUAL, == )
715 ALPHATEST( LEQUAL, <= )
716 ALPHATEST( GREATER, > )
717 ALPHATEST( NOTEQUAL, != )
718 ALPHATEST( GEQUAL, >= )
719
720
721 /* XXX: Incorporate into shader using KILL_IF.
722 */
723 static unsigned
alpha_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)724 alpha_test_quads(struct quad_stage *qs,
725 struct quad_header *quads[],
726 unsigned nr)
727 {
728 switch (qs->softpipe->depth_stencil->alpha_func) {
729 case PIPE_FUNC_LESS:
730 return alpha_test_quads_LESS( qs, quads, nr );
731 case PIPE_FUNC_EQUAL:
732 return alpha_test_quads_EQUAL( qs, quads, nr );
733 case PIPE_FUNC_LEQUAL:
734 return alpha_test_quads_LEQUAL( qs, quads, nr );
735 case PIPE_FUNC_GREATER:
736 return alpha_test_quads_GREATER( qs, quads, nr );
737 case PIPE_FUNC_NOTEQUAL:
738 return alpha_test_quads_NOTEQUAL( qs, quads, nr );
739 case PIPE_FUNC_GEQUAL:
740 return alpha_test_quads_GEQUAL( qs, quads, nr );
741 case PIPE_FUNC_ALWAYS:
742 return nr;
743 case PIPE_FUNC_NEVER:
744 default:
745 return 0;
746 }
747 }
748
749
750 /**
751 * EXT_depth_bounds_test has some careful language about precision:
752 *
753 * At what precision is the depth bounds test carried out?
754 *
755 * RESOLUTION: For the purposes of the test, the bounds are converted
756 * to fixed-point as though they were to be written to the depth buffer,
757 * and the comparison uses those quantized bounds.
758 *
759 * We choose the obvious interpretation that Z32F needs no such conversion.
760 */
761 static unsigned
depth_bounds_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr,struct depth_data * data)762 depth_bounds_test_quads(struct quad_stage *qs,
763 struct quad_header *quads[],
764 unsigned nr,
765 struct depth_data *data)
766 {
767 struct pipe_depth_stencil_alpha_state *dsa = qs->softpipe->depth_stencil;
768 unsigned i = 0, pass_nr = 0;
769 enum pipe_format format = util_format_get_depth_only(data->format);
770 double min = dsa->depth_bounds_min;
771 double max = dsa->depth_bounds_max;
772
773 for (i = 0; i < nr; i++) {
774 unsigned j = 0, passMask = 0;
775
776 get_depth_stencil_values(data, quads[i]);
777
778 if (format == PIPE_FORMAT_Z32_FLOAT) {
779 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
780 double z = uif(data->bzzzz[j]);
781
782 if (z >= min && z <= max)
783 passMask |= (1 << j);
784 }
785 } else {
786 unsigned imin, imax;
787
788 if (format == PIPE_FORMAT_Z16_UNORM) {
789 imin = ((unsigned) (min * 65535.0)) & 0xffff;
790 imax = ((unsigned) (max * 65535.0)) & 0xffff;
791 } else if (format == PIPE_FORMAT_Z32_UNORM) {
792 imin = (unsigned) (min * 4294967295.0);
793 imax = (unsigned) (max * 4294967295.0);
794 } else if (format == PIPE_FORMAT_Z24X8_UNORM ||
795 format == PIPE_FORMAT_X8Z24_UNORM) {
796 imin = ((unsigned) (min * 16777215.0)) & 0xffffff;
797 imax = ((unsigned) (max * 16777215.0)) & 0xffffff;
798 } else {
799 unreachable("Unknown depth buffer format");
800 }
801
802 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
803 unsigned iz = data->bzzzz[j];
804
805 if (iz >= imin && iz <= imax)
806 passMask |= (1 << j);
807 }
808 }
809
810 quads[i]->inout.mask &= passMask;
811
812 if (quads[i]->inout.mask)
813 quads[pass_nr++] = quads[i];
814 }
815
816 return pass_nr;
817 }
818
819
/**
 * Number of set bits in a 4-bit quad coverage mask, indexed by the mask
 * value.  Used to accumulate the occlusion counter.  The table is
 * read-only, hence const.
 */
static const unsigned mask_count[16] =
{
   0,                           /* 0x0 */
   1,                           /* 0x1 */
   1,                           /* 0x2 */
   2,                           /* 0x3 */
   1,                           /* 0x4 */
   2,                           /* 0x5 */
   2,                           /* 0x6 */
   3,                           /* 0x7 */
   1,                           /* 0x8 */
   2,                           /* 0x9 */
   2,                           /* 0xa */
   3,                           /* 0xb */
   2,                           /* 0xc */
   3,                           /* 0xd */
   3,                           /* 0xe */
   4,                           /* 0xf */
};
839
840
841
842 /**
843 * General depth/stencil test function. Used when there's no fast-path.
844 */
845 static void
depth_test_quads_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)846 depth_test_quads_fallback(struct quad_stage *qs,
847 struct quad_header *quads[],
848 unsigned nr)
849 {
850 unsigned i, pass = 0;
851 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
852 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
853 boolean shader_stencil_ref = fsInfo->writes_stencil;
854 boolean have_zs = !!qs->softpipe->framebuffer.zsbuf;
855 struct depth_data data;
856 unsigned vp_idx = quads[0]->input.viewport_index;
857
858 data.use_shader_stencil_refs = FALSE;
859
860 if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
861 qs->softpipe->depth_stencil->stencil[0].enabled ||
862 qs->softpipe->depth_stencil->depth_bounds_test)) {
863 float near_val, far_val;
864
865 data.ps = qs->softpipe->framebuffer.zsbuf;
866 data.format = data.ps->format;
867 data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
868 quads[0]->input.x0,
869 quads[0]->input.y0, quads[0]->input.layer);
870 data.clamp = !qs->softpipe->rasterizer->depth_clip_near;
871
872 near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
873 far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
874 data.minval = MIN2(near_val, far_val);
875 data.maxval = MAX2(near_val, far_val);
876 }
877
878 /* EXT_depth_bounds_test says:
879 *
880 * Where should the depth bounds test take place in the OpenGL fragment
881 * processing pipeline?
882 *
883 * RESOLUTION: After scissor test, before alpha test. In practice,
884 * this is a logical placement of the test. An implementation is
885 * free to perform the test in a manner that is consistent with the
886 * specified ordering.
887 */
888
889 if (have_zs && qs->softpipe->depth_stencil->depth_bounds_test) {
890 nr = depth_bounds_test_quads(qs, quads, nr, &data);
891 }
892
893 if (qs->softpipe->depth_stencil->alpha_enabled) {
894 nr = alpha_test_quads(qs, quads, nr);
895 }
896
897 if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
898 qs->softpipe->depth_stencil->stencil[0].enabled)) {
899 for (i = 0; i < nr; i++) {
900 get_depth_stencil_values(&data, quads[i]);
901
902 if (qs->softpipe->depth_stencil->depth_enabled) {
903 if (interp_depth)
904 interpolate_quad_depth(quads[i]);
905
906 convert_quad_depth(&data, quads[i]);
907 }
908
909 if (qs->softpipe->depth_stencil->stencil[0].enabled) {
910 if (shader_stencil_ref)
911 convert_quad_stencil(&data, quads[i]);
912
913 depth_stencil_test_quad(qs, &data, quads[i]);
914 write_depth_stencil_values(&data, quads[i]);
915 }
916 else {
917 if (!depth_test_quad(qs, &data, quads[i]))
918 continue;
919
920 if (qs->softpipe->depth_stencil->depth_writemask)
921 write_depth_stencil_values(&data, quads[i]);
922 }
923
924 quads[pass++] = quads[i];
925 }
926
927 nr = pass;
928 }
929
930 if (qs->softpipe->active_query_count) {
931 for (i = 0; i < nr; i++)
932 qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
933 }
934
935 if (nr)
936 qs->next->run(qs->next, quads, nr);
937 }
938
939
/**
 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
 *
 * Each inclusion of sp_quad_depth_test_tmp.h below is parameterised by NAME
 * (the name of the generated function) and either OPERATOR (the depth
 * comparison) or ALWAYS.  choose_depth_test() installs the generated
 * functions as qs->run for PIPE_FORMAT_Z16_UNORM depth buffers.
 *
 * NOTE(review): NAME/OPERATOR are re-#defined before every include, so the
 * template header presumably #undefs them after use -- confirm.
 */

/* depth func PIPE_FUNC_LESS */
#define NAME depth_interp_z16_less_write
#define OPERATOR <
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_EQUAL */
#define NAME depth_interp_z16_equal_write
#define OPERATOR ==
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_LEQUAL */
#define NAME depth_interp_z16_lequal_write
#define OPERATOR <=
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_GREATER */
#define NAME depth_interp_z16_greater_write
#define OPERATOR >
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_NOTEQUAL */
#define NAME depth_interp_z16_notequal_write
#define OPERATOR !=
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_GEQUAL */
#define NAME depth_interp_z16_gequal_write
#define OPERATOR >=
#include "sp_quad_depth_test_tmp.h"

/* depth func PIPE_FUNC_ALWAYS: no comparison operator, ALWAYS is set instead */
#define NAME depth_interp_z16_always_write
#define ALWAYS 1
#include "sp_quad_depth_test_tmp.h"
971
972
973
974 static void
depth_noop(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)975 depth_noop(struct quad_stage *qs,
976 struct quad_header *quads[],
977 unsigned nr)
978 {
979 qs->next->run(qs->next, quads, nr);
980 }
981
982
983
984 static void
choose_depth_test(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)985 choose_depth_test(struct quad_stage *qs,
986 struct quad_header *quads[],
987 unsigned nr)
988 {
989 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
990
991 boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
992
993 boolean alpha = qs->softpipe->depth_stencil->alpha_enabled;
994
995 boolean depth = qs->softpipe->depth_stencil->depth_enabled;
996
997 unsigned depthfunc = qs->softpipe->depth_stencil->depth_func;
998
999 boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
1000
1001 boolean depthwrite = qs->softpipe->depth_stencil->depth_writemask;
1002
1003 boolean occlusion = qs->softpipe->active_query_count;
1004
1005 boolean clipped = !qs->softpipe->rasterizer->depth_clip_near;
1006
1007 boolean depth_bounds = qs->softpipe->depth_stencil->depth_bounds_test;
1008
1009 if(!qs->softpipe->framebuffer.zsbuf)
1010 depth = depthwrite = stencil = FALSE;
1011
1012 /* default */
1013 qs->run = depth_test_quads_fallback;
1014
1015 /* look for special cases */
1016 if (!alpha &&
1017 !depth &&
1018 !occlusion &&
1019 !clipped &&
1020 !stencil &&
1021 !depth_bounds) {
1022 qs->run = depth_noop;
1023 }
1024 else if (!alpha &&
1025 interp_depth &&
1026 depth &&
1027 depthwrite &&
1028 !occlusion &&
1029 !clipped &&
1030 !stencil &&
1031 !depth_bounds)
1032 {
1033 if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
1034 switch (depthfunc) {
1035 case PIPE_FUNC_NEVER:
1036 qs->run = depth_test_quads_fallback;
1037 break;
1038 case PIPE_FUNC_LESS:
1039 qs->run = depth_interp_z16_less_write;
1040 break;
1041 case PIPE_FUNC_EQUAL:
1042 qs->run = depth_interp_z16_equal_write;
1043 break;
1044 case PIPE_FUNC_LEQUAL:
1045 qs->run = depth_interp_z16_lequal_write;
1046 break;
1047 case PIPE_FUNC_GREATER:
1048 qs->run = depth_interp_z16_greater_write;
1049 break;
1050 case PIPE_FUNC_NOTEQUAL:
1051 qs->run = depth_interp_z16_notequal_write;
1052 break;
1053 case PIPE_FUNC_GEQUAL:
1054 qs->run = depth_interp_z16_gequal_write;
1055 break;
1056 case PIPE_FUNC_ALWAYS:
1057 qs->run = depth_interp_z16_always_write;
1058 break;
1059 default:
1060 qs->run = depth_test_quads_fallback;
1061 break;
1062 }
1063 }
1064 }
1065
1066 /* next quad/fragment stage */
1067 qs->run( qs, quads, nr );
1068 }
1069
1070
1071
1072 static void
depth_test_begin(struct quad_stage * qs)1073 depth_test_begin(struct quad_stage *qs)
1074 {
1075 qs->run = choose_depth_test;
1076 qs->next->begin(qs->next);
1077 }
1078
1079
/**
 * quad_stage::destroy callback: release the stage object, which
 * sp_quad_depth_test_stage() allocated with CALLOC_STRUCT.
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE( qs );
}
1085
1086
1087 struct quad_stage *
sp_quad_depth_test_stage(struct softpipe_context * softpipe)1088 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
1089 {
1090 struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1091
1092 stage->softpipe = softpipe;
1093 stage->begin = depth_test_begin;
1094 stage->run = choose_depth_test;
1095 stage->destroy = depth_test_destroy;
1096
1097 return stage;
1098 }
1099