• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * Copyright 2010 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * \brief  Quad depth / stencil testing
31  */
32 
33 #include "pipe/p_defines.h"
34 #include "util/u_format.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "sp_context.h"
39 #include "sp_quad.h"
40 #include "sp_quad_pipe.h"
41 #include "sp_tile_cache.h"
42 #include "sp_state.h"           /* for sp_fragment_shader */
43 
44 
45 struct depth_data {
46    struct pipe_surface *ps;
47    enum pipe_format format;
48    unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
49    unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
50    ubyte stencilVals[TGSI_QUAD_SIZE];
51    boolean use_shader_stencil_refs;
52    ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
53    struct softpipe_cached_tile *tile;
54    float minval, maxval;
55    bool clamp;
56 };
57 
58 
59 
60 static void
get_depth_stencil_values(struct depth_data * data,const struct quad_header * quad)61 get_depth_stencil_values( struct depth_data *data,
62                           const struct quad_header *quad )
63 {
64    unsigned j;
65    const struct softpipe_cached_tile *tile = data->tile;
66 
67    switch (data->format) {
68    case PIPE_FORMAT_Z16_UNORM:
69       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
70          int x = quad->input.x0 % TILE_SIZE + (j & 1);
71          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
72          data->bzzzz[j] = tile->data.depth16[y][x];
73       }
74       break;
75    case PIPE_FORMAT_Z32_UNORM:
76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77          int x = quad->input.x0 % TILE_SIZE + (j & 1);
78          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
79          data->bzzzz[j] = tile->data.depth32[y][x];
80       }
81       break;
82    case PIPE_FORMAT_Z24X8_UNORM:
83    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
84       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
85          int x = quad->input.x0 % TILE_SIZE + (j & 1);
86          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
87          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
88          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
89       }
90       break;
91    case PIPE_FORMAT_X8Z24_UNORM:
92    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
93       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94          int x = quad->input.x0 % TILE_SIZE + (j & 1);
95          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
96          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
97          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
98       }
99       break;
100    case PIPE_FORMAT_S8_UINT:
101       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
102          int x = quad->input.x0 % TILE_SIZE + (j & 1);
103          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
104          data->bzzzz[j] = 0;
105          data->stencilVals[j] = tile->data.stencil8[y][x];
106       }
107       break;
108    case PIPE_FORMAT_Z32_FLOAT:
109       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
110          int x = quad->input.x0 % TILE_SIZE + (j & 1);
111          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
112          data->bzzzz[j] = tile->data.depth32[y][x];
113       }
114       break;
115    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
116       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
117          int x = quad->input.x0 % TILE_SIZE + (j & 1);
118          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
119          data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
120          data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
121       }
122       break;
123    default:
124       assert(0);
125    }
126 }
127 
128 
129 /**
130  * If the shader has not been run, interpolate the depth values
131  * ourselves.
132  */
133 static void
interpolate_quad_depth(struct quad_header * quad)134 interpolate_quad_depth( struct quad_header *quad )
135 {
136    const float fx = (float) quad->input.x0;
137    const float fy = (float) quad->input.y0;
138    const float dzdx = quad->posCoef->dadx[2];
139    const float dzdy = quad->posCoef->dady[2];
140    const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
141 
142    quad->output.depth[0] = z0;
143    quad->output.depth[1] = z0 + dzdx;
144    quad->output.depth[2] = z0 + dzdy;
145    quad->output.depth[3] = z0 + dzdx + dzdy;
146 }
147 
148 
149 /**
150  * Compute the depth_data::qzzzz[] values from the float fragment Z values.
151  */
152 static void
convert_quad_depth(struct depth_data * data,const struct quad_header * quad)153 convert_quad_depth( struct depth_data *data,
154                     const struct quad_header *quad )
155 {
156    unsigned j;
157    float dvals[TGSI_QUAD_SIZE];
158 
159    /* Convert quad's float depth values to int depth values (qzzzz).
160     * If the Z buffer stores integer values, we _have_ to do the depth
161     * compares with integers (not floats).  Otherwise, the float->int->float
162     * conversion of Z values (which isn't an identity function) will cause
163     * Z-fighting errors.
164     */
165    if (data->clamp) {
166       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
167          dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
168       }
169    } else {
170       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
171          dvals[j] = quad->output.depth[j];
172       }
173    }
174 
175    switch (data->format) {
176    case PIPE_FORMAT_Z16_UNORM:
177       {
178          float scale = 65535.0;
179 
180          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
181             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
182          }
183       }
184       break;
185    case PIPE_FORMAT_Z32_UNORM:
186       {
187          double scale = (double) (uint) ~0UL;
188 
189          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
190             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
191          }
192       }
193       break;
194    case PIPE_FORMAT_Z24X8_UNORM:
195    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
196       {
197          float scale = (float) ((1 << 24) - 1);
198 
199          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
200             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
201          }
202       }
203       break;
204    case PIPE_FORMAT_X8Z24_UNORM:
205    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
206       {
207          float scale = (float) ((1 << 24) - 1);
208 
209          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
210             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
211          }
212       }
213       break;
214    case PIPE_FORMAT_Z32_FLOAT:
215    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
216       {
217          union fi fui;
218 
219          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
220             fui.f = dvals[j];
221             data->qzzzz[j] = fui.ui;
222          }
223       }
224       break;
225    default:
226       assert(0);
227    }
228 }
229 
230 
231 /**
232  * Compute the depth_data::shader_stencil_refs[] values from the float
233  * fragment stencil values.
234  */
235 static void
convert_quad_stencil(struct depth_data * data,const struct quad_header * quad)236 convert_quad_stencil( struct depth_data *data,
237                       const struct quad_header *quad )
238 {
239    unsigned j;
240 
241    data->use_shader_stencil_refs = TRUE;
242    /* Copy quads stencil values
243     */
244    switch (data->format) {
245    case PIPE_FORMAT_Z24X8_UNORM:
246    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
247    case PIPE_FORMAT_X8Z24_UNORM:
248    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
249    case PIPE_FORMAT_S8_UINT:
250    case PIPE_FORMAT_Z32_FLOAT:
251    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
252       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
253          data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
254       }
255       break;
256    default:
257       assert(0);
258    }
259 }
260 
261 
262 /**
263  * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
264  */
265 static void
write_depth_stencil_values(struct depth_data * data,struct quad_header * quad)266 write_depth_stencil_values( struct depth_data *data,
267                             struct quad_header *quad )
268 {
269    struct softpipe_cached_tile *tile = data->tile;
270    unsigned j;
271 
272    /* put updated Z values back into cached tile */
273    switch (data->format) {
274    case PIPE_FORMAT_Z16_UNORM:
275       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
276          int x = quad->input.x0 % TILE_SIZE + (j & 1);
277          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
278          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
279       }
280       break;
281    case PIPE_FORMAT_Z24X8_UNORM:
282    case PIPE_FORMAT_Z32_UNORM:
283       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
284          int x = quad->input.x0 % TILE_SIZE + (j & 1);
285          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
286          tile->data.depth32[y][x] = data->bzzzz[j];
287       }
288       break;
289    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
290       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
291          int x = quad->input.x0 % TILE_SIZE + (j & 1);
292          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
293          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
294       }
295       break;
296    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
297       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
298          int x = quad->input.x0 % TILE_SIZE + (j & 1);
299          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
300          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
301       }
302       break;
303    case PIPE_FORMAT_X8Z24_UNORM:
304       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
305          int x = quad->input.x0 % TILE_SIZE + (j & 1);
306          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
307          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
308       }
309       break;
310    case PIPE_FORMAT_S8_UINT:
311       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
312          int x = quad->input.x0 % TILE_SIZE + (j & 1);
313          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
314          tile->data.stencil8[y][x] = data->stencilVals[j];
315       }
316       break;
317    case PIPE_FORMAT_Z32_FLOAT:
318       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
319          int x = quad->input.x0 % TILE_SIZE + (j & 1);
320          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
321          tile->data.depth32[y][x] = data->bzzzz[j];
322       }
323       break;
324    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
325       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
326          int x = quad->input.x0 % TILE_SIZE + (j & 1);
327          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
328          tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
329       }
330       break;
331    default:
332       assert(0);
333    }
334 }
335 
336 
337 
/**
 * Largest stencil value; only 8-bit stencil is supported.
 */
#define STENCIL_MAX 0xff
340 
341 
342 /**
343  * Do the basic stencil test (compare stencil buffer values against the
344  * reference value.
345  *
346  * \param data->stencilVals  the stencil values from the stencil buffer
347  * \param func  the stencil func (PIPE_FUNC_x)
348  * \param ref  the stencil reference value
349  * \param valMask  the stencil value mask indicating which bits of the stencil
350  *                 values and ref value are to be used.
351  * \return mask indicating which pixels passed the stencil test
352  */
353 static unsigned
do_stencil_test(struct depth_data * data,unsigned func,unsigned ref,unsigned valMask)354 do_stencil_test(struct depth_data *data,
355                 unsigned func,
356                 unsigned ref, unsigned valMask)
357 {
358    unsigned passMask = 0x0;
359    unsigned j;
360    ubyte refs[TGSI_QUAD_SIZE];
361 
362    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
363       if (data->use_shader_stencil_refs)
364          refs[j] = data->shader_stencil_refs[j] & valMask;
365       else
366          refs[j] = ref & valMask;
367    }
368 
369    switch (func) {
370    case PIPE_FUNC_NEVER:
371       /* passMask = 0x0 */
372       break;
373    case PIPE_FUNC_LESS:
374       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375          if (refs[j] < (data->stencilVals[j] & valMask)) {
376             passMask |= (1 << j);
377          }
378       }
379       break;
380    case PIPE_FUNC_EQUAL:
381       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
382          if (refs[j] == (data->stencilVals[j] & valMask)) {
383             passMask |= (1 << j);
384          }
385       }
386       break;
387    case PIPE_FUNC_LEQUAL:
388       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389          if (refs[j] <= (data->stencilVals[j] & valMask)) {
390             passMask |= (1 << j);
391          }
392       }
393       break;
394    case PIPE_FUNC_GREATER:
395       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
396          if (refs[j] > (data->stencilVals[j] & valMask)) {
397             passMask |= (1 << j);
398          }
399       }
400       break;
401    case PIPE_FUNC_NOTEQUAL:
402       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403          if (refs[j] != (data->stencilVals[j] & valMask)) {
404             passMask |= (1 << j);
405          }
406       }
407       break;
408    case PIPE_FUNC_GEQUAL:
409       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
410          if (refs[j] >= (data->stencilVals[j] & valMask)) {
411             passMask |= (1 << j);
412          }
413       }
414       break;
415    case PIPE_FUNC_ALWAYS:
416       passMask = MASK_ALL;
417       break;
418    default:
419       assert(0);
420    }
421 
422    return passMask;
423 }
424 
425 
426 /**
427  * Apply the stencil operator to stencil values.
428  *
429  * \param data->stencilVals  the stencil buffer values (read and written)
430  * \param mask  indicates which pixels to update
431  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
432  * \param ref  the stencil reference value
433  * \param wrtMask  writemask controlling which bits are changed in the
434  *                 stencil values
435  */
436 static void
apply_stencil_op(struct depth_data * data,unsigned mask,unsigned op,ubyte ref,ubyte wrtMask)437 apply_stencil_op(struct depth_data *data,
438                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
439 {
440    unsigned j;
441    ubyte newstencil[TGSI_QUAD_SIZE];
442    ubyte refs[TGSI_QUAD_SIZE];
443 
444    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445       newstencil[j] = data->stencilVals[j];
446       if (data->use_shader_stencil_refs)
447          refs[j] = data->shader_stencil_refs[j];
448       else
449          refs[j] = ref;
450    }
451 
452    switch (op) {
453    case PIPE_STENCIL_OP_KEEP:
454       /* no-op */
455       break;
456    case PIPE_STENCIL_OP_ZERO:
457       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
458          if (mask & (1 << j)) {
459             newstencil[j] = 0;
460          }
461       }
462       break;
463    case PIPE_STENCIL_OP_REPLACE:
464       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
465          if (mask & (1 << j)) {
466             newstencil[j] = refs[j];
467          }
468       }
469       break;
470    case PIPE_STENCIL_OP_INCR:
471       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
472          if (mask & (1 << j)) {
473             if (data->stencilVals[j] < STENCIL_MAX) {
474                newstencil[j] = data->stencilVals[j] + 1;
475             }
476          }
477       }
478       break;
479    case PIPE_STENCIL_OP_DECR:
480       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
481          if (mask & (1 << j)) {
482             if (data->stencilVals[j] > 0) {
483                newstencil[j] = data->stencilVals[j] - 1;
484             }
485          }
486       }
487       break;
488    case PIPE_STENCIL_OP_INCR_WRAP:
489       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
490          if (mask & (1 << j)) {
491             newstencil[j] = data->stencilVals[j] + 1;
492          }
493       }
494       break;
495    case PIPE_STENCIL_OP_DECR_WRAP:
496       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
497          if (mask & (1 << j)) {
498             newstencil[j] = data->stencilVals[j] - 1;
499          }
500       }
501       break;
502    case PIPE_STENCIL_OP_INVERT:
503       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
504          if (mask & (1 << j)) {
505             newstencil[j] = ~data->stencilVals[j];
506          }
507       }
508       break;
509    default:
510       assert(0);
511    }
512 
513    /*
514     * update the stencil values
515     */
516    if (wrtMask != STENCIL_MAX) {
517       /* apply bit-wise stencil buffer writemask */
518       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
519          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
520       }
521    }
522    else {
523       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
524          data->stencilVals[j] = newstencil[j];
525       }
526    }
527 }
528 
529 
530 
531 /**
532  * To increase efficiency, we should probably have multiple versions
533  * of this function that are specifically for Z16, Z32 and FP Z buffers.
534  * Try to effectively do that with codegen...
535  */
536 static boolean
depth_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)537 depth_test_quad(struct quad_stage *qs,
538                 struct depth_data *data,
539                 struct quad_header *quad)
540 {
541    struct softpipe_context *softpipe = qs->softpipe;
542    unsigned zmask = 0;
543    unsigned j;
544 
545    switch (softpipe->depth_stencil->depth.func) {
546    case PIPE_FUNC_NEVER:
547       /* zmask = 0 */
548       break;
549    case PIPE_FUNC_LESS:
550       /* Note this is pretty much a single sse or cell instruction.
551        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
552        */
553       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
554 	 if (data->qzzzz[j] < data->bzzzz[j])
555 	    zmask |= 1 << j;
556       }
557       break;
558    case PIPE_FUNC_EQUAL:
559       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
560 	 if (data->qzzzz[j] == data->bzzzz[j])
561 	    zmask |= 1 << j;
562       }
563       break;
564    case PIPE_FUNC_LEQUAL:
565       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
566 	 if (data->qzzzz[j] <= data->bzzzz[j])
567 	    zmask |= (1 << j);
568       }
569       break;
570    case PIPE_FUNC_GREATER:
571       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
572 	 if (data->qzzzz[j] > data->bzzzz[j])
573 	    zmask |= (1 << j);
574       }
575       break;
576    case PIPE_FUNC_NOTEQUAL:
577       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
578 	 if (data->qzzzz[j] != data->bzzzz[j])
579 	    zmask |= (1 << j);
580       }
581       break;
582    case PIPE_FUNC_GEQUAL:
583       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
584 	 if (data->qzzzz[j] >= data->bzzzz[j])
585 	    zmask |= (1 << j);
586       }
587       break;
588    case PIPE_FUNC_ALWAYS:
589       zmask = MASK_ALL;
590       break;
591    default:
592       assert(0);
593    }
594 
595    quad->inout.mask &= zmask;
596    if (quad->inout.mask == 0)
597       return FALSE;
598 
599    /* Update our internal copy only if writemask set.  Even if
600     * depth.writemask is FALSE, may still need to write out buffer
601     * data due to stencil changes.
602     */
603    if (softpipe->depth_stencil->depth.writemask) {
604       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
605          if (quad->inout.mask & (1 << j)) {
606             data->bzzzz[j] = data->qzzzz[j];
607          }
608       }
609    }
610 
611    return TRUE;
612 }
613 
614 
615 
616 /**
617  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
618  * depth testing.
619  */
620 static void
depth_stencil_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)621 depth_stencil_test_quad(struct quad_stage *qs,
622                         struct depth_data *data,
623                         struct quad_header *quad)
624 {
625    struct softpipe_context *softpipe = qs->softpipe;
626    unsigned func, zFailOp, zPassOp, failOp;
627    ubyte ref, wrtMask, valMask;
628    uint face = quad->input.facing;
629 
630    if (!softpipe->depth_stencil->stencil[1].enabled) {
631       /* single-sided stencil test, use front (face=0) state */
632       face = 0;
633    }
634 
635    /* 0 = front-face, 1 = back-face */
636    assert(face == 0 || face == 1);
637 
638    /* choose front or back face function, operator, etc */
639    /* XXX we could do these initializations once per primitive */
640    func    = softpipe->depth_stencil->stencil[face].func;
641    failOp  = softpipe->depth_stencil->stencil[face].fail_op;
642    zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
643    zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
644    ref     = softpipe->stencil_ref.ref_value[face];
645    wrtMask = softpipe->depth_stencil->stencil[face].writemask;
646    valMask = softpipe->depth_stencil->stencil[face].valuemask;
647 
648    /* do the stencil test first */
649    {
650       unsigned passMask, failMask;
651       passMask = do_stencil_test(data, func, ref, valMask);
652       failMask = quad->inout.mask & ~passMask;
653       quad->inout.mask &= passMask;
654 
655       if (failOp != PIPE_STENCIL_OP_KEEP) {
656          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
657       }
658    }
659 
660    if (quad->inout.mask) {
661       /* now the pixels that passed the stencil test are depth tested */
662       if (softpipe->depth_stencil->depth.enabled) {
663          const unsigned origMask = quad->inout.mask;
664 
665          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
666 
667          /* update stencil buffer values according to z pass/fail result */
668          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
669             const unsigned zFailMask = origMask & ~quad->inout.mask;
670             apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
671          }
672 
673          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
674             const unsigned zPassMask = origMask & quad->inout.mask;
675             apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
676          }
677       }
678       else {
679          /* no depth test, apply Zpass operator to stencil buffer values */
680          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
681       }
682    }
683 }
684 
685 
686 #define ALPHATEST( FUNC, COMP )                                         \
687    static unsigned                                                      \
688    alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
689                            struct quad_header *quads[],                 \
690                            unsigned nr )                                \
691    {                                                                    \
692       const float ref = qs->softpipe->depth_stencil->alpha.ref_value;   \
693       const uint cbuf = 0; /* only output[0].alpha is tested */         \
694       unsigned pass_nr = 0;                                             \
695       unsigned i;                                                       \
696                                                                         \
697       for (i = 0; i < nr; i++) {                                        \
698          const float *aaaa = quads[i]->output.color[cbuf][3];           \
699          unsigned passMask = 0;                                         \
700                                                                         \
701          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
702          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
703          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
704          if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
705                                                                         \
706          quads[i]->inout.mask &= passMask;                              \
707                                                                         \
708          if (quads[i]->inout.mask)                                      \
709             quads[pass_nr++] = quads[i];                                \
710       }                                                                 \
711                                                                         \
712       return pass_nr;                                                   \
713    }
714 
715 
716 ALPHATEST( LESS,     < )
717 ALPHATEST( EQUAL,    == )
718 ALPHATEST( LEQUAL,   <= )
719 ALPHATEST( GREATER,  > )
720 ALPHATEST( NOTEQUAL, != )
721 ALPHATEST( GEQUAL,   >= )
722 
723 
724 /* XXX: Incorporate into shader using KILL_IF.
725  */
726 static unsigned
alpha_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)727 alpha_test_quads(struct quad_stage *qs,
728                  struct quad_header *quads[],
729                  unsigned nr)
730 {
731    switch (qs->softpipe->depth_stencil->alpha.func) {
732    case PIPE_FUNC_LESS:
733       return alpha_test_quads_LESS( qs, quads, nr );
734    case PIPE_FUNC_EQUAL:
735       return alpha_test_quads_EQUAL( qs, quads, nr );
736    case PIPE_FUNC_LEQUAL:
737       return alpha_test_quads_LEQUAL( qs, quads, nr );
738    case PIPE_FUNC_GREATER:
739       return alpha_test_quads_GREATER( qs, quads, nr );
740    case PIPE_FUNC_NOTEQUAL:
741       return alpha_test_quads_NOTEQUAL( qs, quads, nr );
742    case PIPE_FUNC_GEQUAL:
743       return alpha_test_quads_GEQUAL( qs, quads, nr );
744    case PIPE_FUNC_ALWAYS:
745       return nr;
746    case PIPE_FUNC_NEVER:
747    default:
748       return 0;
749    }
750 }
751 
752 
/**
 * Number of bits set in a 4-bit quad coverage mask, indexed by the mask.
 * Read-only lookup table, hence const.
 */
static const unsigned mask_count[16] =
{
   0,                           /* 0x0 */
   1,                           /* 0x1 */
   1,                           /* 0x2 */
   2,                           /* 0x3 */
   1,                           /* 0x4 */
   2,                           /* 0x5 */
   2,                           /* 0x6 */
   3,                           /* 0x7 */
   1,                           /* 0x8 */
   2,                           /* 0x9 */
   2,                           /* 0xa */
   3,                           /* 0xb */
   2,                           /* 0xc */
   3,                           /* 0xd */
   3,                           /* 0xe */
   4,                           /* 0xf */
};
772 
773 
774 
775 /**
776  * General depth/stencil test function.  Used when there's no fast-path.
777  */
778 static void
depth_test_quads_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)779 depth_test_quads_fallback(struct quad_stage *qs,
780                           struct quad_header *quads[],
781                           unsigned nr)
782 {
783    unsigned i, pass = 0;
784    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
785    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
786    boolean shader_stencil_ref = fsInfo->writes_stencil;
787    struct depth_data data;
788    unsigned vp_idx = quads[0]->input.viewport_index;
789 
790    data.use_shader_stencil_refs = FALSE;
791 
792    if (qs->softpipe->depth_stencil->alpha.enabled) {
793       nr = alpha_test_quads(qs, quads, nr);
794    }
795 
796    if (qs->softpipe->framebuffer.zsbuf &&
797          (qs->softpipe->depth_stencil->depth.enabled ||
798           qs->softpipe->depth_stencil->stencil[0].enabled)) {
799       float near_val, far_val;
800 
801       data.ps = qs->softpipe->framebuffer.zsbuf;
802       data.format = data.ps->format;
803       data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
804                                      quads[0]->input.x0,
805                                      quads[0]->input.y0, quads[0]->input.layer);
806       data.clamp = !qs->softpipe->rasterizer->depth_clip;
807 
808       near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
809       far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
810       data.minval = MIN2(near_val, far_val);
811       data.maxval = MAX2(near_val, far_val);
812 
813       for (i = 0; i < nr; i++) {
814          get_depth_stencil_values(&data, quads[i]);
815 
816          if (qs->softpipe->depth_stencil->depth.enabled) {
817             if (interp_depth)
818                interpolate_quad_depth(quads[i]);
819 
820             convert_quad_depth(&data, quads[i]);
821          }
822 
823          if (qs->softpipe->depth_stencil->stencil[0].enabled) {
824             if (shader_stencil_ref)
825                convert_quad_stencil(&data, quads[i]);
826 
827             depth_stencil_test_quad(qs, &data, quads[i]);
828             write_depth_stencil_values(&data, quads[i]);
829          }
830          else {
831             if (!depth_test_quad(qs, &data, quads[i]))
832                continue;
833 
834             if (qs->softpipe->depth_stencil->depth.writemask)
835                write_depth_stencil_values(&data, quads[i]);
836          }
837 
838          quads[pass++] = quads[i];
839       }
840 
841       nr = pass;
842    }
843 
844    if (qs->softpipe->active_query_count) {
845       for (i = 0; i < nr; i++)
846          qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
847    }
848 
849    if (nr)
850       qs->next->run(qs->next, quads, nr);
851 }
852 
853 
854 /**
855  * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
856  */
857 
858 #define NAME depth_interp_z16_less_write
859 #define OPERATOR <
860 #include "sp_quad_depth_test_tmp.h"
861 
862 #define NAME depth_interp_z16_equal_write
863 #define OPERATOR ==
864 #include "sp_quad_depth_test_tmp.h"
865 
866 #define NAME depth_interp_z16_lequal_write
867 #define OPERATOR <=
868 #include "sp_quad_depth_test_tmp.h"
869 
870 #define NAME depth_interp_z16_greater_write
871 #define OPERATOR >
872 #include "sp_quad_depth_test_tmp.h"
873 
874 #define NAME depth_interp_z16_notequal_write
875 #define OPERATOR !=
876 #include "sp_quad_depth_test_tmp.h"
877 
878 #define NAME depth_interp_z16_gequal_write
879 #define OPERATOR >=
880 #include "sp_quad_depth_test_tmp.h"
881 
882 #define NAME depth_interp_z16_always_write
883 #define ALWAYS 1
884 #include "sp_quad_depth_test_tmp.h"
885 
886 
887 
888 static void
depth_noop(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)889 depth_noop(struct quad_stage *qs,
890            struct quad_header *quads[],
891            unsigned nr)
892 {
893    qs->next->run(qs->next, quads, nr);
894 }
895 
896 
897 
898 static void
choose_depth_test(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)899 choose_depth_test(struct quad_stage *qs,
900                   struct quad_header *quads[],
901                   unsigned nr)
902 {
903    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
904 
905    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
906 
907    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
908 
909    boolean depth = qs->softpipe->depth_stencil->depth.enabled;
910 
911    unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
912 
913    boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
914 
915    boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
916 
917    boolean occlusion = qs->softpipe->active_query_count;
918 
919    boolean clipped = !qs->softpipe->rasterizer->depth_clip;
920 
921    if(!qs->softpipe->framebuffer.zsbuf)
922       depth = depthwrite = stencil = FALSE;
923 
924    /* default */
925    qs->run = depth_test_quads_fallback;
926 
927    /* look for special cases */
928    if (!alpha &&
929        !depth &&
930        !occlusion &&
931        !clipped &&
932        !stencil) {
933       qs->run = depth_noop;
934    }
935    else if (!alpha &&
936             interp_depth &&
937             depth &&
938             depthwrite &&
939             !occlusion &&
940             !clipped &&
941             !stencil)
942    {
943       if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
944          switch (depthfunc) {
945          case PIPE_FUNC_NEVER:
946             qs->run = depth_test_quads_fallback;
947             break;
948          case PIPE_FUNC_LESS:
949             qs->run = depth_interp_z16_less_write;
950             break;
951          case PIPE_FUNC_EQUAL:
952             qs->run = depth_interp_z16_equal_write;
953             break;
954          case PIPE_FUNC_LEQUAL:
955             qs->run = depth_interp_z16_lequal_write;
956             break;
957          case PIPE_FUNC_GREATER:
958             qs->run = depth_interp_z16_greater_write;
959             break;
960          case PIPE_FUNC_NOTEQUAL:
961             qs->run = depth_interp_z16_notequal_write;
962             break;
963          case PIPE_FUNC_GEQUAL:
964             qs->run = depth_interp_z16_gequal_write;
965             break;
966          case PIPE_FUNC_ALWAYS:
967             qs->run = depth_interp_z16_always_write;
968             break;
969          default:
970             qs->run = depth_test_quads_fallback;
971             break;
972          }
973       }
974    }
975 
976    /* next quad/fragment stage */
977    qs->run( qs, quads, nr );
978 }
979 
980 
981 
982 static void
depth_test_begin(struct quad_stage * qs)983 depth_test_begin(struct quad_stage *qs)
984 {
985    qs->run = choose_depth_test;
986    qs->next->begin(qs->next);
987 }
988 
989 
/**
 * Stage destroy hook: release the stage object itself.
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
995 
996 
997 struct quad_stage *
sp_quad_depth_test_stage(struct softpipe_context * softpipe)998 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
999 {
1000    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1001 
1002    stage->softpipe = softpipe;
1003    stage->begin = depth_test_begin;
1004    stage->run = choose_depth_test;
1005    stage->destroy = depth_test_destroy;
1006 
1007    return stage;
1008 }
1009