• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * Copyright 2010 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * \brief  Quad depth / stencil testing
31  */
32 
33 #include "pipe/p_defines.h"
34 #include "util/format/u_format.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "sp_context.h"
39 #include "sp_quad.h"
40 #include "sp_quad_pipe.h"
41 #include "sp_tile_cache.h"
42 #include "sp_state.h"           /* for sp_fragment_shader */
43 
44 
/**
 * Working state shared by the depth/stencil helpers in this file while a
 * batch of quads is being tested.
 */
struct depth_data {
   /** depth/stencil surface; taken from the framebuffer's zsbuf */
   struct pipe_surface *ps;
   /** format of \c ps; selects how tile data is unpacked and packed */
   enum pipe_format format;
   unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
   unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
   /** stencil values fetched from, and later written back to, the buffer */
   ubyte stencilVals[TGSI_QUAD_SIZE];
   /** set to TRUE by convert_quad_stencil(); selects shader_stencil_refs[]
    *  over the state's reference value in the stencil test / stencil ops */
   boolean use_shader_stencil_refs;
   /** per-pixel stencil reference values copied from the quad's output */
   ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
   /** cached Z/stencil tile that the quad values are read from / stored to */
   struct softpipe_cached_tile *tile;
   /** range the quad's depth is clamped to when \c clamp is set */
   float minval, maxval;
   /** clamp quad depth to [minval, maxval] before converting it */
   bool clamp;
};
57 
58 
59 
60 static void
get_depth_stencil_values(struct depth_data * data,const struct quad_header * quad)61 get_depth_stencil_values( struct depth_data *data,
62                           const struct quad_header *quad )
63 {
64    unsigned j;
65    const struct softpipe_cached_tile *tile = data->tile;
66 
67    switch (data->format) {
68    case PIPE_FORMAT_Z16_UNORM:
69       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
70          int x = quad->input.x0 % TILE_SIZE + (j & 1);
71          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
72          data->bzzzz[j] = tile->data.depth16[y][x];
73       }
74       break;
75    case PIPE_FORMAT_Z32_UNORM:
76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77          int x = quad->input.x0 % TILE_SIZE + (j & 1);
78          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
79          data->bzzzz[j] = tile->data.depth32[y][x];
80       }
81       break;
82    case PIPE_FORMAT_Z24X8_UNORM:
83    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
84       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
85          int x = quad->input.x0 % TILE_SIZE + (j & 1);
86          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
87          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
88          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
89       }
90       break;
91    case PIPE_FORMAT_X8Z24_UNORM:
92    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
93       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94          int x = quad->input.x0 % TILE_SIZE + (j & 1);
95          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
96          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
97          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
98       }
99       break;
100    case PIPE_FORMAT_S8_UINT:
101       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
102          int x = quad->input.x0 % TILE_SIZE + (j & 1);
103          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
104          data->bzzzz[j] = 0;
105          data->stencilVals[j] = tile->data.stencil8[y][x];
106       }
107       break;
108    case PIPE_FORMAT_Z32_FLOAT:
109       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
110          int x = quad->input.x0 % TILE_SIZE + (j & 1);
111          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
112          data->bzzzz[j] = tile->data.depth32[y][x];
113       }
114       break;
115    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
116       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
117          int x = quad->input.x0 % TILE_SIZE + (j & 1);
118          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
119          data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
120          data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
121       }
122       break;
123    default:
124       assert(0);
125    }
126 }
127 
128 
129 /**
130  * If the shader has not been run, interpolate the depth values
131  * ourselves.
132  */
133 static void
interpolate_quad_depth(struct quad_header * quad)134 interpolate_quad_depth( struct quad_header *quad )
135 {
136    const float fx = (float) quad->input.x0;
137    const float fy = (float) quad->input.y0;
138    const float dzdx = quad->posCoef->dadx[2];
139    const float dzdy = quad->posCoef->dady[2];
140    const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
141 
142    quad->output.depth[0] = z0;
143    quad->output.depth[1] = z0 + dzdx;
144    quad->output.depth[2] = z0 + dzdy;
145    quad->output.depth[3] = z0 + dzdx + dzdy;
146 }
147 
148 
149 /**
150  * Compute the depth_data::qzzzz[] values from the float fragment Z values.
151  */
152 static void
convert_quad_depth(struct depth_data * data,const struct quad_header * quad)153 convert_quad_depth( struct depth_data *data,
154                     const struct quad_header *quad )
155 {
156    unsigned j;
157    float dvals[TGSI_QUAD_SIZE];
158 
159    /* Convert quad's float depth values to int depth values (qzzzz).
160     * If the Z buffer stores integer values, we _have_ to do the depth
161     * compares with integers (not floats).  Otherwise, the float->int->float
162     * conversion of Z values (which isn't an identity function) will cause
163     * Z-fighting errors.
164     */
165    if (data->clamp) {
166       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
167          dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
168       }
169    } else {
170       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
171          dvals[j] = quad->output.depth[j];
172       }
173    }
174 
175    switch (data->format) {
176    case PIPE_FORMAT_Z16_UNORM:
177       {
178          float scale = 65535.0;
179 
180          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
181             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
182          }
183       }
184       break;
185    case PIPE_FORMAT_Z32_UNORM:
186       {
187          double scale = (double) (uint) ~0UL;
188 
189          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
190             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
191          }
192       }
193       break;
194    case PIPE_FORMAT_Z24X8_UNORM:
195    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
196       {
197          float scale = (float) ((1 << 24) - 1);
198 
199          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
200             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
201          }
202       }
203       break;
204    case PIPE_FORMAT_X8Z24_UNORM:
205    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
206       {
207          float scale = (float) ((1 << 24) - 1);
208 
209          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
210             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
211          }
212       }
213       break;
214    case PIPE_FORMAT_Z32_FLOAT:
215    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
216       {
217          union fi fui;
218 
219          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
220             fui.f = dvals[j];
221             data->qzzzz[j] = fui.ui;
222          }
223       }
224       break;
225    default:
226       assert(0);
227    }
228 }
229 
230 
231 /**
232  * Compute the depth_data::shader_stencil_refs[] values from the float
233  * fragment stencil values.
234  */
235 static void
convert_quad_stencil(struct depth_data * data,const struct quad_header * quad)236 convert_quad_stencil( struct depth_data *data,
237                       const struct quad_header *quad )
238 {
239    unsigned j;
240 
241    data->use_shader_stencil_refs = TRUE;
242    /* Copy quads stencil values
243     */
244    switch (data->format) {
245    case PIPE_FORMAT_Z24X8_UNORM:
246    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
247    case PIPE_FORMAT_X8Z24_UNORM:
248    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
249    case PIPE_FORMAT_S8_UINT:
250    case PIPE_FORMAT_Z32_FLOAT:
251    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
252       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
253          data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
254       }
255       break;
256    default:
257       assert(0);
258    }
259 }
260 
261 
262 /**
263  * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
264  */
265 static void
write_depth_stencil_values(struct depth_data * data,struct quad_header * quad)266 write_depth_stencil_values( struct depth_data *data,
267                             struct quad_header *quad )
268 {
269    struct softpipe_cached_tile *tile = data->tile;
270    unsigned j;
271 
272    /* put updated Z values back into cached tile */
273    switch (data->format) {
274    case PIPE_FORMAT_Z16_UNORM:
275       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
276          int x = quad->input.x0 % TILE_SIZE + (j & 1);
277          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
278          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
279       }
280       break;
281    case PIPE_FORMAT_Z24X8_UNORM:
282    case PIPE_FORMAT_Z32_UNORM:
283       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
284          int x = quad->input.x0 % TILE_SIZE + (j & 1);
285          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
286          tile->data.depth32[y][x] = data->bzzzz[j];
287       }
288       break;
289    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
290       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
291          int x = quad->input.x0 % TILE_SIZE + (j & 1);
292          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
293          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
294       }
295       break;
296    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
297       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
298          int x = quad->input.x0 % TILE_SIZE + (j & 1);
299          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
300          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
301       }
302       break;
303    case PIPE_FORMAT_X8Z24_UNORM:
304       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
305          int x = quad->input.x0 % TILE_SIZE + (j & 1);
306          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
307          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
308       }
309       break;
310    case PIPE_FORMAT_S8_UINT:
311       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
312          int x = quad->input.x0 % TILE_SIZE + (j & 1);
313          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
314          tile->data.stencil8[y][x] = data->stencilVals[j];
315       }
316       break;
317    case PIPE_FORMAT_Z32_FLOAT:
318       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
319          int x = quad->input.x0 % TILE_SIZE + (j & 1);
320          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
321          tile->data.depth32[y][x] = data->bzzzz[j];
322       }
323       break;
324    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
325       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
326          int x = quad->input.x0 % TILE_SIZE + (j & 1);
327          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
328          tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
329       }
330       break;
331    default:
332       assert(0);
333    }
334 }
335 
336 
337 
/**
 * Largest stencil value; only 8-bit stencil is supported.
 * Also serves as the "all bits writable" stencil writemask.
 */
#define STENCIL_MAX 0xff
340 
341 
342 /**
343  * Do the basic stencil test (compare stencil buffer values against the
344  * reference value.
345  *
346  * \param data->stencilVals  the stencil values from the stencil buffer
347  * \param func  the stencil func (PIPE_FUNC_x)
348  * \param ref  the stencil reference value
349  * \param valMask  the stencil value mask indicating which bits of the stencil
350  *                 values and ref value are to be used.
351  * \return mask indicating which pixels passed the stencil test
352  */
353 static unsigned
do_stencil_test(struct depth_data * data,unsigned func,unsigned ref,unsigned valMask)354 do_stencil_test(struct depth_data *data,
355                 unsigned func,
356                 unsigned ref, unsigned valMask)
357 {
358    unsigned passMask = 0x0;
359    unsigned j;
360    ubyte refs[TGSI_QUAD_SIZE];
361 
362    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
363       if (data->use_shader_stencil_refs)
364          refs[j] = data->shader_stencil_refs[j] & valMask;
365       else
366          refs[j] = ref & valMask;
367    }
368 
369    switch (func) {
370    case PIPE_FUNC_NEVER:
371       /* passMask = 0x0 */
372       break;
373    case PIPE_FUNC_LESS:
374       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
375          if (refs[j] < (data->stencilVals[j] & valMask)) {
376             passMask |= (1 << j);
377          }
378       }
379       break;
380    case PIPE_FUNC_EQUAL:
381       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
382          if (refs[j] == (data->stencilVals[j] & valMask)) {
383             passMask |= (1 << j);
384          }
385       }
386       break;
387    case PIPE_FUNC_LEQUAL:
388       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
389          if (refs[j] <= (data->stencilVals[j] & valMask)) {
390             passMask |= (1 << j);
391          }
392       }
393       break;
394    case PIPE_FUNC_GREATER:
395       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
396          if (refs[j] > (data->stencilVals[j] & valMask)) {
397             passMask |= (1 << j);
398          }
399       }
400       break;
401    case PIPE_FUNC_NOTEQUAL:
402       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
403          if (refs[j] != (data->stencilVals[j] & valMask)) {
404             passMask |= (1 << j);
405          }
406       }
407       break;
408    case PIPE_FUNC_GEQUAL:
409       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
410          if (refs[j] >= (data->stencilVals[j] & valMask)) {
411             passMask |= (1 << j);
412          }
413       }
414       break;
415    case PIPE_FUNC_ALWAYS:
416       passMask = MASK_ALL;
417       break;
418    default:
419       assert(0);
420    }
421 
422    return passMask;
423 }
424 
425 
426 /**
427  * Apply the stencil operator to stencil values.
428  *
429  * \param data->stencilVals  the stencil buffer values (read and written)
430  * \param mask  indicates which pixels to update
431  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
432  * \param ref  the stencil reference value
433  * \param wrtMask  writemask controlling which bits are changed in the
434  *                 stencil values
435  */
436 static void
apply_stencil_op(struct depth_data * data,unsigned mask,unsigned op,ubyte ref,ubyte wrtMask)437 apply_stencil_op(struct depth_data *data,
438                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
439 {
440    unsigned j;
441    ubyte newstencil[TGSI_QUAD_SIZE];
442    ubyte refs[TGSI_QUAD_SIZE];
443 
444    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445       newstencil[j] = data->stencilVals[j];
446       if (data->use_shader_stencil_refs)
447          refs[j] = data->shader_stencil_refs[j];
448       else
449          refs[j] = ref;
450    }
451 
452    switch (op) {
453    case PIPE_STENCIL_OP_KEEP:
454       /* no-op */
455       break;
456    case PIPE_STENCIL_OP_ZERO:
457       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
458          if (mask & (1 << j)) {
459             newstencil[j] = 0;
460          }
461       }
462       break;
463    case PIPE_STENCIL_OP_REPLACE:
464       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
465          if (mask & (1 << j)) {
466             newstencil[j] = refs[j];
467          }
468       }
469       break;
470    case PIPE_STENCIL_OP_INCR:
471       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
472          if (mask & (1 << j)) {
473             if (data->stencilVals[j] < STENCIL_MAX) {
474                newstencil[j] = data->stencilVals[j] + 1;
475             }
476          }
477       }
478       break;
479    case PIPE_STENCIL_OP_DECR:
480       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
481          if (mask & (1 << j)) {
482             if (data->stencilVals[j] > 0) {
483                newstencil[j] = data->stencilVals[j] - 1;
484             }
485          }
486       }
487       break;
488    case PIPE_STENCIL_OP_INCR_WRAP:
489       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
490          if (mask & (1 << j)) {
491             newstencil[j] = data->stencilVals[j] + 1;
492          }
493       }
494       break;
495    case PIPE_STENCIL_OP_DECR_WRAP:
496       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
497          if (mask & (1 << j)) {
498             newstencil[j] = data->stencilVals[j] - 1;
499          }
500       }
501       break;
502    case PIPE_STENCIL_OP_INVERT:
503       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
504          if (mask & (1 << j)) {
505             newstencil[j] = ~data->stencilVals[j];
506          }
507       }
508       break;
509    default:
510       assert(0);
511    }
512 
513    /*
514     * update the stencil values
515     */
516    if (wrtMask != STENCIL_MAX) {
517       /* apply bit-wise stencil buffer writemask */
518       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
519          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
520       }
521    }
522    else {
523       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
524          data->stencilVals[j] = newstencil[j];
525       }
526    }
527 }
528 
529 
530 
531 /**
532  * To increase efficiency, we should probably have multiple versions
533  * of this function that are specifically for Z16, Z32 and FP Z buffers.
534  * Try to effectively do that with codegen...
535  */
536 static boolean
depth_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)537 depth_test_quad(struct quad_stage *qs,
538                 struct depth_data *data,
539                 struct quad_header *quad)
540 {
541    struct softpipe_context *softpipe = qs->softpipe;
542    unsigned zmask = 0;
543    unsigned j;
544 
545 #define DEPTHTEST(l, op, r) do { \
546       if (data->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || \
547           data->format == PIPE_FORMAT_Z32_FLOAT) { \
548          for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
549             if (((float *)l)[j] op ((float *)r)[j]) \
550                zmask |= (1 << j); \
551          } \
552       } else { \
553          for (j = 0; j < TGSI_QUAD_SIZE; j++) { \
554             if (l[j] op r[j]) \
555                zmask |= (1 << j); \
556          } \
557       } \
558    } while (0)
559 
560    switch (softpipe->depth_stencil->depth_func) {
561    case PIPE_FUNC_NEVER:
562       /* zmask = 0 */
563       break;
564    case PIPE_FUNC_LESS:
565       /* Note this is pretty much a single sse or cell instruction.
566        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
567        */
568       DEPTHTEST(data->qzzzz,  <, data->bzzzz);
569       break;
570    case PIPE_FUNC_EQUAL:
571       DEPTHTEST(data->qzzzz, ==, data->bzzzz);
572       break;
573    case PIPE_FUNC_LEQUAL:
574       DEPTHTEST(data->qzzzz, <=, data->bzzzz);
575       break;
576    case PIPE_FUNC_GREATER:
577       DEPTHTEST(data->qzzzz,  >, data->bzzzz);
578       break;
579    case PIPE_FUNC_NOTEQUAL:
580       DEPTHTEST(data->qzzzz, !=, data->bzzzz);
581       break;
582    case PIPE_FUNC_GEQUAL:
583       DEPTHTEST(data->qzzzz, >=, data->bzzzz);
584       break;
585    case PIPE_FUNC_ALWAYS:
586       zmask = MASK_ALL;
587       break;
588    default:
589       assert(0);
590    }
591 
592    quad->inout.mask &= zmask;
593    if (quad->inout.mask == 0)
594       return FALSE;
595 
596    /* Update our internal copy only if writemask set.  Even if
597     * depth.writemask is FALSE, may still need to write out buffer
598     * data due to stencil changes.
599     */
600    if (softpipe->depth_stencil->depth_writemask) {
601       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
602          if (quad->inout.mask & (1 << j)) {
603             data->bzzzz[j] = data->qzzzz[j];
604          }
605       }
606    }
607 
608    return TRUE;
609 }
610 
611 
612 
613 /**
614  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
615  * depth testing.
616  */
617 static void
depth_stencil_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)618 depth_stencil_test_quad(struct quad_stage *qs,
619                         struct depth_data *data,
620                         struct quad_header *quad)
621 {
622    struct softpipe_context *softpipe = qs->softpipe;
623    unsigned func, zFailOp, zPassOp, failOp;
624    ubyte ref, wrtMask, valMask;
625    uint face = quad->input.facing;
626 
627    if (!softpipe->depth_stencil->stencil[1].enabled) {
628       /* single-sided stencil test, use front (face=0) state */
629       face = 0;
630    }
631 
632    /* 0 = front-face, 1 = back-face */
633    assert(face == 0 || face == 1);
634 
635    /* choose front or back face function, operator, etc */
636    /* XXX we could do these initializations once per primitive */
637    func    = softpipe->depth_stencil->stencil[face].func;
638    failOp  = softpipe->depth_stencil->stencil[face].fail_op;
639    zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
640    zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
641    ref     = softpipe->stencil_ref.ref_value[face];
642    wrtMask = softpipe->depth_stencil->stencil[face].writemask;
643    valMask = softpipe->depth_stencil->stencil[face].valuemask;
644 
645    /* do the stencil test first */
646    {
647       unsigned passMask, failMask;
648       passMask = do_stencil_test(data, func, ref, valMask);
649       failMask = quad->inout.mask & ~passMask;
650       quad->inout.mask &= passMask;
651 
652       if (failOp != PIPE_STENCIL_OP_KEEP) {
653          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
654       }
655    }
656 
657    if (quad->inout.mask) {
658       /* now the pixels that passed the stencil test are depth tested */
659       if (softpipe->depth_stencil->depth_enabled) {
660          const unsigned origMask = quad->inout.mask;
661 
662          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
663 
664          /* update stencil buffer values according to z pass/fail result */
665          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
666             const unsigned zFailMask = origMask & ~quad->inout.mask;
667             apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
668          }
669 
670          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
671             const unsigned zPassMask = origMask & quad->inout.mask;
672             apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
673          }
674       }
675       else {
676          /* no depth test, apply Zpass operator to stencil buffer values */
677          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
678       }
679    }
680 }
681 
682 
683 #define ALPHATEST( FUNC, COMP )                                         \
684    static unsigned                                                      \
685    alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
686                            struct quad_header *quads[],                 \
687                            unsigned nr )                                \
688    {                                                                    \
689       const float ref = qs->softpipe->depth_stencil->alpha_ref_value;   \
690       const uint cbuf = 0; /* only output[0].alpha is tested */         \
691       unsigned pass_nr = 0;                                             \
692       unsigned i;                                                       \
693                                                                         \
694       for (i = 0; i < nr; i++) {                                        \
695          const float *aaaa = quads[i]->output.color[cbuf][3];           \
696          unsigned passMask = 0;                                         \
697                                                                         \
698          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
699          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
700          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
701          if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
702                                                                         \
703          quads[i]->inout.mask &= passMask;                              \
704                                                                         \
705          if (quads[i]->inout.mask)                                      \
706             quads[pass_nr++] = quads[i];                                \
707       }                                                                 \
708                                                                         \
709       return pass_nr;                                                   \
710    }
711 
712 
713 ALPHATEST( LESS,     < )
714 ALPHATEST( EQUAL,    == )
715 ALPHATEST( LEQUAL,   <= )
716 ALPHATEST( GREATER,  > )
717 ALPHATEST( NOTEQUAL, != )
718 ALPHATEST( GEQUAL,   >= )
719 
720 
721 /* XXX: Incorporate into shader using KILL_IF.
722  */
723 static unsigned
alpha_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)724 alpha_test_quads(struct quad_stage *qs,
725                  struct quad_header *quads[],
726                  unsigned nr)
727 {
728    switch (qs->softpipe->depth_stencil->alpha_func) {
729    case PIPE_FUNC_LESS:
730       return alpha_test_quads_LESS( qs, quads, nr );
731    case PIPE_FUNC_EQUAL:
732       return alpha_test_quads_EQUAL( qs, quads, nr );
733    case PIPE_FUNC_LEQUAL:
734       return alpha_test_quads_LEQUAL( qs, quads, nr );
735    case PIPE_FUNC_GREATER:
736       return alpha_test_quads_GREATER( qs, quads, nr );
737    case PIPE_FUNC_NOTEQUAL:
738       return alpha_test_quads_NOTEQUAL( qs, quads, nr );
739    case PIPE_FUNC_GEQUAL:
740       return alpha_test_quads_GEQUAL( qs, quads, nr );
741    case PIPE_FUNC_ALWAYS:
742       return nr;
743    case PIPE_FUNC_NEVER:
744    default:
745       return 0;
746    }
747 }
748 
749 
750 /**
751  * EXT_depth_bounds_test has some careful language about precision:
752  *
753  *     At what precision is the depth bounds test carried out?
754  *
755  *       RESOLUTION:  For the purposes of the test, the bounds are converted
756  *       to fixed-point as though they were to be written to the depth buffer,
757  *       and the comparison uses those quantized bounds.
758  *
759  * We choose the obvious interpretation that Z32F needs no such conversion.
760  */
761 static unsigned
depth_bounds_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr,struct depth_data * data)762 depth_bounds_test_quads(struct quad_stage *qs,
763                         struct quad_header *quads[],
764                         unsigned nr,
765                         struct depth_data *data)
766 {
767    struct pipe_depth_stencil_alpha_state *dsa = qs->softpipe->depth_stencil;
768    unsigned i = 0, pass_nr = 0;
769    enum pipe_format format = util_format_get_depth_only(data->format);
770    double min = dsa->depth_bounds_min;
771    double max = dsa->depth_bounds_max;
772 
773    for (i = 0; i < nr; i++) {
774       unsigned j = 0, passMask = 0;
775 
776       get_depth_stencil_values(data, quads[i]);
777 
778       if (format == PIPE_FORMAT_Z32_FLOAT) {
779          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
780             double z = uif(data->bzzzz[j]);
781 
782             if (z >= min && z <= max)
783                passMask |= (1 << j);
784          }
785       } else {
786          unsigned imin, imax;
787 
788          if (format == PIPE_FORMAT_Z16_UNORM) {
789             imin = ((unsigned) (min * 65535.0)) & 0xffff;
790             imax = ((unsigned) (max * 65535.0)) & 0xffff;
791          } else if (format == PIPE_FORMAT_Z32_UNORM) {
792             imin = (unsigned) (min * 4294967295.0);
793             imax = (unsigned) (max * 4294967295.0);
794          } else if (format == PIPE_FORMAT_Z24X8_UNORM ||
795                     format == PIPE_FORMAT_X8Z24_UNORM) {
796             imin = ((unsigned) (min * 16777215.0)) & 0xffffff;
797             imax = ((unsigned) (max * 16777215.0)) & 0xffffff;
798          } else {
799             unreachable("Unknown depth buffer format");
800          }
801 
802          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
803             unsigned iz = data->bzzzz[j];
804 
805             if (iz >= imin && iz <= imax)
806                passMask |= (1 << j);
807          }
808       }
809 
810       quads[i]->inout.mask &= passMask;
811 
812       if (quads[i]->inout.mask)
813          quads[pass_nr++] = quads[i];
814    }
815 
816    return pass_nr;
817 }
818 
819 
/**
 * Number of bits set in a 4-bit mask, indexed by the mask value.
 */
static unsigned mask_count[16] =
{
   /* 0x0 .. 0x3 */  0, 1, 1, 2,
   /* 0x4 .. 0x7 */  1, 2, 2, 3,
   /* 0x8 .. 0xb */  1, 2, 2, 3,
   /* 0xc .. 0xf */  2, 3, 3, 4,
};
839 
840 
841 
842 /**
843  * General depth/stencil test function.  Used when there's no fast-path.
844  */
845 static void
depth_test_quads_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)846 depth_test_quads_fallback(struct quad_stage *qs,
847                           struct quad_header *quads[],
848                           unsigned nr)
849 {
850    unsigned i, pass = 0;
851    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
852    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
853    boolean shader_stencil_ref = fsInfo->writes_stencil;
854    boolean have_zs = !!qs->softpipe->framebuffer.zsbuf;
855    struct depth_data data;
856    unsigned vp_idx = quads[0]->input.viewport_index;
857 
858    data.use_shader_stencil_refs = FALSE;
859 
860    if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
861                    qs->softpipe->depth_stencil->stencil[0].enabled ||
862                    qs->softpipe->depth_stencil->depth_bounds_test)) {
863       float near_val, far_val;
864 
865       data.ps = qs->softpipe->framebuffer.zsbuf;
866       data.format = data.ps->format;
867       data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
868                                      quads[0]->input.x0,
869                                      quads[0]->input.y0, quads[0]->input.layer);
870       data.clamp = !qs->softpipe->rasterizer->depth_clip_near;
871 
872       near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
873       far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
874       data.minval = MIN2(near_val, far_val);
875       data.maxval = MAX2(near_val, far_val);
876    }
877 
878    /* EXT_depth_bounds_test says:
879     *
880     *     Where should the depth bounds test take place in the OpenGL fragment
881     *     processing pipeline?
882     *
883     *       RESOLUTION:  After scissor test, before alpha test. In practice,
884     *       this is a logical placement of the test.  An implementation is
885     *       free to perform the test in a manner that is consistent with the
886     *       specified ordering.
887     */
888 
889    if (have_zs && qs->softpipe->depth_stencil->depth_bounds_test) {
890       nr = depth_bounds_test_quads(qs, quads, nr, &data);
891    }
892 
893    if (qs->softpipe->depth_stencil->alpha_enabled) {
894       nr = alpha_test_quads(qs, quads, nr);
895    }
896 
897    if (have_zs && (qs->softpipe->depth_stencil->depth_enabled ||
898                    qs->softpipe->depth_stencil->stencil[0].enabled)) {
899       for (i = 0; i < nr; i++) {
900          get_depth_stencil_values(&data, quads[i]);
901 
902          if (qs->softpipe->depth_stencil->depth_enabled) {
903             if (interp_depth)
904                interpolate_quad_depth(quads[i]);
905 
906             convert_quad_depth(&data, quads[i]);
907          }
908 
909          if (qs->softpipe->depth_stencil->stencil[0].enabled) {
910             if (shader_stencil_ref)
911                convert_quad_stencil(&data, quads[i]);
912 
913             depth_stencil_test_quad(qs, &data, quads[i]);
914             write_depth_stencil_values(&data, quads[i]);
915          }
916          else {
917             if (!depth_test_quad(qs, &data, quads[i]))
918                continue;
919 
920             if (qs->softpipe->depth_stencil->depth_writemask)
921                write_depth_stencil_values(&data, quads[i]);
922          }
923 
924          quads[pass++] = quads[i];
925       }
926 
927       nr = pass;
928    }
929 
930    if (qs->softpipe->active_query_count) {
931       for (i = 0; i < nr; i++)
932          qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
933    }
934 
935    if (nr)
936       qs->next->run(qs->next, quads, nr);
937 }
938 
939 
940 /**
941  * Special-case Z testing for a 16-bit Z buffer with Z writes enabled: one fast path per compare function.
942  */
943 
944 #define NAME depth_interp_z16_less_write
945 #define OPERATOR <
946 #include "sp_quad_depth_test_tmp.h"
947 
948 #define NAME depth_interp_z16_equal_write
949 #define OPERATOR ==
950 #include "sp_quad_depth_test_tmp.h"
951 
952 #define NAME depth_interp_z16_lequal_write
953 #define OPERATOR <=
954 #include "sp_quad_depth_test_tmp.h"
955 
956 #define NAME depth_interp_z16_greater_write
957 #define OPERATOR >
958 #include "sp_quad_depth_test_tmp.h"
959 
960 #define NAME depth_interp_z16_notequal_write
961 #define OPERATOR !=
962 #include "sp_quad_depth_test_tmp.h"
963 
964 #define NAME depth_interp_z16_gequal_write
965 #define OPERATOR >=
966 #include "sp_quad_depth_test_tmp.h"
967 
968 #define NAME depth_interp_z16_always_write
969 #define ALWAYS 1
970 #include "sp_quad_depth_test_tmp.h"
971 
972 
973 
974 static void
depth_noop(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)975 depth_noop(struct quad_stage *qs,
976            struct quad_header *quads[],
977            unsigned nr)
978 {
979    qs->next->run(qs->next, quads, nr);
980 }
981 
982 
983 
984 static void
choose_depth_test(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)985 choose_depth_test(struct quad_stage *qs,
986                   struct quad_header *quads[],
987                   unsigned nr)
988 {
989    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
990 
991    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
992 
993    boolean alpha = qs->softpipe->depth_stencil->alpha_enabled;
994 
995    boolean depth = qs->softpipe->depth_stencil->depth_enabled;
996 
997    unsigned depthfunc = qs->softpipe->depth_stencil->depth_func;
998 
999    boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
1000 
1001    boolean depthwrite = qs->softpipe->depth_stencil->depth_writemask;
1002 
1003    boolean occlusion = qs->softpipe->active_query_count;
1004 
1005    boolean clipped = !qs->softpipe->rasterizer->depth_clip_near;
1006 
1007    boolean depth_bounds = qs->softpipe->depth_stencil->depth_bounds_test;
1008 
1009    if(!qs->softpipe->framebuffer.zsbuf)
1010       depth = depthwrite = stencil = FALSE;
1011 
1012    /* default */
1013    qs->run = depth_test_quads_fallback;
1014 
1015    /* look for special cases */
1016    if (!alpha &&
1017        !depth &&
1018        !occlusion &&
1019        !clipped &&
1020        !stencil &&
1021        !depth_bounds) {
1022       qs->run = depth_noop;
1023    }
1024    else if (!alpha &&
1025             interp_depth &&
1026             depth &&
1027             depthwrite &&
1028             !occlusion &&
1029             !clipped &&
1030             !stencil &&
1031             !depth_bounds)
1032    {
1033       if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
1034          switch (depthfunc) {
1035          case PIPE_FUNC_NEVER:
1036             qs->run = depth_test_quads_fallback;
1037             break;
1038          case PIPE_FUNC_LESS:
1039             qs->run = depth_interp_z16_less_write;
1040             break;
1041          case PIPE_FUNC_EQUAL:
1042             qs->run = depth_interp_z16_equal_write;
1043             break;
1044          case PIPE_FUNC_LEQUAL:
1045             qs->run = depth_interp_z16_lequal_write;
1046             break;
1047          case PIPE_FUNC_GREATER:
1048             qs->run = depth_interp_z16_greater_write;
1049             break;
1050          case PIPE_FUNC_NOTEQUAL:
1051             qs->run = depth_interp_z16_notequal_write;
1052             break;
1053          case PIPE_FUNC_GEQUAL:
1054             qs->run = depth_interp_z16_gequal_write;
1055             break;
1056          case PIPE_FUNC_ALWAYS:
1057             qs->run = depth_interp_z16_always_write;
1058             break;
1059          default:
1060             qs->run = depth_test_quads_fallback;
1061             break;
1062          }
1063       }
1064    }
1065 
1066    /* next quad/fragment stage */
1067    qs->run( qs, quads, nr );
1068 }
1069 
1070 
1071 
1072 static void
depth_test_begin(struct quad_stage * qs)1073 depth_test_begin(struct quad_stage *qs)
1074 {
1075    qs->run = choose_depth_test;
1076    qs->next->begin(qs->next);
1077 }
1078 
1079 
/**
 * Destroy callback for the depth-test stage: releases the stage object.
 *
 * \param qs  the quad stage to free
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
1085 
1086 
1087 struct quad_stage *
sp_quad_depth_test_stage(struct softpipe_context * softpipe)1088 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
1089 {
1090    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
1091 
1092    stage->softpipe = softpipe;
1093    stage->begin = depth_test_begin;
1094    stage->run = choose_depth_test;
1095    stage->destroy = depth_test_destroy;
1096 
1097    return stage;
1098 }
1099