• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2022 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 
24 #include "pan_earlyzs.h"
25 #include "panfrost/util/pan_ir.h"
26 
27 /*
28  * Return an "early" mode. If it is known that the depth/stencil tests always
29  * pass (so the shader is always executed), weak early is usually faster than
30  * force early.
31  */
32 static enum pan_earlyzs
best_early_mode(bool zs_always_passes)33 best_early_mode(bool zs_always_passes)
34 {
35    if (zs_always_passes)
36       return PAN_EARLYZS_WEAK_EARLY;
37    else
38       return PAN_EARLYZS_FORCE_EARLY;
39 }
40 
41 /*
42  * Analyze a fragment shader and provided API state to determine the early-ZS
43  * configuration. The order of arguments must match the order of states in the
44  * lookup table, synchronized with pan_earlyzs_get.
45  */
46 static struct pan_earlyzs_state
analyze(const struct pan_shader_info * s,bool writes_zs_or_oq,bool alpha_to_coverage,bool zs_always_passes)47 analyze(const struct pan_shader_info *s, bool writes_zs_or_oq,
48         bool alpha_to_coverage, bool zs_always_passes)
49 {
50    /* If the shader writes depth or stencil, all depth/stencil tests must
51     * be deferred until the value is known after the ZS_EMIT instruction,
52     * if present. ZS_EMIT must precede ATEST, so the value is known when
53     * ATEST executes, justifying the late test/update.
54     * Also, if alpha_to_coverage is set that also forces a late update.
55     * NOTE: it's not at all clear why alpha_to_coverage always requires
56     * a late update; the late update should only really be required if
57     * we're writing z or stencil, or testing for occlusion queries.
58     * The docs are somewhat contradictory on this point.
59     * But empirically we observe this requirement on Valhall, and doing
60     * the update later is never wrong (just potentially a bit slower).
61     */
62    bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil);
63    bool late_update = shader_writes_zs || alpha_to_coverage;
64    bool late_kill = shader_writes_zs;
65 
66    /* Late coverage updates are required if the coverage mask depends on
67     * the results of the shader. Discards are implemented as coverage mask
68     * updates and must be considered. Strictly, depth/stencil writes may
69     * also update the coverage mask, but these already force late updates.
70     */
71    bool late_coverage =
72       s->fs.writes_coverage || s->fs.can_discard || alpha_to_coverage;
73 
74    /* Late coverage mask updates may affect the value written to the
75     * depth/stencil buffer (if a pixel is discarded entirely). However,
76     * they do not affect depth/stencil testing. So they may only matter if
77     * depth or stencil is written.
78     *
79     * That dependency does mean late coverage mask updates require late
80     * depth/stencil updates.
81     *
82     * Similarly, occlusion queries count samples that pass the
83     * depth/stencil tests, so occlusion queries with late coverage also
84     * require a late update.
85     */
86    late_update |= (late_coverage && writes_zs_or_oq);
87 
88    /* Side effects require late depth/stencil tests to ensure the shader
89     * isn't killed before the side effects execute.
90     */
91    late_kill |= s->writes_global;
92 
93    /* Finally, the shader may override and force early fragment tests */
94    late_update &= !s->fs.early_fragment_tests;
95    late_kill &= !s->fs.early_fragment_tests;
96 
97    /* Collect results */
98    enum pan_earlyzs early_mode = best_early_mode(zs_always_passes);
99 
100    return (struct pan_earlyzs_state){
101       .update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode,
102       .kill = late_kill ? PAN_EARLYZS_FORCE_LATE : early_mode,
103    };
104 }
105 
106 /*
107  * Analyze a fragment shader to determine all possible early-ZS configurations.
108  * Returns a lookup table of configurations indexed by the API state.
109  */
110 struct pan_earlyzs_lut
pan_earlyzs_analyze(const struct pan_shader_info * s)111 pan_earlyzs_analyze(const struct pan_shader_info *s)
112 {
113    struct pan_earlyzs_lut lut;
114 
115    for (unsigned v0 = 0; v0 < 2; ++v0) {
116       for (unsigned v1 = 0; v1 < 2; ++v1) {
117          for (unsigned v2 = 0; v2 < 2; ++v2)
118             lut.states[v0][v1][v2] = analyze(s, v0, v1, v2);
119       }
120    }
121 
122    return lut;
123 }
124