• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "compiler/nir/nir_builder.h"
25 #include "brw_nir.h"
26 
27 /**
28  * Implements the WaPreventHSTessLevelsInterference workaround (for Gfx7-8).
29  *
30  * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
31  * definition of the patch header layouts):
32  *
33  *    "HW Bug: The Tessellation stage will incorrectly add domain points
34  *     along patch edges under the following conditions, which may result
35  *     in conformance failures and/or cracking artifacts:
36  *
37  *       * QUAD domain
38  *       * INTEGER partitioning
39  *       * All three TessFactors in a given U or V direction (e.g., V
40  *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
41  *       * All three TessFactors in the other direction are > 1.0 and all
42  *         round up to the same integer value (e.g, U direction:
43  *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
44  *
45  *     The suggested workaround (to be implemented as part of the postamble
46  *     to the HS shader in the HS kernel) is:
47  *
48  *     if (
49  *        (TF[UEQ0] > 1.0) ||
50  *        (TF[VEQ0] > 1.0) ||
51  *        (TF[UEQ1] > 1.0) ||
52  *        (TF[VEQ1] > 1.0) ||
53  *        (TF[INSIDE_U] > 1.0) ||
54  *        (TF[INSIDE_V] > 1.0) )
55  *     {
56  *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
57  *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
58  *     }"
59  *
60  * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
61  * that the above workaround fails to fix certain GL/ES CTS tests which
62  * have inside tessellation factors of -1.0.  This can be explained by
63  * a quote from the ARB_tessellation_shader specification:
64  *
65  *    "If "equal_spacing" is used, the floating-point tessellation level is
66  *     first clamped to the range [1,<max>], where <max> is implementation-
67  *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
68  *
69  * In other words, the actual inner tessellation factor used is
70  * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
71  * value against 1.0.  To accomplish this, we change the comparison from
72  * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
73  */
74 
75 static inline nir_ssa_def *
load_output(nir_builder * b,int num_components,int offset,int component)76 load_output(nir_builder *b, int num_components, int offset, int component)
77 {
78    return nir_load_output(b, num_components, 32, nir_imm_int(b, 0),
79                           .base = offset,
80                           .component = component);
81 }
82 
83 static void
emit_quads_workaround(nir_builder * b,nir_block * block)84 emit_quads_workaround(nir_builder *b, nir_block *block)
85 {
86    b->cursor = nir_after_block_before_jump(block);
87 
88    nir_ssa_def *inner = load_output(b, 2, 0, 2);
89    nir_ssa_def *outer = load_output(b, 4, 1, 0);
90 
91    nir_ssa_def *any_greater_than_1 =
92        nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
93                   nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
94 
95    nir_push_if(b, any_greater_than_1);
96 
97    inner = nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
98                         nir_imm_float(b, 2.0f), inner);
99 
100    nir_store_output(b, inner, nir_imm_int(b, 0),
101                     .component = 2,
102                     .write_mask = WRITEMASK_XY);
103 
104    nir_pop_if(b, NULL);
105 }
106 
107 void
brw_nir_apply_tcs_quads_workaround(nir_shader * nir)108 brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
109 {
110    assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
111 
112    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
113 
114    nir_builder b;
115    nir_builder_init(&b, impl);
116 
117    /* emit_quads_workaround() inserts an if statement into each block,
118     * which splits it in two.  This changes the set of predecessors of
119     * the end block.  We want to process the original set, so to be safe,
120     * save it off to an array first.
121     */
122    const unsigned num_end_preds = impl->end_block->predecessors->entries;
123    nir_block *end_preds[num_end_preds];
124    unsigned i = 0;
125 
126    set_foreach(impl->end_block->predecessors, entry) {
127       end_preds[i++] = (nir_block *) entry->key;
128    }
129 
130    for (i = 0; i < num_end_preds; i++) {
131       emit_quads_workaround(&b, end_preds[i]);
132    }
133 
134    nir_metadata_preserve(impl, nir_metadata_none);
135 }
136