• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_text.h"
33 #include "tgsi/tgsi_util.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "lp_debug.h"
36 #include "lp_state.h"
37 
38 
39 /*
40  * Detect Aero minification shaders.
41  *
42  * Aero does not use texture mimaps when a window gets animated and its shaped
43  * bended. Instead it uses the average of 4 nearby texels. This is the simplest
44  * of such shader, but there are several variations:
45  *
46  *   FRAG
47  *   DCL IN[0], GENERIC[1], PERSPECTIVE
48  *   DCL IN[1], GENERIC[2], PERSPECTIVE
49  *   DCL IN[2], GENERIC[3], PERSPECTIVE
50  *   DCL OUT[0], COLOR
51  *   DCL SAMP[0]
52  *   DCL TEMP[0..3]
53  *   IMM FLT32 {     0.2500,     0.0000,     0.0000,     0.0000 }
54  *   MOV TEMP[0].x, IN[0].zzzz
55  *   MOV TEMP[0].y, IN[0].wwww
56  *   MOV TEMP[1].x, IN[1].zzzz
57  *   MOV TEMP[1].y, IN[1].wwww
58  *   TEX TEMP[0], TEMP[0], SAMP[0], 2D
59  *   TEX TEMP[2], IN[0], SAMP[0], 2D
60  *   TEX TEMP[3], IN[1], SAMP[0], 2D
61  *   TEX TEMP[1], TEMP[1], SAMP[0], 2D
62  *   ADD TEMP[0], TEMP[0], TEMP[2]
63  *   ADD TEMP[0], TEMP[3], TEMP[0]
64  *   ADD TEMP[0], TEMP[1], TEMP[0]
65  *   MUL TEMP[0], TEMP[0], IN[2]
66  *   MUL TEMP[0], TEMP[0], IMM[0].xxxx
67  *   MOV OUT[0], TEMP[0]
68  *   END
69  *
70  * Texture coordinates are interleaved like the Gaussian blur shaders, but
71  * unlike the later there isn't structure in the sub-pixel positioning of the
72  * texels, other than being disposed in a diamond-like shape. For example,
73  * these are the relative offsets of the texels relative to the average:
74  *
75  *    x offset   y offset
76  *   --------------------
77  *    0.691834   -0.21360
78  *   -0.230230   -0.64160
79  *   -0.692406    0.21356
80  *    0.230802    0.64160
81  *
82  *  These shaders are typically used with linear min/mag filtering, but the
83  *  linear filtering provides very little visual improvement compared to the
84  *  performance impact it has. The ultimate purpose of detecting these shaders
85  *  is to override with nearest texture filtering.
86  */
87 static inline boolean
match_aero_minification_shader(const struct tgsi_token * tokens,const struct lp_tgsi_info * info)88 match_aero_minification_shader(const struct tgsi_token *tokens,
89                                const struct lp_tgsi_info *info)
90 {
91    struct tgsi_parse_context parse;
92    unsigned coord_mask;
93    boolean has_quarter_imm;
94    unsigned index, chan;
95 
96    if ((info->base.opcode_count[TGSI_OPCODE_TEX] != 4 &&
97         info->base.opcode_count[TGSI_OPCODE_SAMPLE] != 4) ||
98        info->num_texs != 4) {
99       return FALSE;
100    }
101 
102    /*
103     * Ensure the texture coordinates are interleaved as in the example above.
104     */
105 
106    coord_mask = 0;
107    for (index = 0; index < 4; ++index) {
108       const struct lp_tgsi_texture_info *tex = &info->tex[index];
109       if (tex->sampler_unit != 0 ||
110           tex->texture_unit != 0 ||
111           tex->coord[0].file != TGSI_FILE_INPUT ||
112           tex->coord[1].file != TGSI_FILE_INPUT ||
113           tex->coord[0].u.index != tex->coord[1].u.index ||
114           (tex->coord[0].swizzle % 2) != 0 ||
115           tex->coord[1].swizzle != tex->coord[0].swizzle + 1) {
116          return FALSE;
117       }
118 
119       coord_mask |= 1 << (tex->coord[0].u.index*2 + tex->coord[0].swizzle/2);
120    }
121    if (coord_mask != 0xf) {
122       return FALSE;
123    }
124 
125    /*
126     * Ensure it has the 0.25 immediate.
127     */
128 
129    has_quarter_imm = FALSE;
130 
131    tgsi_parse_init(&parse, tokens);
132 
133    while (!tgsi_parse_end_of_tokens(&parse)) {
134       tgsi_parse_token(&parse);
135 
136       switch (parse.FullToken.Token.Type) {
137       case TGSI_TOKEN_TYPE_DECLARATION:
138          break;
139 
140       case TGSI_TOKEN_TYPE_INSTRUCTION:
141          goto finished;
142 
143       case TGSI_TOKEN_TYPE_IMMEDIATE:
144          {
145             const unsigned size =
146                   parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
147             assert(size <= 4);
148             for (chan = 0; chan < size; ++chan) {
149                if (parse.FullToken.FullImmediate.u[chan].Float == 0.25f) {
150                   has_quarter_imm = TRUE;
151                   goto finished;
152                }
153             }
154          }
155          break;
156 
157       case TGSI_TOKEN_TYPE_PROPERTY:
158          break;
159 
160       default:
161          assert(0);
162          goto finished;
163       }
164    }
165 finished:
166 
167    tgsi_parse_free(&parse);
168 
169    if (!has_quarter_imm) {
170       return FALSE;
171    }
172 
173    return TRUE;
174 }
175 
176 
177 void
llvmpipe_fs_analyse(struct lp_fragment_shader * shader,const struct tgsi_token * tokens)178 llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
179                     const struct tgsi_token *tokens)
180 {
181    shader->kind = LP_FS_KIND_GENERAL;
182 
183    if (shader->kind == LP_FS_KIND_GENERAL &&
184        shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
185        shader->info.base.num_outputs == 1 &&
186        !shader->info.indirect_textures &&
187        !shader->info.sampler_texture_units_different &&
188        !shader->info.unclamped_immediates &&
189        shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
190        (shader->info.base.opcode_count[TGSI_OPCODE_TEX] +
191         shader->info.base.opcode_count[TGSI_OPCODE_SAMPLE] +
192         shader->info.base.opcode_count[TGSI_OPCODE_MOV] +
193         shader->info.base.opcode_count[TGSI_OPCODE_MUL] +
194         shader->info.base.opcode_count[TGSI_OPCODE_RET] +
195         shader->info.base.opcode_count[TGSI_OPCODE_END] ==
196         shader->info.base.num_instructions)) {
197       shader->kind = LP_FS_KIND_LLVM_LINEAR;
198    }
199 
200    if (shader->kind == LP_FS_KIND_GENERAL &&
201        match_aero_minification_shader(tokens, &shader->info)) {
202       shader->kind = LP_FS_KIND_AERO_MINIFICATION;
203    }
204 }
205