• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright (C) 2019 Vasily Khoruzhick <anarsoul@gmail.com>
3# Copyright (C) 2021 Pavel Ondračka
4#
5# Permission is hereby granted, free of charge, to any person obtaining a
6# copy of this software and associated documentation files (the "Software"),
7# to deal in the Software without restriction, including without limitation
8# the rights to use, copy, modify, merge, publish, distribute, sublicense,
9# and/or sell copies of the Software, and to permit persons to whom the
10# Software is furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice (including the next
13# paragraph) shall be included in all copies or substantial portions of the
14# Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22# IN THE SOFTWARE.
23
24import argparse
25import sys
26from math import pi
27
28# Convenience variables
# Unquoted aliases for the single-letter operand placeholder strings, so the
# rule tuples can be written as `a` instead of 'a'.
a = 'a'
b = 'b'
c = 'c'
d = 'd'
e = 'e'
34
35# Transform input to range [-PI, PI]:
36#
37# y = frac(x / 2PI + 0.5) * 2PI - PI
38#
# Shared replacement operand: frac(a / 2PI + 0.5) * 2PI - PI
_vs_trig_wrapped_input = (
        'fadd',
        ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)), 2 * pi),
        -pi,
)

transform_trig_input_vs_r500 = [
        (('fsin', 'a'), ('fsin', _vs_trig_wrapped_input)),
        (('fcos', 'a'), ('fcos', _vs_trig_wrapped_input)),
]
43
# Transform input to range [0, 1):
#
# y = frac(x / 2PI)
#
transform_trig_input_fs_r500 = [
        # The replacement feeds fsin/fcos with frac(a / 2PI).
        (('fsin', 'a'), ('fsin', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
        (('fcos', 'a'), ('fcos', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
]
52
# This is a pattern produced by wined3d for A0 register load.
# The specific pattern wined3d emits looks like this
# A0.x = (int(floor(abs(R0.x) + 0.5) * sign(R0.x)));
# however we lower both sign and floor so here we check for the already lowered
# sequence.
r300_nir_fuse_fround_d3d9 = [
        # Search:  (|a| + 0.5 - fract(|a| + 0.5)) * (b2f(0 < a) - b2f(a < 0))
        # Replace: fround_even(a)
        (('fmul', ('fadd', ('fadd', ('fabs', 'a'), 0.5),
                           ('fneg', ('ffract', ('fadd', ('fabs', 'a'), 0.5)))),
                  ('fadd', ('b2f', ('!flt', 0.0, 'a')),
                           ('fneg', ('b2f', ('!flt', 'a', 0.0))))),
         ('fround_even', 'a')),
]
65
66# Here are some specific optimizations for code reordering such that the backend
67# has easier task of recognizing output modifiers and presubtract patterns.
r300_nir_prepare_presubtract = [
        # Backend can only recognize 1 - x pattern.
        (('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))),
        (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
        (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
        # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
        (('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
        # a * -2 + 1 already equals 1 - 2 * a, so these two only need the
        # operands rearranged; wrapping them in fneg would flip the sign.
        (('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
        (('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
        # 2 * a - 1 is -(1 - 2 * a), hence the outer fneg here.
        (('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
        (('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
        # x * 2 can be usually folded into output modifier for the previous
        # instruction, but that only works if x is a temporary. If it is input or
        # constant just convert it to add instead.
        (('fmul', 'a(is_ubo_or_input)', 2.0), ('fadd', a, a)),
]

# For each listed multiplier, move the constant factor from the inner
# multiplication (on a ubo/input operand, used once) to the outer one.
for multiplier in [2.0, 4.0, 8.0, 16.0, 0.5, 0.25, 0.125, 0.0625]:
    r300_nir_prepare_presubtract.append(
        (('fmul', a, ('fmul(is_used_once)', 'b(is_ubo_or_input)', multiplier)),
         ('fmul', multiplier, ('fmul', a, b))))
89
r300_nir_opt_algebraic_late = [
        # Previous prepare_presubtract pass can sometimes produce double fneg patterns.
        # The backend copy propagate could handle it, but the nir to tgsi translation
        # does not and blows up. Clean this up.
        (('fneg', ('fneg', a)), a),
        (('fabs', ('fneg', a)), ('fabs', a)),
        # Some cleanups after comparison lowering if one of the operands is 0.
        (('fadd', a, 0.0), a),
        (('fadd', a, ('fneg', 0.0)), a),
]
100
101# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
r300_nir_lower_flrp = [
        # flrp(a, b, c) is rewritten as b * c + (-a * c + a), using two ffma.
        (('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a))),
]
105
106# Lower fcsel_ge from ftrunc on r300
r300_nir_lower_fcsel_r300 = [
        # Select between c and b by interpolating with the 0.0/1.0 result of
        # sge(a, 0.0).
        (('fcsel_ge', a, b, c), ('flrp', c, b, ('sge', a, 0.0))),
]
110
# Fragment shaders have no comparison opcodes. However, we can encode the comparison
# in the aluresults operation, which is then used by the next if. So if the comparison
# result is used only in a single if, we can handle it just fine on R500.
r300_nir_lower_comparison_fs = [
        # Equality tests select on -|a - b|, which is >= 0 only when a == b.
        (('seq(is_not_used_in_single_if)', 'a@32', 'b@32'),
            ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0)),
        (('sne(is_not_used_in_single_if)', 'a@32', 'b@32'),
            ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0)),
        # Ordering tests select on the sign of a - b.
        (('slt(is_not_used_in_single_if)', 'a@32', 'b@32'),
            ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0)),
        (('sge(is_not_used_in_single_if)', 'a@32', 'b@32'),
            ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0)),
]
124
r300_nir_post_integer_lowering = [
        # If ffloor result is used only for indirect constant load, we can get rid of it
        # completely as ntt emits ARL by default which already does the flooring.
        # This actually checks for the lowered ffloor(a) = a - ffract(a) patterns.
        (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), a),
        # This is a D3D9 pattern from Wine when shader wants ffloor instead of fround on register load.
        (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))), a),
        # Lower ftrunc: pick |a| - fract(|a|) when a >= 0, its negation otherwise.
        (('ftrunc', 'a@32'), ('fcsel_ge', a, ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
                                     ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a))))))),
]
136
def main():
    """Render every r300 algebraic pass into a single generated C file.

    Command line:
        -p / --import-path  (required) directory inserted at the front of
                            ``sys.path`` so that ``nir_algebraic`` can be
                            imported.
        output              path of the file to write.

    The output starts with the ``compiler/r300_nir.h`` include line, followed
    by the rendered text of each pass in the order listed in ``passes``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
    parser.add_argument('output')
    args = parser.parse_args()
    sys.path.insert(0, args.import_path)

    # Imported here because it is only importable once the path from the
    # command line has been added to sys.path.
    import nir_algebraic  # pylint: disable=import-error
    ignore_exact = nir_algebraic.ignore_exact

    # These two tables are built inside main() because they need ignore_exact
    # from the late-imported module.
    r300_nir_lower_bool_to_float = [
        # Without fused compare+select: c * cmp + (d - d * cmp).
        (('bcsel@32(is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
             ('fadd', ('fmul', c, ('seq', a, b)), ('fsub', d, ('fmul', d, ('seq', a, b)))),
             "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
             ('fadd', ('fmul', c, ('sne', a, b)), ('fsub', d, ('fmul', d, ('sne', a, b)))),
             "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
             ('fadd', ('fmul', c, ('slt', a, b)), ('fsub', d, ('fmul', d, ('slt', a, b)))),
             "!options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
             ('fadd', ('fmul', c, ('sge', a, b)), ('fsub', d, ('fmul', d, ('sge', a, b)))),
             "!options->has_fused_comp_and_csel"),
        # With fused compare+select: a single fcsel on the float comparison.
        (('bcsel@32(is_only_used_as_float)', ('feq', 'a@32', 'b@32'), c, d),
             ('fcsel', ('seq', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('fneu', 'a@32', 'b@32'), c, d),
             ('fcsel', ('sne', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('flt', 'a@32', 'b@32'), c, d),
             ('fcsel', ('slt', a, b), c, d), "options->has_fused_comp_and_csel"),
        (('bcsel@32(is_only_used_as_float)', ('fge', 'a@32', 'b@32'), c, d),
             ('fcsel', ('sge', a, b), c, d), "options->has_fused_comp_and_csel"),
    ]

    r300_nir_lower_bool_to_float_fs = [
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
             ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), c, d)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
             ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
             ('fcsel_ge', ('fadd', a, ('fneg', b)), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
             ('fcsel_ge', ('fadd', a, ('fneg', b)), c, d)),
        (('b2f32', ('feq', 'a@32', 'b@32')),
             ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0)),
        (('b2f32', ('fneu', 'a@32', 'b@32')),
             ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0)),
        (('b2f32', ('flt', 'a@32', 'b@32')),
             ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0)),
        (('b2f32', ('fge', 'a@32', 'b@32')),
             ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0)),
    ]

    # (generated pass name, rule table) in the order they are emitted.
    passes = [
        ("r300_transform_vs_trig_input", transform_trig_input_vs_r500),
        ("r300_transform_fs_trig_input", transform_trig_input_fs_r500),
        ("r300_nir_fuse_fround_d3d9", r300_nir_fuse_fround_d3d9),
        ("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float),
        ("r300_nir_lower_bool_to_float_fs", r300_nir_lower_bool_to_float_fs),
        ("r300_nir_prepare_presubtract", r300_nir_prepare_presubtract),
        ("r300_nir_opt_algebraic_late", r300_nir_opt_algebraic_late),
        ("r300_nir_post_integer_lowering", r300_nir_post_integer_lowering),
        ("r300_nir_lower_flrp", r300_nir_lower_flrp),
        ("r300_nir_lower_fcsel_r300", r300_nir_lower_fcsel_r300),
        ("r300_nir_lower_comparison_fs", r300_nir_lower_comparison_fs),
    ]

    with open(args.output, 'w') as f:
        f.write('#include "compiler/r300_nir.h"')

        for pass_name, rules in passes:
            f.write(nir_algebraic.AlgebraicPass(pass_name, rules).render())
224
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
227