• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# SPDX-License-Identifier: MIT
2# Copyright 2019 Vasily Khoruzhick <anarsoul@gmail.com>
3# Copyright 2021 Pavel Ondračka
4
5import argparse
6import sys
7from math import pi
8
9# Convenience variables
# Each shorthand is bound to its own one-letter string, so the rule tables
# can name a pattern variable without quoting it.
a, b, c, d, e = 'a', 'b', 'c', 'd', 'e'
15
16# Transform input to range [-PI, PI]:
17#
18# y = frac(x / 2PI + 0.5) * 2PI - PI
19#
transform_trig_input_vs_r500 = [
    # fsin and fcos get the same range reduction, so one rule is generated
    # per opcode. Only sources tagged needs_vs_trig_input_fixup are matched.
    (
        (trig_op, 'a(needs_vs_trig_input_fixup)'),
        (trig_op,
            ('fadd',
                ('fmul',
                    ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)),
                    2 * pi),
                -pi)),
    )
    for trig_op in ('fsin', 'fcos')
]
24
# Transform input to range [0, 1):
26#
27# y = frac(x / 2PI)
28#
transform_trig_input_fs_r500 = [
    # Every fsin/fcos source is replaced by the fractional part of
    # source / (2 * pi); the two opcodes share the same rewrite.
    (
        (trig_op, 'a'),
        (trig_op, ('ffract', ('fmul', 'a', 1 / (2 * pi)))),
    )
    for trig_op in ('fsin', 'fcos')
]
33
# This is a pattern produced by wined3d for A0 register load.
# The specific pattern wined3d emits looks like this
# A0.x = (int(floor(abs(R0.x) + 0.5) * sign(R0.x)));
# however we lower both sign and floor so here we check for the already lowered
# sequence.
r300_nir_fuse_fround_d3d9 = [
    (
        ('fmul',
            # floor(abs(a) + 0.5), with floor(x) spelled as x - fract(x).
            ('fadd',
                ('fadd', ('fabs', 'a'), 0.5),
                ('fneg', ('ffract', ('fadd', ('fabs', 'a'), 0.5)))),
            # sign(a), spelled as b2f(0 < a) - b2f(a < 0).
            ('fadd',
                ('b2f', ('!flt', 0.0, 'a')),
                ('fneg', ('b2f', ('!flt', 'a', 0.0))))),
        # The whole product collapses into a single rounding opcode.
        ('fround_even', 'a'),
    ),
]
46
47# Here are some specific optimizations for code reordering such that the backend
48# has easier task of recognizing output modifiers and presubtract patterns.
# Each entry is a (search, replace) pair. Pattern variables are written as
# string literals here; 'a' and 'b' are the same values as the module's
# a / b shorthands. Every replacement must compute exactly the same value as
# its search pattern, only in the form the backend recognizes.
r300_nir_prepare_presubtract = [
    # Backend can only recognize 1 - x pattern.
    (('fadd', ('fneg', 'a'), 1.0), ('fadd', 1.0, ('fneg', 'a'))),
    # a - 1 is -(1 - a), so the result of the recognizable form is negated.
    (('fadd', 'a', -1.0), ('fneg', ('fadd', 1.0, ('fneg', 'a')))),
    (('fadd', -1.0, 'a'), ('fneg', ('fadd', 1.0, ('fneg', 'a')))),
    # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
    (('ffma', 2.0, ('fneg', 'a'), 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    # a * -2 + 1 already is 1 - 2 * a: the sign only moves from the constant
    # onto a. Negating the result would yield 2 * a - 1 instead.
    (('ffma', 'a', -2.0, 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    (('ffma', -2.0, 'a', 1.0), ('ffma', ('fneg', 'a'), 2.0, 1.0)),
    # 2 * a - 1 is -(1 - 2 * a), so here the outer negation is required.
    (('ffma', 2.0, 'a', -1.0), ('fneg', ('ffma', ('fneg', 'a'), 2.0, 1.0))),
    (('ffma', 'a', 2.0, -1.0), ('fneg', ('ffma', ('fneg', 'a'), 2.0, 1.0))),
    # x * 2 can be usually folded into output modifier for the previous
    # instruction, but that only works if x is a temporary. If it is input or
    # constant just convert it to add instead.
    (('fmul', 'a(is_ubo_or_input)', 2.0), ('fadd', 'a', 'a')),
]

# Reassociate a * (b * m) into m * (a * b) for each listed multiplier m, when
# b is an input or constant and the inner product is used only once.
# NOTE(review): presumably this moves the constant factor to where it can be
# folded into an output modifier -- confirm against the backend.
for multiplier in [2.0, 4.0, 8.0, 16.0, 0.5, 0.25, 0.125, 0.0625]:
    r300_nir_prepare_presubtract.append(
        (('fmul', 'a', ('fmul(is_used_once)', 'b(is_ubo_or_input)', multiplier)),
         ('fmul', multiplier, ('fmul', 'a', 'b'))),
    )
70
r300_nir_opt_algebraic_late = [
    # The earlier prepare_presubtract pass can leave double negations behind.
    # Backend copy propagation could cope with them, but the nir to tgsi
    # translation cannot and blows up, so remove them here.
    (
        ('fneg', ('fneg', a)),
        a,
    ),
    (
        ('fabs', ('fneg', a)),
        ('fabs', a),
    ),
    # Comparison lowering leaves additions of zero behind when one of the
    # compared operands is 0; drop them.
    (
        ('fadd', a, 0.0),
        a,
    ),
    (
        ('fadd', a, ('fneg', 0.0)),
        a,
    ),
    # NIR terminate_if expects bools, but floats are handled just fine, so
    # the select feeding it is unnecessary.
    (
        ('fcsel_ge(is_only_used_by_terminate_if)', a, 0.0, 1.0),
        ('fneg', a),
    ),
]
84
85# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
r300_nir_lower_flrp = [
    # flrp(a, b, c) is rewritten as b * c + (-a * c + a), i.e. two chained
    # ffma operations.
    (
        ('flrp', a, b, c),
        ('ffma', b, c, ('ffma', ('fneg', a), c, a)),
    ),
]
89
90# Lower fcsel_ge from ftrunc on r300
r300_nir_lower_fcsel_r300 = [
    # The select becomes an interpolation between c and b whose weight is
    # the result of sge(a, 0.0).
    (
        ('fcsel_ge', a, b, c),
        ('flrp', c, b, ('sge', a, 0.0)),
    ),
]
94
# Fragment shaders have no comparison opcodes. However, we can encode the comparison
# in the aluresults operation, which is then used by the next if. So if the comparison
# result is used only in a single if, we can handle it just fine on R500.
r300_nir_lower_comparison_fs = [
    # Equality tests select on -abs(a - b); the two rules differ only in
    # which of 1.0 / 0.0 comes first.
    (
        ('seq(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0),
    ),
    (
        ('sne(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0),
    ),
    # Ordering tests select directly on a - b.
    (
        ('slt(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0),
    ),
    (
        ('sge(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0),
    ),
]
108
r300_nir_post_integer_lowering = [
    # A floor whose result only feeds an indirect constant load can be dropped
    # entirely: ntt emits ARL by default, which already does the flooring.
    # The pattern matched is the lowered form ffloor(a) = a - ffract(a).
    (
        ('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))),
        a,
    ),
    # D3D9 pattern from Wine, where the shader wants ffloor instead of fround
    # on register load.
    (
        ('fround_even(is_only_used_by_load_ubo_vec4)',
            ('fadd', a, ('fneg', ('ffract', a)))),
        a,
    ),
    # Lower ftrunc: select on a between abs(a) - ffract(abs(a)) and the
    # negation of that same expression.
    (
        ('ftrunc', 'a@32'),
        ('fcsel_ge', a,
            ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
            ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))))),
    ),
]
120
def main():
    """Render every r300 algebraic rule table into one generated C file.

    Command line:
        -p / --import-path  directory inserted at the front of sys.path so
                            that the nir_algebraic module can be imported
                            (required)
        output              path of the file to write

    The file starts with an include of compiler/r300_nir.h, followed by the
    rendered source of each algebraic pass in a fixed order.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    cli.add_argument('output')
    args = cli.parse_args()
    sys.path.insert(0, args.import_path)

    # Only importable once the import path given on the command line is set.
    import nir_algebraic  # pylint: disable=import-error
    ignore_exact = nir_algebraic.ignore_exact

    # Float comparison opcode paired with the "set" opcode that replaces it.
    comparisons = (('feq', 'seq'), ('fneu', 'sne'), ('flt', 'slt'), ('fge', 'sge'))

    # Without a fused compare-and-select the bcsel becomes arithmetic:
    # c * cmp + (d - d * cmp).
    arithmetic_selects = [
        (('bcsel@32(is_only_used_as_float)', ignore_exact(float_cmp, 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, (set_cmp, a, b)), ('fsub', d, ('fmul', d, (set_cmp, a, b)))),
         "!options->has_fused_comp_and_csel")
        for float_cmp, set_cmp in comparisons
    ]
    # With a fused compare-and-select it maps straight onto fcsel.
    fused_selects = [
        (('bcsel@32(is_only_used_as_float)', (float_cmp, 'a@32', 'b@32'), c, d),
         ('fcsel', (set_cmp, a, b), c, d),
         "options->has_fused_comp_and_csel")
        for float_cmp, set_cmp in comparisons
    ]
    r300_nir_lower_bool_to_float = arithmetic_selects + fused_selects

    def difference():
        # a - b, built fresh for every rule that needs it.
        return ('fadd', a, ('fneg', b))

    def minus_abs_difference():
        # -abs(a - b), built fresh for every rule that needs it.
        return ('fneg', ('fabs', difference()))

    r300_nir_lower_bool_to_float_fs = [
        # Selects whose condition is a float comparison.
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('feq', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', minus_abs_difference(), c, d)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fneu', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', minus_abs_difference(), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('flt', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', difference(), d, c)),
        (('bcsel@32(r300_is_only_used_as_float)', ignore_exact('fge', 'a@32', 'b@32'), c, d),
         ('fcsel_ge', difference(), c, d)),
        # Comparisons converted straight to float select between 1.0 and 0.0.
        (('b2f32', ('feq', 'a@32', 'b@32')),
         ('fcsel_ge', minus_abs_difference(), 1.0, 0.0)),
        (('b2f32', ('fneu', 'a@32', 'b@32')),
         ('fcsel_ge', minus_abs_difference(), 0.0, 1.0)),
        (('b2f32', ('flt', 'a@32', 'b@32')),
         ('fcsel_ge', difference(), 0.0, 1.0)),
        (('b2f32', ('fge', 'a@32', 'b@32')),
         ('fcsel_ge', difference(), 1.0, 0.0)),
    ]

    # (generated pass name, rule table), in the order they are emitted.
    passes = [
        ("r300_transform_vs_trig_input", transform_trig_input_vs_r500),
        ("r300_transform_fs_trig_input", transform_trig_input_fs_r500),
        ("r300_nir_fuse_fround_d3d9", r300_nir_fuse_fround_d3d9),
        ("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float),
        ("r300_nir_lower_bool_to_float_fs", r300_nir_lower_bool_to_float_fs),
        ("r300_nir_prepare_presubtract", r300_nir_prepare_presubtract),
        ("r300_nir_opt_algebraic_late", r300_nir_opt_algebraic_late),
        ("r300_nir_post_integer_lowering", r300_nir_post_integer_lowering),
        ("r300_nir_lower_flrp", r300_nir_lower_flrp),
        ("r300_nir_lower_fcsel_r300", r300_nir_lower_fcsel_r300),
        ("r300_nir_lower_comparison_fs", r300_nir_lower_comparison_fs),
    ]

    with open(args.output, 'w') as out:
        out.write('#include "compiler/r300_nir.h"')
        for pass_name, rules in passes:
            out.write(nir_algebraic.AlgebraicPass(pass_name, rules).render())
208
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
211