1# 2# Copyright (C) 2019 Vasily Khoruzhick <anarsoul@gmail.com> 3# Copyright (C) 2021 Pavel Ondračka 4# 5# Permission is hereby granted, free of charge, to any person obtaining a 6# copy of this software and associated documentation files (the "Software"), 7# to deal in the Software without restriction, including without limitation 8# the rights to use, copy, modify, merge, publish, distribute, sublicense, 9# and/or sell copies of the Software, and to permit persons to whom the 10# Software is furnished to do so, subject to the following conditions: 11# 12# The above copyright notice and this permission notice (including the next 13# paragraph) shall be included in all copies or substantial portions of the 14# Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22# IN THE SOFTWARE. 
import argparse
import sys
from math import pi

# Convenience variables
a = 'a'
b = 'b'
c = 'c'
d = 'd'
e = 'e'

# Transform input to range [-PI, PI]:
#
# y = frac(x / 2PI + 0.5) * 2PI - PI
#
transform_trig_input_vs_r500 = [
    (('fsin', 'a'), ('fsin', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)), 2 * pi), -pi))),
    (('fcos', 'a'), ('fcos', ('fadd', ('fmul', ('ffract', ('fadd', ('fmul', 'a', 1 / (2 * pi)), 0.5)), 2 * pi), -pi))),
]

# Transform input to range [0, 1):
#
# y = frac(x / 2PI)
#
transform_trig_input_fs_r500 = [
    (('fsin', 'a'), ('fsin', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
    (('fcos', 'a'), ('fcos', ('ffract', ('fmul', 'a', 1 / (2 * pi))))),
]

# This is a pattern produced by wined3d for A0 register load.
# The specific pattern wined3d emits looks like this
# A0.x = (int(floor(abs(R0.x) + 0.5) * sign(R0.x)));
# however we lower both sign and floor so here we check for the already lowered
# sequence.
r300_nir_fuse_fround_d3d9 = [
    (('fmul', ('fadd', ('fadd', ('fabs', 'a'), 0.5),
                       ('fneg', ('ffract', ('fadd', ('fabs', 'a'), 0.5)))),
              ('fadd', ('b2f', ('!flt', 0.0, 'a')),
                       ('fneg', ('b2f', ('!flt', 'a', 0.0))))),
     ('fround_even', 'a'))
]

# Here are some specific optimizations for code reordering such that the backend
# has easier task of recognizing output modifiers and presubtract patterns.
r300_nir_prepare_presubtract = [
    # Backend can only recognize 1 - x pattern.
    (('fadd', ('fneg', a), 1.0), ('fadd', 1.0, ('fneg', a))),
    # a - 1 == -(1 - a)
    (('fadd', a, -1.0), ('fneg', ('fadd', 1.0, ('fneg', a)))),
    (('fadd', -1.0, a), ('fneg', ('fadd', 1.0, ('fneg', a)))),
    # Bias presubtract 1 - 2 * x expects MAD -a 2.0 1.0 form.
    (('ffma', 2.0, ('fneg', a), 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    # a * -2 + 1 is already 1 - 2 * a, so the replacement must NOT be negated
    # (negating it would yield 2 * a - 1 and flip the sign of the result).
    (('ffma', a, -2.0, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    (('ffma', -2.0, a, 1.0), ('ffma', ('fneg', a), 2.0, 1.0)),
    # 2 * a - 1 == -(1 - 2 * a)
    (('ffma', 2.0, a, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
    (('ffma', a, 2.0, -1.0), ('fneg', ('ffma', ('fneg', a), 2.0, 1.0))),
    # x * 2 can be usually folded into output modifier for the previous
    # instruction, but that only works if x is a temporary. If it is input or
    # constant just convert it to add instead.
    (('fmul', 'a(is_ubo_or_input)', 2.0), ('fadd', a, a)),
]

# For a power-of-two factor nested inside another multiply, hoist the factor
# to the outer fmul: a * (b * m) -> m * (a * b).
# NOTE(review): presumably this lets the backend fold m as an output modifier;
# confirm against the backend.
for multiplier in [2.0, 4.0, 8.0, 16.0, 0.5, 0.25, 0.125, 0.0625]:
    r300_nir_prepare_presubtract.extend([
        (('fmul', a, ('fmul(is_used_once)', 'b(is_ubo_or_input)', multiplier)), ('fmul', multiplier, ('fmul', a, b))),
    ])

r300_nir_opt_algebraic_late = [
    # Previous prepare_presubtract pass can sometimes produce double fneg patterns.
    # The backend copy propagate could handle it, but the nir to tgsi translation
    # does not and blows up. Clean this up.
    (('fneg', ('fneg', a)), a),
    (('fabs', ('fneg', a)), ('fabs', a)),
    # Some cleanups after comparison lowering if one of the operands is 0.
    (('fadd', a, 0.0), a),
    (('fadd', a, ('fneg', 0.0)), a)
]

# This is very late flrp lowering to clean up after bcsel->fcsel->flrp.
# flrp(a, b, c) is rewritten as b * c + (-a * c + a).
r300_nir_lower_flrp = [
    (('flrp', a, b, c), ('ffma', b, c, ('ffma', ('fneg', a), c, a)))
]

# Lower fcsel_ge from ftrunc on r300
r300_nir_lower_fcsel_r300 = [
    (('fcsel_ge', a, b, c), ('flrp', c, b, ('sge', a, 0.0)))
]

# Fragment shaders have no comparison opcodes. However, we can encode the comparison
# in the aluresults operation, which is then used by next if. So if the comparison result
# is used only in a single if, we can handle it just fine on R500.
r300_nir_lower_comparison_fs = [
    # Equality tests select on -|a - b|; ordering tests select on a - b.
    (
        ('seq(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 1.0, 0.0),
    ),
    (
        ('sne(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fneg', ('fabs', ('fadd', a, ('fneg', b)))), 0.0, 1.0),
    ),
    (
        ('slt(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 0.0, 1.0),
    ),
    (
        ('sge(is_not_used_in_single_if)', 'a@32', 'b@32'),
        ('fcsel_ge', ('fadd', a, ('fneg', b)), 1.0, 0.0),
    ),
]

r300_nir_post_integer_lowering = [
    # When the ffloor result only feeds an indirect constant load it can be
    # dropped completely, because ntt emits ARL by default and ARL already
    # floors. The search matches the lowered form ffloor(a) = a - ffract(a).
    (
        ('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))),
        a,
    ),
    # D3D9 pattern from Wine: the shader wants ffloor instead of fround on
    # register load.
    (
        ('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', ('ffract', a)))),
        a,
    ),
    # Lower ftrunc: pick |a| - ffract(|a|) or its negation via fcsel_ge on a.
    (
        ('ftrunc', 'a@32'),
        ('fcsel_ge', a,
         ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))),
         ('fneg', ('fadd', ('fabs', a), ('fneg', ('ffract', ('fabs', a)))))),
    ),
]


def main():
    """Render every r300 algebraic pass into the requested output file.

    Command line:
      -p / --import-path  (required) directory prepended to ``sys.path`` so
                          that ``nir_algebraic`` can be imported.
      output              path of the file the rendered passes are written to.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-p', '--import-path', required=True)
    arg_parser.add_argument('output')
    cli = arg_parser.parse_args()
    sys.path.insert(0, cli.import_path)

    import nir_algebraic  # pylint: disable=import-error
    ignore_exact = nir_algebraic.ignore_exact

    # (float comparison opcode, matching s* opcode producing a float result)
    comparisons = (('feq', 'seq'), ('fneu', 'sne'), ('flt', 'slt'), ('fge', 'sge'))

    float_bcsel = 'bcsel@32(is_only_used_as_float)'
    without_fused_csel = "!options->has_fused_comp_and_csel"
    with_fused_csel = "options->has_fused_comp_and_csel"

    # Without fused compare+select: c * cmp + (d - d * cmp).
    r300_nir_lower_bool_to_float = [
        ((float_bcsel, ignore_exact(fcmp, 'a@32', 'b@32'), c, d),
         ('fadd', ('fmul', c, (scmp, a, b)), ('fsub', d, ('fmul', d, (scmp, a, b)))),
         without_fused_csel)
        for fcmp, scmp in comparisons
    ]
    # With fused compare+select: a single fcsel on the s* comparison.
    r300_nir_lower_bool_to_float += [
        ((float_bcsel, (fcmp, 'a@32', 'b@32'), c, d),
         ('fcsel', (scmp, a, b), c, d),
         with_fused_csel)
        for fcmp, scmp in comparisons
    ]

    fs_bcsel = 'bcsel@32(r300_is_only_used_as_float)'
    a_minus_b = ('fadd', a, ('fneg', b))
    neg_abs_diff = ('fneg', ('fabs', a_minus_b))
    # (float comparison opcode, fcsel_ge condition, whether the two selected
    # operands are swapped relative to the bcsel/b2f order)
    fs_cases = (
        ('feq', neg_abs_diff, False),
        ('fneu', neg_abs_diff, True),
        ('flt', a_minus_b, True),
        ('fge', a_minus_b, False),
    )

    r300_nir_lower_bool_to_float_fs = [
        ((fs_bcsel, ignore_exact(fcmp, 'a@32', 'b@32'), c, d),
         ('fcsel_ge', cond, d, c) if swapped else ('fcsel_ge', cond, c, d))
        for fcmp, cond, swapped in fs_cases
    ]
    r300_nir_lower_bool_to_float_fs += [
        (('b2f32', (fcmp, 'a@32', 'b@32')),
         ('fcsel_ge', cond, 0.0, 1.0) if swapped else ('fcsel_ge', cond, 1.0, 0.0))
        for fcmp, cond, swapped in fs_cases
    ]

    with open(cli.output, 'w') as out:
        out.write('#include "compiler/r300_nir.h"')

        # Passes are rendered and written one at a time, in this exact order.
        for pass_name, rules in (
            ("r300_transform_vs_trig_input", transform_trig_input_vs_r500),
            ("r300_transform_fs_trig_input", transform_trig_input_fs_r500),
            ("r300_nir_fuse_fround_d3d9", r300_nir_fuse_fround_d3d9),
            ("r300_nir_lower_bool_to_float", r300_nir_lower_bool_to_float),
            ("r300_nir_lower_bool_to_float_fs", r300_nir_lower_bool_to_float_fs),
            ("r300_nir_prepare_presubtract", r300_nir_prepare_presubtract),
            ("r300_nir_opt_algebraic_late", r300_nir_opt_algebraic_late),
            ("r300_nir_post_integer_lowering", r300_nir_post_integer_lowering),
            ("r300_nir_lower_flrp", r300_nir_lower_flrp),
            ("r300_nir_lower_fcsel_r300", r300_nir_lower_fcsel_r300),
            ("r300_nir_lower_comparison_fs", r300_nir_lower_comparison_fs),
        ):
            out.write(nir_algebraic.AlgebraicPass(pass_name, rules).render())


if __name__ == '__main__':
    main()