• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "FunctionList.h"
17 #include "reference_math.h"
18 
19 #define FTZ_ON  1   // passed as the _rmode argument by every HALF_ENTRY row below (presumably "flush to zero" - confirm against Func)
20 #define FTZ_OFF 0   // passed as the _rmode argument by every other row below
21 #define EXACT    0.0f   // not referenced anywhere in the visible part of this file
22 #define RELAXED_ON 1   // emitted by ENTRY_EXT and by the hand-written "divide" row
23 #define RELAXED_OFF 0   // emitted by ENTRY, HALF_ENTRY, OPERATOR_ENTRY and the hand-written "sqrt", "sqrt_cr" and "divide_cr" rows
24 
25 #define STRINGIFY( _s)                  #_s   // stringizes its argument with the # operator
26 
27 // When FUNCTION_LIST_ULPS_ONLY is defined (used by the spir test) only the names and ulp values are kept: every reference-function and vtbl slot below expands to NULL
28 #ifdef FUNCTION_LIST_ULPS_ONLY
29 
30 #define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
31 #define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type }
32 #define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
33 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
34 #define unaryF                NULL   // the _type selectors used by the rows below all become NULL in this mode
35 #define i_unaryF              NULL
36 #define unaryF_u              NULL
37 #define macro_unaryF          NULL
38 #define binaryF               NULL
39 #define binaryF_nextafter     NULL
40 #define binaryOperatorF       NULL
41 #define binaryF_i             NULL
42 #define macro_binaryF         NULL
43 #define ternaryF              NULL
44 #define unaryF_two_results    NULL
45 #define unaryF_two_results_i  NULL
46 #define binaryF_two_results_i NULL
47 #define mad_function          NULL
48 
49 #define reference_sqrt        NULL   // these five names are used directly by the hand-written sqrt/divide rows of functionList, so they are stubbed out here as well
50 #define reference_sqrtl       NULL
51 #define reference_divide      NULL
52 #define reference_dividel     NULL
53 #define reference_relaxed_divide NULL
54 
55 #else // !FUNCTION_LIST_ULPS_ONLY: full table with reference-function pointers and vtbl addresses
56 
57 #define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##_name},           _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type } // slots: reference_<name>, reference_<name>l, then reference_<name> again; _ulp is written twice
58 #define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##relaxed_##_name}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type } // like ENTRY, but the third slot is reference_relaxed_<name> and the relaxed ulp is supplied by the caller
59 #define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {(void*)reference_##_name}, {NULL}, {NULL},                   _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type } // both strings get a "half_" prefix; only the first reference slot is filled
60 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {(void*)reference_##_name}, {(void*)reference_##_name##l}, {NULL},                               _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type } // second string is the operator text instead of the name; third reference slot is NULL
61 
62 extern const vtbl _unary;               // float foo( float )
63 extern const vtbl _unary_u;             // float foo( uint ),  double foo( ulong )
64 extern const vtbl _i_unary;             // int foo( float )
65 extern const vtbl _macro_unary;         // int foo( float ),  returns {0,1} for scalar, { 0, -1 } for vector
66 extern const vtbl _binary;              // float foo( float, float )
67 extern const vtbl _binary_nextafter;    // float foo( float, float ), special handling for nextafter
68 extern const vtbl _binary_operator;     // float .op. float
69 extern const vtbl _macro_binary;        // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
70 extern const vtbl _binary_i;            // float foo( float, int )
71 extern const vtbl _ternary;             // float foo( float, float, float )
72 extern const vtbl _unary_two_results;   // float foo( float, float * )
73 extern const vtbl _unary_two_results_i; // float foo( float, int * )
74 extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
75 extern const vtbl _mad_tbl;             // float mad( float, float, float )
76 
77 #define unaryF &_unary
78 #define i_unaryF &_i_unary
79 #define unaryF_u  &_unary_u
80 #define macro_unaryF &_macro_unary
81 #define binaryF &_binary
82 #define binaryF_nextafter &_binary_nextafter
83 #define binaryOperatorF &_binary_operator
84 #define binaryF_i &_binary_i
85 #define macro_binaryF &_macro_binary
86 #define ternaryF &_ternary
87 #define unaryF_two_results  &_unary_two_results
88 #define unaryF_two_results_i  &_unary_two_results_i
89 #define binaryF_two_results_i  &_binary_two_results_i
90 #define mad_function        &_mad_tbl
91 
92 #endif // FUNCTION_LIST_ULPS_ONLY
93 
94 const Func  functionList[] = { // one positional initializer row per function/operator name; columns follow the ENTRY* macro layouts
95                                     ENTRY( acos,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
96                                     ENTRY( acosh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
97                                     ENTRY( acospi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
98                                     ENTRY( asin,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
99                                     ENTRY( asinh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
100                                     ENTRY( asinpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
101                                     ENTRY( atan,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
102                                     ENTRY( atanh,                 5.0f,         5.0f,         FTZ_OFF,     unaryF),
103                                     ENTRY( atanpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
104                                     ENTRY( atan2,                 6.0f,         6.0f,         FTZ_OFF,     binaryF),
105                                     ENTRY( atan2pi,               6.0f,         6.0f,         FTZ_OFF,     binaryF),
106                                     ENTRY( cbrt,                  2.0f,         4.0f,         FTZ_OFF,     unaryF),
107                                     ENTRY( ceil,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
108                                     ENTRY( copysign,              0.0f,         0.0f,         FTZ_OFF,     binaryF),
109                                     ENTRY_EXT( cos,               4.0f,         4.0f,        0.00048828125f,        FTZ_OFF,     unaryF), //relaxed ulp 2^-11
110                                     ENTRY( cosh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
111                                     ENTRY( cospi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
112 //                                  ENTRY( erfc,                  16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
113 //                                  ENTRY( erf,                   16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
114                                     ENTRY_EXT( exp,               3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x))
115                                     ENTRY_EXT( exp2,              3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x))
116                                     ENTRY_EXT( exp10,             3.0f,         4.0f,       8192.0f,    FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode,
117                                     // in non-derived mode it uses the ulp error for half_exp10.
118                                     ENTRY( expm1,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
119                                     ENTRY( fabs,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
120                                     ENTRY( fdim,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
121                                     ENTRY( floor,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
122                                     ENTRY( fma,                   0.0f,         0.0f,         FTZ_OFF,     ternaryF),
123                                     ENTRY( fmax,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
124                                     ENTRY( fmin,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
125                                     ENTRY( fmod,                  0.0f,         0.0f,         FTZ_OFF,     binaryF ),
126                                     ENTRY( fract,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results),
127                                     ENTRY( frexp,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results_i),
128                                     ENTRY( hypot,                 4.0f,         4.0f,         FTZ_OFF,     binaryF),
129                                     ENTRY( ilogb,                 0.0f,         0.0f,         FTZ_OFF,     i_unaryF),
130                                     ENTRY( isequal,               0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
131                                     ENTRY( isfinite,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
132                                     ENTRY( isgreater,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
133                                     ENTRY( isgreaterequal,        0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
134                                     ENTRY( isinf,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
135                                     ENTRY( isless,                0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
136                                     ENTRY( islessequal,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
137                                     ENTRY( islessgreater,         0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
138                                     ENTRY( isnan,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
139                                     ENTRY( isnormal,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
140                                     ENTRY( isnotequal,            0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
141                                     ENTRY( isordered,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
142                                     ENTRY( isunordered,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
143                                     ENTRY( ldexp,                 0.0f,         0.0f,         FTZ_OFF,     binaryF_i),
144                                     ENTRY( lgamma,            INFINITY,     INFINITY,         FTZ_OFF,     unaryF),
145                                     ENTRY( lgamma_r,          INFINITY,     INFINITY,         FTZ_OFF,     unaryF_two_results_i),
146                                     ENTRY_EXT( log,               3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
147                                     ENTRY_EXT( log2,              3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
148                                     ENTRY( log10,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
149                                     ENTRY( log1p,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
150                                     ENTRY( logb,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
151                                     ENTRY_EXT( mad,           INFINITY,     INFINITY,        INFINITY,    FTZ_OFF,    mad_function), //in fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
152                                     ENTRY( maxmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
153                                     ENTRY( minmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
154                                     ENTRY( modf,                  0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results ),
155                                     ENTRY( nan,                   0.0f,         0.0f,         FTZ_OFF,     unaryF_u),
156                                     ENTRY( nextafter,             0.0f,         0.0f,         FTZ_OFF,     binaryF_nextafter),
157                                     ENTRY_EXT( pow,              16.0f,        16.0f,         8192.0f,     FTZ_OFF,    binaryF), //in derived mode the ulp error is calculated as exp2(y*log2(x)) and in non-derived it is the same as half_pow
158                                     ENTRY( pown,                 16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
159                                     ENTRY( powr,                 16.0f,        16.0f,         FTZ_OFF,     binaryF),
160 //                                  ENTRY( reciprocal,            1.0f,         1.0f,         FTZ_OFF,     unaryF),
161                                     ENTRY( remainder,             0.0f,         0.0f,         FTZ_OFF,     binaryF),
162                                     ENTRY( remquo,                0.0f,         0.0f,         FTZ_OFF,     binaryF_two_results_i),
163                                     ENTRY( rint,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
164                                     ENTRY( rootn,                16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
165                                     ENTRY( round,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
166                                     ENTRY( rsqrt,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
167                                     ENTRY( signbit,               0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
168                                     ENTRY_EXT( sin,               4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF), //relaxed ulp 2^-11
169                                     ENTRY_EXT( sincos,            4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF_two_results), //relaxed ulp 2^-11
170                                     ENTRY( sinh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
171                                     ENTRY( sinpi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
172                                     { "sqrt", "sqrt",     {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 3.0f, 0.0f,    4.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF }, // written out by hand: the second ulp slot is 0.0f rather than a copy of the first, which ENTRY cannot express
173                                     { "sqrt_cr", "sqrt",  {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 0.0f, 0.0f,    0.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF }, // written out by hand: the two strings differ ("sqrt_cr" vs "sqrt")
174                                     ENTRY_EXT( tan,               5.0f,         5.0f,         8192.0f,    FTZ_OFF,     unaryF), //in derived mode the ulp error is calculated as sin/cos and in non-derived mode it is the same as half_tan.
175                                     ENTRY( tanh,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
176                                     ENTRY( tanpi,                 6.0f,         6.0f,         FTZ_OFF,     unaryF),
177 //                                    ENTRY( tgamma,                 16.0f,         16.0f,         FTZ_OFF,     unaryF), // Commented this out until we can be sure this requirement is realistic
178                                     ENTRY( trunc,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
179 
180                                     HALF_ENTRY( cos,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
181                                     HALF_ENTRY( divide,        8192.0f,      8192.0f,          FTZ_ON,     binaryF),
182                                     HALF_ENTRY( exp,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
183                                     HALF_ENTRY( exp2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
184                                     HALF_ENTRY( exp10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
185                                     HALF_ENTRY( log,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
186                                     HALF_ENTRY( log2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
187                                     HALF_ENTRY( log10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
188                                     HALF_ENTRY( powr,          8192.0f,      8192.0f,          FTZ_ON,     binaryF),
189                                     HALF_ENTRY( recip,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
190                                     HALF_ENTRY( rsqrt,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
191                                     HALF_ENTRY( sin,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
192                                     HALF_ENTRY( sqrt,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
193                                     HALF_ENTRY( tan,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
194 
195                                     // basic operations
196                                     OPERATOR_ENTRY( add, "+",         0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
197                                     OPERATOR_ENTRY( subtract, "-",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
198                                     { "divide", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 2.5f, 0.0f,         3.0f, 2.5f, FTZ_OFF, RELAXED_ON, binaryOperatorF }, // written out by hand: OPERATOR_ENTRY has no relaxed reference or relaxed ulp parameter
199                                     { "divide_cr", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 0.0f, 0.0f,         0.0f, 0.f, FTZ_OFF, RELAXED_OFF, binaryOperatorF }, // same references as "divide" but all ulp slots are zero and relaxed mode is off
200                                     OPERATOR_ENTRY( multiply, "*",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
201                                     OPERATOR_ENTRY( assignment, "", 0.0f,       0.0f,     FTZ_OFF,     unaryF),        // A simple copy operation
202                                     OPERATOR_ENTRY( not, "!",       0.0f,       0.0f,   FTZ_OFF,    macro_unaryF),
203                                 };
204 
205 const size_t functionListCount = sizeof functionList / sizeof *functionList; // number of rows in functionList, computed at compile time
206