1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "utility.h"
18 #include "function_list.h"
19
20 #if defined(__PPC__)
21 // Global varaiable used to hold the FPU control register state. The FPSCR
22 // register can not be used because not all Power implementations retain or
23 // observed the NI (non-IEEE mode) bit.
24 __thread fpu_control_t fpu_control = 0;
25 #endif
26
MulD(double * rhi,double * rlo,double u,double v)27 void MulD(double *rhi, double *rlo, double u, double v)
28 {
29 const double c = 134217729.0; // 1+2^27
30 double up, u1, u2, vp, v1, v2;
31
32 up = u * c;
33 u1 = (u - up) + up;
34 u2 = u - u1;
35
36 vp = v * c;
37 v1 = (v - vp) + vp;
38 v2 = v - v1;
39
40 double rh = u * v;
41 double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
42
43 *rhi = rh;
44 *rlo = rl;
45 }
46
AddD(double * rhi,double * rlo,double a,double b)47 void AddD(double *rhi, double *rlo, double a, double b)
48 {
49 double zhi, zlo;
50 zhi = a + b;
51 if (fabs(a) > fabs(b))
52 {
53 zlo = zhi - a;
54 zlo = b - zlo;
55 }
56 else
57 {
58 zlo = zhi - b;
59 zlo = a - zlo;
60 }
61
62 *rhi = zhi;
63 *rlo = zlo;
64 }
65
MulDD(double * rhi,double * rlo,double xh,double xl,double yh,double yl)66 void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
67 {
68 double mh, ml;
69 double c = 134217729.0;
70 double up, u1, u2, vp, v1, v2;
71
72 up = xh * c;
73 u1 = (xh - up) + up;
74 u2 = xh - u1;
75
76 vp = yh * c;
77 v1 = (yh - vp) + vp;
78 v2 = yh - v1;
79
80 mh = xh * yh;
81 ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
82 ml += xh * yl + xl * yh;
83
84 *rhi = mh + ml;
85 *rlo = (mh - (*rhi)) + ml;
86 }
87
AddDD(double * rhi,double * rlo,double xh,double xl,double yh,double yl)88 void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
89 {
90 double r, s;
91 r = xh + yh;
92 s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
93 : (yh - r + xh + xl + yl);
94 *rhi = r + s;
95 *rlo = (r - (*rhi)) + s;
96 }
97
DivideDD(double * chi,double * clo,double a,double b)98 void DivideDD(double *chi, double *clo, double a, double b)
99 {
100 *chi = a / b;
101 double rhi, rlo;
102 MulD(&rhi, &rlo, *chi, b);
103 AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0);
104 *clo = rhi / b;
105 }
106
107 // These functions comapre two floats/doubles. Since some platforms may choose
108 // to flush denormals to zeros before comparison, comparison like a < b may give
109 // wrong result in "certain cases" where we do need correct compasion result
110 // when operands are denormals .... these functions comapre floats/doubles using
111 // signed integer/long int rep. In other cases, when flushing to zeros is fine,
112 // these should not be used. Also these doesn't check for nans and assume nans
113 // are handled separately as special edge case by the caller which calls these
114 // functions return 0 if both are equal, 1 if x > y and -1 if x < y.
115
compareFloats(float x,float y)116 inline int compareFloats(float x, float y)
117 {
118 int32f_t a, b;
119
120 a.f = x;
121 b.f = y;
122
123 if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
124 if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
125
126 if (a.i == b.i) return 0;
127
128 return a.i < b.i ? -1 : 1;
129 }
130
compareDoubles(double x,double y)131 inline int compareDoubles(double x, double y)
132 {
133 int64d_t a, b;
134
135 a.d = x;
136 b.d = y;
137
138 if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
139 if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
140
141 if (a.l == b.l) return 0;
142
143 return a.l < b.l ? -1 : 1;
144 }
145
logFunctionInfo(const char * fname,unsigned int float_size,unsigned int isFastRelaxed)146 void logFunctionInfo(const char *fname, unsigned int float_size,
147 unsigned int isFastRelaxed)
148 {
149 char const *fpSizeStr = NULL;
150 char const *fpFastRelaxedStr = "";
151 switch (float_size)
152 {
153 case sizeof(cl_double): fpSizeStr = "fp64"; break;
154 case sizeof(cl_float): fpSizeStr = "fp32"; break;
155 case sizeof(cl_half): fpSizeStr = "fp16"; break;
156 }
157 if (isFastRelaxed)
158 {
159 fpFastRelaxedStr = "rlx";
160 }
161 vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
162 }
163
getAllowedUlpError(const Func * f,const bool relaxed)164 float getAllowedUlpError(const Func *f, const bool relaxed)
165 {
166 float ulp;
167
168 if (relaxed)
169 {
170 if (gIsEmbedded)
171 {
172 ulp = f->relaxed_embedded_error;
173 }
174 else
175 {
176 ulp = f->relaxed_error;
177 }
178 }
179 else
180 {
181 if (gIsEmbedded)
182 {
183 ulp = f->float_embedded_ulps;
184 }
185 else
186 {
187 ulp = f->float_ulps;
188 }
189 }
190
191 return ulp;
192 }
193