1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "compat.h"
17
18 #if defined ( _MSC_VER )
19
20 #include <limits.h>
21 #include <stdlib.h>
22
23 #include <CL/cl.h>
24
25 #include <windows.h>
26
27 #if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
28
29 ///////////////////////////////////////////////////////////////////
30 //
31 // rint, rintf
32 //
33 ///////////////////////////////////////////////////////////////////
34
copysignf(float x,float y)35 float copysignf( float x, float y )
36 {
37 union{ cl_uint u; float f; }ux, uy;
38
39 ux.f = x;
40 uy.f = y;
41
42 ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
43
44 return ux.f;
45 }
46
copysign(double x,double y)47 double copysign( double x, double y )
48 {
49 union{ cl_ulong u; double f; }ux, uy;
50
51 ux.f = x;
52 uy.f = y;
53
54 ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
55
56 return ux.f;
57 }
58
/*
 * Returns a long double that has the magnitude of x and the sign of y.
 *
 * Two storage layouts are handled:
 *   - long double stored exactly like double (same size): the sign is bit 63
 *     of the 64-bit image. This is the layout Microsoft's compiler uses.
 *   - otherwise the x87 80-bit extended layout is assumed: a 64-bit
 *     significand followed by a 16-bit word holding sign and exponent.
 *
 * The previous version only knew the second layout; with a double-sized
 * long double its sign write landed outside the value and x came back
 * unchanged.
 */
long double copysignl( long double x, long double y )
{
    if( sizeof( long double ) == sizeof( double ) )
    {
        union{ double f; uint64_t u; } dx, dy;

        dx.f = (double) x;
        dy.f = (double) y;

        dx.u = (dx.u & 0x7fffffffffffffffULL) | (dy.u & 0x8000000000000000ULL);

        return (long double) dx.f;
    }
    else
    {
        union
        {
            long double f;
            struct{ uint64_t m; unsigned short sexp; }u;
        }ux, uy;

        ux.f = x;
        uy.f = y;

        // Bit 15 of sexp is the sign; the low 15 bits are the exponent.
        ux.u.sexp = (unsigned short)( (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000) );

        return ux.f;
    }
}
74
/*
 * Rounds x to an integral value using the current rounding mode.
 * Adding and then subtracting 2^23 (carrying x's sign) pushes the fraction
 * bits out of the float significand. Inputs with magnitude >= 2^23, and NaN,
 * are returned unchanged.
 */
float rintf(float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float shifter, sum;

    if( !( fabsf(x) < two23 ) )
        return x;

    shifter = copysignf( two23, x );
    sum = x + shifter;
    sum -= shifter;

    // Re-apply the sign so that a zero result keeps the sign of x.
    return copysignf( sum, x );
}
89
/*
 * Rounds x to an integral value using the current rounding mode.
 * Adding and then subtracting 2^52 (carrying x's sign) pushes the fraction
 * bits out of the double significand. Inputs with magnitude >= 2^52, and
 * NaN, are returned unchanged.
 */
double rint(double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double shifter, sum;

    if( !( fabs(x) < two52 ) )
        return x;

    shifter = copysign( two52, x );
    sum = x + shifter;
    sum -= shifter;

    // Re-apply the sign so that a zero result keeps the sign of x.
    return copysign( sum, x );
}
104
/*
 * Rounds x to an integral value using the current rounding mode.
 *
 * The add/subtract constant must be 2^(p-1) for a p-bit significand:
 * 2^52 when long double is stored like double, 2^63 for the x87 80-bit
 * format (the only other layout this file assumes).
 *
 * Fixes over the previous version:
 *   - the magnitude is computed in long double (fabsl); narrowing it to
 *     double rounded values just below 2^63 up to 2^63, so they were
 *     returned unrounded;
 *   - the constant now matches the significand width;
 *   - the sign is handled inline, so the result does not depend on the
 *     storage-layout assumptions of a separate sign helper.
 */
long double rintl(long double x)
{
    const long double limit = ( sizeof( long double ) == sizeof( double ) )
                                  ? 4503599627370496.0L     /* 0x1.0p52L */
                                  : 9223372036854775808.0L; /* 0x1.0p63L */
    long double absx = fabsl(x);

    if( absx < limit )
    {
        long double magic = ( x < 0.0L ) ? -limit : limit;
        long double rounded = x + magic;
        rounded -= magic;

        // A non-zero result already carries the sign of x; a zero result
        // must be given it explicitly (x is finite here, so x * 0 is +/-0).
        x = ( rounded == 0.0L ) ? x * 0.0L : rounded;
    }

    return x;
}
119
120 #if _MSC_VER < 1800
121
122 ///////////////////////////////////////////////////////////////////
123 //
124 // ilogb, ilogbf, ilogbl
125 //
126 ///////////////////////////////////////////////////////////////////
// Fallback result codes returned by the ilogb family below for a zero
// argument (FP_ILOGB0) and a NaN argument (FP_ILOGBNAN). They are defined
// here only if nothing included earlier already defined them; both
// fallbacks are INT_MIN.
#ifndef FP_ILOGB0
#define FP_ILOGB0 INT_MIN
#endif

#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN INT_MIN
#endif
134
/*
 * Returns the unbiased binary exponent of x as an int.
 *
 *   zero      -> FP_ILOGB0
 *   infinity  -> INT_MAX
 *   NaN       -> FP_ILOGBNAN
 *   subnormal -> the exponent x would have if it were normalized
 *
 * The range check now uses 0x0010000000000000 (the bit pattern of the
 * smallest normal double). The previous constant, 0x0001000000000000, let
 * subnormals whose top set bit is 48..51 bypass the subnormal path and
 * report -1023.
 */
int ilogb (double x)
{
    union{ double f; uint64_t u; } u;
    uint64_t absx;

    u.f = x;
    absx = u.u & 0x7fffffffffffffffULL; // drop the sign bit

    // One unsigned comparison catches both ends: values below the smallest
    // normal wrap around to huge numbers, and Inf/NaN are >= the upper bound.
    if( absx - 0x0010000000000000ULL >= 0x7ff0000000000000ULL - 0x0010000000000000ULL )
    {
        if( absx == 0 )
            return FP_ILOGB0;

        if( absx == 0x7ff0000000000000ULL )
            return INT_MAX;

        if( absx > 0x7ff0000000000000ULL )
            return FP_ILOGBNAN;

        // subnormal: form 1.fraction, subtract 1.0 so the hardware
        // normalizes the fraction, then read back the exponent field.
        u.u = absx | 0x3ff0000000000000ULL;
        u.f -= 1.0;
        return (int)(u.u >> 52) - (1023 + 1022);
    }

    return (int)(absx >> 52) - 1023;
}
162
163
ilogbf(float x)164 int ilogbf (float x)
165 {
166 union{ float f; cl_uint u;} u;
167 u.f = x;
168
169 cl_uint absx = u.u & 0x7fffffff;
170 if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
171 {
172 switch( absx )
173 {
174 case 0:
175 return FP_ILOGB0;
176 case 0x7f800000U:
177 return INT_MAX;
178 default:
179 if( absx > 0x7f800000 )
180 return FP_ILOGBNAN;
181
182 // subnormal
183 u.u = absx | 0x3f800000U;
184 u.f -= 1.0f;
185 return (u.u >> 23) - (127 + 126);
186 }
187 }
188
189 return (absx >> 23) - 127;
190 }
191
/*
 * Returns the unbiased binary exponent of x as an int.
 * Zero yields FP_ILOGB0, infinity INT_MAX, NaN FP_ILOGBNAN.
 *
 * When long double is stored like double the call is forwarded to ilogb().
 * Otherwise the x87 80-bit layout is assumed: a 64-bit significand with an
 * explicit integer bit, followed by a 16-bit sign+exponent word.
 *
 * Subnormals (exponent field 0) have the value m * 2^-16445, so their
 * exponent is derived from the position of the highest set bit of m. The
 * previous version set the exponent field to 0x3fff while leaving the
 * integer bit clear and then did arithmetic on that value; such an encoding
 * is not a valid x87 number, so the result was unreliable.
 */
int ilogbl (long double x)
{
    union
    {
        long double f;
        struct{ uint64_t m; unsigned short sexp; }u;
    } u;
    int exp;

    if( sizeof( long double ) == sizeof( double ) )
        return ilogb( (double) x );

    u.f = x;

    exp = u.u.sexp & 0x7fff;
    if( 0 == exp )
    {
        uint64_t probe = 0x8000000000000000ULL;
        int top = 63;

        if( 0 == u.u.m )
            return FP_ILOGB0;

        // subnormal: locate the highest set significand bit
        while( 0 == ( u.u.m & probe ) )
        {
            probe >>= 1;
            --top;
        }

        return top - 16445;
    }
    else if( 0x7fff == exp )
    {
        // Any fraction bit set below the integer bit means NaN.
        if( u.u.m & 0x7fffffffffffffffULL )
            return FP_ILOGBNAN;

        return INT_MAX;
    }

    return exp - 0x3fff;
}
224
225 #endif // _MSC_VER < 1800
226
227 ///////////////////////////////////////////////////////////////////
228 //
229 // fmax, fmin, fmaxf, fminf
230 //
231 ///////////////////////////////////////////////////////////////////
232
/* Stores the raw 32-bit pattern of the float fx into *ux. */
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {unsigned int u; float f;} pun;

    pun.f = fx;
    *ux = pun.u;
}
239 /* static void GET_BITS_SP32(float fx, unsigned int* ux) */
240 /* { */
241 /* volatile union {float f; unsigned int i;} _bitsy; */
242 /* _bitsy.f = (fx); */
243 /* *ux = _bitsy.i; */
244 /* } */
/* Stores into *fx the float whose raw 32-bit pattern is ux. */
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {unsigned int u; float f;} pun;

    pun.u = ux;
    *fx = pun.f;
}
251 /* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
252 /* { */
253 /* volatile union {float f; unsigned int i;} _bitsy; */
254 /* _bitsy.i = (ux); */
255 /* *fx = _bitsy.f; */
256 /* } */
/* Stores the raw 64-bit pattern of the double dx into *lx. */
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {unsigned __int64 l; double d;} pun;

    pun.d = dx;
    *lx = pun.l;
}
/* Stores into *dx the double whose raw 64-bit pattern is lx. */
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {unsigned __int64 l; double d;} pun;

    pun.l = lx;
    *dx = pun.d;
}
269
270 #if 0
// Disabled: this definition sits inside "#if 0" and is never compiled.
// It returns hx shifted right by 31, where hx is filled in by
// _GET_HIGH_WORD (not defined in the visible part of this file;
// presumably it loads the upper 32 bits of x - confirm before enabling).
int SIGNBIT_DP64(double x )
{
    int hx;
    _GET_HIGH_WORD(hx,x);
    return((hx>>31));
}
277 #endif
278
279 #if _MSC_VER < 1900
280
/* fmax(x, y) returns the larger (more positive) of x and y.
   A NaN is treated as a missing value: if exactly one argument is NaN,
   the other argument is returned. If both arguments are NaN, the first
   argument is returned. */
double fmax(double x, double y)
{
    // Keep x when y is missing or when x is already at least as large.
    // If x is NaN the comparison is false and y is returned.
    if( isnan(y) || x >= y )
        return x;

    return y;
}
295
296
/* fmin(x, y) returns the smaller (more negative) of x and y.
   A NaN is treated as a missing value: if exactly one argument is NaN,
   the other argument is returned. If both arguments are NaN, the first
   argument is returned. */
double fmin(double x, double y)
{
    // Keep x when y is missing or when x is already at least as small.
    // If x is NaN the comparison is false and y is returned.
    if( isnan(y) || x <= y )
        return x;

    return y;
}
309
310
/* fmaxf(x, y) returns the larger (more positive) of x and y.
   A NaN is treated as a missing value: if exactly one argument is NaN,
   the other argument is returned. If both arguments are NaN, the first
   argument is returned. */
float fmaxf( float x, float y )
{
    // Keep x when y is missing or when x is already at least as large.
    // If x is NaN the comparison is false and y is returned.
    if( isnan(y) || x >= y )
        return x;

    return y;
}
318
/* fminf(x, y) returns the smaller (more negative) of x and y.
   A NaN is treated as a missing value: if exactly one argument is NaN,
   the other argument is returned. If both arguments are NaN, the first
   argument is returned. */
float fminf(float x, float y)
{
    // Keep x when y is missing or when x is already at least as small.
    // If x is NaN the comparison is false and y is returned.
    if( isnan(y) || x <= y )
        return x;

    return y;
}
331
/*
 * Returns 2^k as a long double. k must lie in [-1022, 1023], the normal
 * exponent range of double; the value is built as a double bit pattern and
 * widened, which is exact whatever the long double format is.
 */
static long double scalblnl_pow2( long k )
{
    union{ double d; uint64_t u; } p;

    p.u = (uint64_t)( 1023 + k ) << 52;
    return (long double) p.d;
}

/*
 * Returns x * 2^n.
 *
 * The scaling is applied in steps of at most 2^1023 (up) or 2^-1022 (down),
 * so each factor is exactly representable. Overflow and underflow fall out
 * of the multiplications themselves, which gives a correctly signed
 * infinity or zero.
 *
 * Fixes over the previous version:
 *   - overflow returned +INFINITY even for negative x;
 *   - NaN and infinite inputs were turned into 0 or +INFINITY once |n|
 *     exceeded the clamp; they are now propagated;
 *   - the clamp of +/-2200 cut off results that are finite in an 80-bit
 *     long double; n is now only limited to bound the loop count;
 *   - the scale factors no longer depend on the x87 storage layout.
 */
long double scalblnl(long double x, long n)
{
    // Larger than the full exponent span (smallest subnormal to largest
    // finite) of both the 64-bit and the x87 80-bit formats, so clamping
    // to it cannot change any result.
    const long limit = 33000;

    // Zeros and NaNs are returned as they are.
    if( x == 0.0L || x != x )
        return x;

    if( n > limit )
        n = limit;
    else if( n < -limit )
        n = -limit;

    if( n < 0 )
    {
        const long double down = scalblnl_pow2( -1022 );
        while( n <= -1022 )
        {
            x *= down;
            n += 1022;
        }
    }
    else
    {
        const long double up = scalblnl_pow2( 1023 );
        while( n >= 1023 )
        {
            x *= up;
            n -= 1023;
        }
    }

    // Remaining exponent is within (-1022, 1023).
    if( n != 0 )
        x *= scalblnl_pow2( n );

    return x;
}
375
376 ///////////////////////////////////////////////////////////////////
377 //
378 // log2
379 //
380 ///////////////////////////////////////////////////////////////////
// Numeric values of log2(e) and log2(10). Presumably intended as factors for
// converting natural and base-10 logarithms to base 2; neither constant is
// referenced in the visible part of this file (log2 below uses its own
// literal) - confirm whether they are still needed.
const static cl_double log_e_base2 = 1.4426950408889634074;
const static cl_double log_10_base2 = 3.3219280948873623478;
383
384 //double log10(double x);
385
log2(double x)386 double log2(double x)
387 {
388 return 1.44269504088896340735992468100189214 * log(x);
389 }
390
log2l(long double x)391 long double log2l(long double x)
392 {
393 return 1.44269504088896340735992468100189214L * log(x);
394 }
395
trunc(double x)396 double trunc(double x)
397 {
398 double absx = fabs(x);
399
400 if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
401 {
402 cl_long rounded = x;
403 x = copysign( (double) rounded, x );
404 }
405
406 return x;
407 }
408
truncf(float x)409 float truncf(float x)
410 {
411 float absx = fabsf(x);
412
413 if( absx < 8388608.0f /* 0x1.0p23f */ )
414 {
415 cl_int rounded = x;
416 x = copysignf( (float) rounded, x );
417 }
418
419 return x;
420 }
421
/*
 * Rounds x to the nearest integer, halfway cases away from zero, and
 * returns it as a long.
 *
 * Values at or above LONG_MAX return LONG_MAX. Values at or below LONG_MIN,
 * and NaN, return LONG_MIN; the previous version had no lower clamp, so
 * those inputs reached an out-of-range floating-to-integer conversion,
 * which is undefined behaviour. (The C standard leaves the result for
 * unrepresentable values unspecified.)
 */
long lround(double x)
{
    double absx = fabs(x);

    if( absx < 0.5 )
        return 0;

    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
    {
        long long whole;

        // Add one half, then let the integer conversion truncate.
        absx += 0.5;
        whole = (long long) absx;
        absx = (double) whole;
        x = ( x < 0.0 ) ? -absx : absx;
    }

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    // Written as a negated comparison so that NaN is caught as well.
    if( !( x > (double) LONG_MIN ) )
        return LONG_MIN;

    return (long) x;
}
442
/*
 * Rounds x to the nearest integer, halfway cases away from zero, and
 * returns it as a long.
 *
 * Values at or above LONG_MAX return LONG_MAX. Values at or below LONG_MIN,
 * and NaN, return LONG_MIN; the previous version had no lower clamp, so
 * those inputs reached an out-of-range floating-to-integer conversion,
 * which is undefined behaviour. (The C standard leaves the result for
 * unrepresentable values unspecified.)
 */
long lroundf(float x)
{
    float absx = fabsf(x);

    if( absx < 0.5f )
        return 0;

    if( absx < 8388608.0f /* 0x1.0p23f */ )
    {
        int whole;

        // Add one half, then let the integer conversion truncate.
        absx += 0.5f;
        whole = (int) absx;
        absx = (float) whole;
        x = ( x < 0.0f ) ? -absx : absx;
    }

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    // Written as a negated comparison so that NaN is caught as well.
    if( !( x > (float) LONG_MIN ) )
        return LONG_MIN;

    return (long) x;
}
463
round(double x)464 double round(double x)
465 {
466 double absx = fabs(x);
467
468 if( absx < 0.5 )
469 return copysign( 0.0, x);
470
471 if( absx < 4503599627370496.0 /* 0x1.0p52 */)
472 {
473 absx += 0.5;
474 cl_long rounded = absx;
475 absx = rounded;
476 x = copysign( absx, x );
477 }
478
479 return x;
480 }
481
roundf(float x)482 float roundf(float x)
483 {
484 float absx = fabsf(x);
485
486 if( absx < 0.5f )
487 return copysignf( 0.0f, x);
488
489 if( absx < 8388608.0f )
490 {
491 absx += 0.5f;
492 cl_int rounded = absx;
493 absx = rounded;
494 x = copysignf( absx, x );
495 }
496
497 return x;
498 }
499
/*
 * Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes >= 2^63
 * and NaN are returned unchanged.
 *
 * The magnitude is truncated through a 64-bit integer and the discarded
 * fraction is compared with one half. Both steps are exact for any
 * significand width, unlike the previous "add 0.5 then truncate", which
 * mis-rounded odd integers in [2^52, 2^53) when long double has only a
 * 53-bit significand. The sign is handled inline so the result does not
 * depend on the storage-layout assumptions of a separate sign helper.
 */
long double roundl(long double x)
{
    long double absx = fabsl(x);

    if( absx < 0.5L )
        return x * 0.0L; // exact; yields a zero carrying the sign of x

    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
    {
        uint64_t whole = (uint64_t) absx; // truncates toward zero

        if( absx - (long double) whole >= 0.5L )
            ++whole;

        absx = (long double) whole;
        x = ( x < 0.0L ) ? -absx : absx;
    }

    return x;
}
517
cbrtf(float x)518 float cbrtf( float x )
519 {
520 float z = pow( fabs((double) x), 1.0 / 3.0 );
521 return copysignf( z, x );
522 }
523
cbrt(double x)524 double cbrt( double x )
525 {
526 return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
527 }
528
/*
 * Rounds x to an integer using the current rounding mode and returns it as
 * a long.
 *
 * Values at or above LONG_MAX return LONG_MAX. Values at or below LONG_MIN,
 * and NaN, return LONG_MIN; the previous version had no lower clamp, so
 * those inputs reached an out-of-range floating-to-integer conversion,
 * which is undefined behaviour. (The C standard leaves the result for
 * unrepresentable values unspecified.)
 */
long int lrint (double x)
{
    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    // Written as a negated comparison so that NaN is caught as well.
    if( !( x > (double) LONG_MIN ) )
        return LONG_MIN;

    if( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        // Adding and removing 2^52 (with the sign of x) drops the fraction
        // bits according to the current rounding mode. The sign of a zero
        // result does not matter for an integer return value.
        double magic = ( x < 0.0 ) ? -4503599627370496.0 : 4503599627370496.0;
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    return (long int) x;
}
546
/*
 * Rounds x to an integer using the current rounding mode and returns it as
 * a long.
 *
 * Values at or above LONG_MAX return LONG_MAX. Values at or below LONG_MIN,
 * and NaN, return LONG_MIN; the previous version had no lower clamp, so
 * those inputs reached an out-of-range floating-to-integer conversion,
 * which is undefined behaviour. (The C standard leaves the result for
 * unrepresentable values unspecified.)
 */
long int lrintf (float x)
{
    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    // Written as a negated comparison so that NaN is caught as well.
    if( !( x > (float) LONG_MIN ) )
        return LONG_MIN;

    if( fabsf(x) < 8388608.0f /* 0x1.0p23f */ )
    {
        // Adding and removing 2^23 (with the sign of x) drops the fraction
        // bits according to the current rounding mode. The sign of a zero
        // result does not matter for an integer return value.
        float magic = ( x < 0.0f ) ? -8388608.0f : 8388608.0f;
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    return (long int) x;
}
564
565 #endif // _MSC_VER < 1900
566
567 ///////////////////////////////////////////////////////////////////
568 //
569 // fenv functions
570 //
571 ///////////////////////////////////////////////////////////////////
572
573 #if _MSC_VER < 1800
fetestexcept(int excepts)574 int fetestexcept(int excepts)
575 {
576 unsigned int status = _statusfp();
577 return excepts & (
578 ((status & _SW_INEXACT) ? FE_INEXACT : 0) |
579 ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
580 ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
581 ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
582 ((status & _SW_INVALID) ? FE_INVALID : 0)
583 );
584 }
585
/*
 * Clears the floating-point status by calling _clearfp() and returns 0.
 * The excepts argument is not consulted: the call is made regardless of
 * which exceptions the caller asked to clear.
 */
int feclearexcept(int excepts)
{
    (void) excepts;

    _clearfp();

    return 0;
}
591 #endif
592
593 #endif // __INTEL_COMPILER
594
595 #if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
596
/*
 * Returns a quiet float NaN. The integer parsed from str by atoi() is
 * OR-ed into the bit pattern 0x7fc00000 (all exponent bits plus the quiet
 * bit), so the result is always a NaN.
 *
 * The bits are converted through a union, as elsewhere in this file,
 * instead of dereferencing a float* cast of an integer's address, which
 * violates the strict-aliasing rules.
 */
float nanf( const char* str)
{
    union{ unsigned int u; float f; } bits;

    bits.u = (unsigned int) atoi( str );
    bits.u |= 0x7fc00000U;

    return bits.f;
}
603
604
/*
 * Returns a quiet double NaN. The integer parsed from str by atoi() is
 * converted to 64 bits and OR-ed into the bit pattern 0x7ff8000000000000
 * (all exponent bits plus the quiet bit), so the result is always a NaN.
 *
 * The bits are converted through a union, as elsewhere in this file,
 * instead of dereferencing a double* cast of an integer's address, which
 * violates the strict-aliasing rules.
 */
double nan( const char* str)
{
    union{ uint64_t u; double f; } bits;

    bits.u = (uint64_t) atoi( str );
    bits.u |= 0x7ff8000000000000ULL;

    return bits.f;
}
611
/*
 * Returns a quiet long double NaN carrying the integer parsed from str by
 * atoi() in its low significand bits.
 *
 * Two storage layouts are handled:
 *   - long double stored exactly like double (same size): the double quiet
 *     NaN pattern 0x7ff8000000000000 is used.
 *   - otherwise the x87 80-bit extended layout is assumed: exponent 0x7fff
 *     with the integer bit and the quiet bit set (0xC000000000000000).
 *
 * The previous version set only the integer bit. With a payload of 0 that
 * is the encoding of +infinity, not a NaN, and with a non-zero payload it
 * is a signaling NaN.
 */
long double nanl( const char* str)
{
    if( sizeof( long double ) == sizeof( double ) )
    {
        union{ uint64_t u; double f; } d;

        d.u = (uint64_t) atoi( str ) | 0x7ff8000000000000ULL;

        return (long double) d.f;
    }
    else
    {
        union
        {
            long double f;
            struct { uint64_t m; unsigned short sexp; }u;
        }u;

        u.u.sexp = 0x7fff;
        u.u.m = 0xC000000000000000ULL | (uint64_t) atoi( str );

        return u.f;
    }
}
625
626 #endif
627
628 ///////////////////////////////////////////////////////////////////
629 //
630 // misc functions
631 //
632 ///////////////////////////////////////////////////////////////////
633
634 /*
635 // This function is commented out because the Windows implementation should never call munmap.
636 // If it is calling it, we have a bug. Please file a bugzilla.
637 int munmap(void *addr, size_t len)
638 {
639 // FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
640
641 return (int)VirtualAlloc( (LPVOID)addr, len,
642 MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
643 }
644 */
645
ReadTime(void)646 uint64_t ReadTime( void )
647 {
648 LARGE_INTEGER current;
649 QueryPerformanceCounter(¤t);
650 return (uint64_t)current.QuadPart;
651 }
652
/*
 * Converts the tick difference (endTime - startTime) to a double by
 * dividing by the performance-counter frequency and multiplying by 1e9.
 * The frequency is queried once, on the first call, and cached in a
 * function-local static.
 */
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    static double PerformanceFrequency = 0.0;
    uint64_t ticks;

    // 0.0 marks "not queried yet".
    if (PerformanceFrequency == 0.0) {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        PerformanceFrequency = (double) frequency.QuadPart;
    }

    ticks = endTime - startTime;

    return (double) ticks / PerformanceFrequency * 1e9;
}
665
cf_signbit(double x)666 int cf_signbit(double x)
667 {
668 union
669 {
670 double f;
671 cl_ulong u;
672 }u;
673 u.f = x;
674 return u.u >> 63;
675 }
676
cf_signbitf(float x)677 int cf_signbitf(float x)
678 {
679 union
680 {
681 float f;
682 cl_uint u;
683 }u;
684 u.f = x;
685 return u.u >> 31;
686 }
687
/* Reinterprets the 32-bit pattern of ix as a float (no numeric conversion). */
float int2float (int32_t ix)
{
    union {
        int32_t i;
        float f;
    } pun;

    pun.i = ix;

    return pun.f;
}
697
/* Reinterprets the 32-bit pattern of fx as an int32_t (no numeric conversion). */
int32_t float2int (float fx)
{
    union {
        int32_t i;
        float f;
    } pun;

    pun.f = fx;

    return pun.i;
}
707
708 #if !defined(_WIN64)
/** Returns the number of leading 0-bits in pattern, starting at the most
    significant bit position. _BitScanReverse supplies the index of the
    highest set bit. When it reports that no bit is set (pattern is 0),
    this implementation returns the bit width of int.
*/
int __builtin_clz(unsigned int pattern)
{
#if 0
    int res;
    __asm {
        mov eax, pattern
        bsr eax, eax
        mov res, eax
    }
    return 31 - res;
#endif
    unsigned long top_bit;

    if( !_BitScanReverse( &top_bit, pattern ) )
        return 8*sizeof(int);

    return 8*sizeof(int) - 1 - top_bit;
}
732 #else
/*
 * Returns the number of leading 0-bits in pattern, treated as a 32-bit
 * value; returns 32 when pattern is 0.
 * Binary search: at each step, if anything remains above the low `step`
 * bits, discard those low bits and credit `step` fewer leading zeros.
 */
int __builtin_clz(unsigned int pattern)
{
    static const int steps[5] = { 16, 8, 4, 2, 1 };
    int zeros = 31;
    int i;

    if( pattern == 0u )
        return 32;

    for( i = 0; i < 5; ++i )
    {
        if( ( pattern >> steps[i] ) != 0u )
        {
            pattern >>= steps[i];
            zeros -= steps[i];
        }
    }

    return zeros;
}
747
748 #endif // !defined(_WIN64)
749
750 #include <intrin.h>
751 #include <emmintrin.h>
752
/*
 * Suspends the caller via Sleep() for usec microseconds, converted to
 * milliseconds with (usec + 999) / 1000. Always returns 0.
 */
int usleep(int usec)
{
    const int msec = (usec + 999) / 1000;

    Sleep(msec);

    return 0;
}
758
/*
 * Suspends the caller via Sleep() for sec seconds (sec * 1000
 * milliseconds). Always returns 0.
 */
unsigned int sleep( unsigned int sec )
{
    const unsigned int msec = sec * 1000;

    Sleep( msec );

    return 0;
}
764
765 #endif // defined( _MSC_VER )
766