1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "compat.h"
17
18 #if defined(_MSC_VER)
19
20 #include <limits.h>
21 #include <stdlib.h>
22
23 #include <CL/cl.h>
24
25 #include <windows.h>
26
27 #if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
28
29 ///////////////////////////////////////////////////////////////////
30 //
31 // rint, rintf
32 //
33 ///////////////////////////////////////////////////////////////////
34
// Returns a float with the magnitude of x and the sign of y.
float copysignf(float x, float y)
{
    // Overlay each float with a 32-bit integer so the sign bit (bit 31)
    // can be transplanted with plain integer masks.
    union {
        unsigned int bits;
        float value;
    } magnitude, sign;

    magnitude.value = x;
    sign.value = y;

    magnitude.bits &= 0x7fffffffU; // keep exponent and mantissa of x
    magnitude.bits |= sign.bits & 0x80000000U; // adopt the sign bit of y

    return magnitude.value;
}
49
// Returns a double with the magnitude of x and the sign of y.
double copysign(double x, double y)
{
    // Overlay each double with a 64-bit integer so the sign bit (bit 63)
    // can be transplanted with plain integer masks.
    union {
        unsigned long long bits;
        double value;
    } magnitude, sign;

    magnitude.value = x;
    sign.value = y;

    magnitude.bits &= 0x7fffffffffffffffULL; // keep exponent and mantissa
    magnitude.bits |= sign.bits & 0x8000000000000000ULL; // sign bit of y

    return magnitude.value;
}
64
// Returns a long double with the magnitude of x and the sign of y.
//
// The storage format is selected by size. When long double occupies the
// same number of bytes as double (the MSVC case, where both types share one
// representation) the binary64 sign bit is moved. Otherwise the x87 80-bit
// extended layout is assumed: a 64-bit significand followed by a 16-bit
// word holding the sign (bit 15) and the exponent. Using the 80-bit overlay
// unconditionally would place `sexp` outside a double-sized value, so the
// sign would never be transferred.
long double copysignl(long double x, long double y)
{
    if (sizeof(long double) == sizeof(double))
    {
        union {
            unsigned long long u;
            double f;
        } ux, uy;

        ux.f = (double)x;
        uy.f = (double)y;

        ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);

        return ux.f;
    }
    else
    {
        union {
            long double f;
            struct
            {
                unsigned long long m;
                unsigned short sexp;
            } u;
        } ux, uy;

        ux.f = x;
        uy.f = y;

        // keep the 15 exponent bits of x, take the sign bit of y
        ux.u.sexp =
            (unsigned short)((ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000));

        return ux.f;
    }
}
83
// Rounds x to an integral value, keeping the sign of x (so small negative
// inputs yield -0.0f).
float rintf(float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float bias;
    float sum;

    // Floats with magnitude >= 2^23 carry no fraction bits, and NaN fails
    // the comparison as well; both are returned untouched.
    if (!(fabsf(x) < two23)) return x;

    // Adding a sign-matched 2^23 pushes the fraction bits out of the
    // mantissa, so the addition itself performs the rounding; subtracting
    // the bias again leaves the rounded value.
    bias = copysignf(two23, x);
    sum = x + bias;
    sum -= bias;

    return copysignf(sum, x);
}
98
// Rounds x to an integral value, keeping the sign of x (so small negative
// inputs yield -0.0).
double rint(double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double bias;
    double sum;

    // Doubles with magnitude >= 2^52 carry no fraction bits, and NaN fails
    // the comparison as well; both are returned untouched.
    if (!(fabs(x) < two52)) return x;

    // Adding a sign-matched 2^52 pushes the fraction bits out of the
    // mantissa, so the addition itself performs the rounding; subtracting
    // the bias again leaves the rounded value.
    bias = copysign(two52, x);
    sum = x + bias;
    sum -= bias;

    return copysign(sum, x);
}
113
// Rounds x to an integral value, keeping the sign of x.
//
// The technique adds and then removes a bias of 2^(p-1), where p is the
// number of significand bits, so that the addition itself discards the
// fraction. p is chosen by size: 53 bits when long double is stored as a
// double (the MSVC case), otherwise the 64 bits of the x87 extended format
// are assumed. With a 53-bit significand a 2^63 bias would round to
// multiples of 2048 rather than to integers.
//
// The sign is handled with comparisons only, so the function does not
// depend on any other helper.
long double rintl(long double x)
{
    long double limit;
    long double absx;
    long double magic;
    long double rounded;

    if (sizeof(long double) == sizeof(double))
    {
        limit = 4503599627370496.0L; /* 0x1.0p52L */
    }
    else
    {
        limit = 9223372036854775808.0L; /* 0x1.0p63L */
    }

    absx = (x < 0.0L) ? -x : x;

    // +/-0 is returned as is (keeps the sign of zero); values at or above
    // the limit are already integral; NaN fails the comparison.
    if (x == 0.0L || !(absx < limit)) return x;

    magic = (x < 0.0L) ? -limit : limit;
    rounded = x + magic;
    rounded -= magic;

    // A zero result must carry the sign of x; x is finite and non-zero
    // here, so x * 0 is a zero with exactly that sign.
    if (rounded == 0.0L) return x * 0.0L;

    return rounded;
}
129
130 #if _MSC_VER < 1800
131
132 ///////////////////////////////////////////////////////////////////
133 //
134 // ilogb, ilogbf, ilogbl
135 //
136 ///////////////////////////////////////////////////////////////////
// Fallback results of the ilogb family for zero and NaN arguments; defined
// only when the headers included above have not already provided them.
#if !defined(FP_ILOGB0)
#define FP_ILOGB0 INT_MIN
#endif

#if !defined(FP_ILOGBNAN)
#define FP_ILOGBNAN INT_MIN
#endif
144
// Returns the unbiased binary exponent of x as an int.
//
// Special cases: zero -> FP_ILOGB0, infinity -> INT_MAX, NaN -> FP_ILOGBNAN.
// Subnormal inputs report the exponent of their leading set bit.
int ilogb(double x)
{
    union {
        double f;
        unsigned long long u;
    } u;
    unsigned long long absx;

    u.f = x;
    absx = u.u & 0x7fffffffffffffffULL; // drop the sign bit

    // One unsigned comparison catches every non-normal input: subtracting
    // the smallest normal encoding (exponent field 1, i.e. 2^52) wraps
    // around for zero and subnormals, and infinities/NaNs sit at or above
    // 0x7ff0000000000000. The subtrahend has to be 0x0010000000000000;
    // with 0x0001000000000000 the subnormals whose significand is >= 2^48
    // would be treated as normal and all report -1023.
    if (absx - 0x0010000000000000ULL
        >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
    {
        if (absx == 0) return FP_ILOGB0;
        if (absx == 0x7ff0000000000000ULL) return INT_MAX;
        if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN;

        // Subnormal: graft the significand onto the exponent of 1.0 and
        // subtract 1.0 so the hardware normalizes it; the resulting
        // exponent field then encodes the leading bit's position.
        u.u = absx | 0x3ff0000000000000ULL;
        u.f -= 1.0;
        return (int)(u.u >> 52) - (1023 + 1022);
    }

    return (int)(absx >> 52) - 1023;
}
173
174
// Returns the unbiased binary exponent of x as an int.
//
// Special cases: zero -> FP_ILOGB0, infinity -> INT_MAX, NaN -> FP_ILOGBNAN.
// Subnormal inputs report the exponent of their leading set bit.
int ilogbf(float x)
{
    union {
        float f;
        unsigned int u;
    } pun;
    unsigned int mag;

    pun.f = x;
    mag = pun.u & 0x7fffffff; // drop the sign bit

    // Normal numbers: exponent field between 1 and 254.
    if (mag >= 0x00800000U && mag < 0x7f800000U)
    {
        return (int)(mag >> 23) - 127;
    }

    if (mag == 0) return FP_ILOGB0;
    if (mag == 0x7f800000U) return INT_MAX;
    if (mag > 0x7f800000) return FP_ILOGBNAN;

    // Subnormal: graft the significand onto the exponent of 1.0f and
    // subtract 1.0f so the hardware normalizes it; the resulting exponent
    // field then encodes the leading bit's position.
    pun.u = mag | 0x3f800000U;
    pun.f -= 1.0f;
    return (int)(pun.u >> 23) - (127 + 126);
}
202
// Returns the unbiased binary exponent of x as an int.
//
// Special cases: zero -> FP_ILOGB0, infinity -> INT_MAX, NaN -> FP_ILOGBNAN.
//
// When long double occupies the same number of bytes as double (the MSVC
// case) the value is handed to ilogb(); the 80-bit overlay below would
// otherwise read its exponent word from outside the value. In every other
// case the x87 extended layout is assumed: a 64-bit significand with an
// explicit integer bit (bit 63) followed by a 16-bit sign+exponent word.
int ilogbl(long double x)
{
    union {
        long double f;
        struct
        {
            unsigned long long m;
            unsigned short sexp;
        } u;
    } u;
    int exp;

    if (sizeof(long double) == sizeof(double)) return ilogb((double)x);

    u.f = x;

    exp = u.u.sexp & 0x7fff;
    if (0 == exp)
    {
        if (0 == u.u.m) return FP_ILOGB0;

        // Exponent field 0 with the integer bit set (a pseudo-denormal)
        // has the minimum exponent, 1 - bias.
        if (u.u.m >> 63) return 1 - 0x3fff;

        // Subnormal: form 1.m by setting the explicit integer bit together
        // with the exponent of 1.0 (leaving the integer bit clear would
        // create an unnormal encoding), then subtract 1.0 so the hardware
        // normalizes the fraction.
        u.u.m |= 0x8000000000000000ULL;
        u.u.sexp = 0x3fff;
        u.f -= 1.0L;
        exp = u.u.sexp & 0x7fff;

        return exp - (0x3fff + 0x3ffe);
    }
    else if (0x7fff == exp)
    {
        // any fraction bit below the integer bit marks a NaN
        if (u.u.m & 0x7fffffffffffffffULL) return FP_ILOGBNAN;

        return INT_MAX;
    }

    return exp - 0x3fff;
}
236
237 #endif // _MSC_VER < 1800
238
239 ///////////////////////////////////////////////////////////////////
240 //
241 // fmax, fmin, fmaxf, fminf
242 //
243 ///////////////////////////////////////////////////////////////////
244
// Stores the raw 32-bit pattern of the float fx into *ux.
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    // The overlay is volatile, presumably so the store and the reload are
    // really performed rather than folded away by the compiler.
    volatile union {
        unsigned int u;
        float f;
    } pun;

    pun.f = fx;
    *ux = pun.u;
}
254 /* static void GET_BITS_SP32(float fx, unsigned int* ux) */
255 /* { */
256 /* volatile union {float f; unsigned int i;} _bitsy; */
257 /* _bitsy.f = (fx); */
258 /* *ux = _bitsy.i; */
259 /* } */
// Stores into *fx the float whose raw 32-bit pattern is ux.
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    // The overlay is volatile, presumably so the store and the reload are
    // really performed rather than folded away by the compiler.
    volatile union {
        unsigned int u;
        float f;
    } pun;

    pun.u = ux;
    *fx = pun.f;
}
269 /* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
270 /* { */
271 /* volatile union {float f; unsigned int i;} _bitsy; */
272 /* _bitsy.i = (ux); */
273 /* *fx = _bitsy.f; */
274 /* } */
// Stores the raw 64-bit pattern of the double dx into *lx.
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    // The overlay is volatile, presumably so the store and the reload are
    // really performed rather than folded away by the compiler.
    volatile union {
        unsigned __int64 l;
        double d;
    } pun;

    pun.d = dx;
    *lx = pun.l;
}
// Stores into *dx the double whose raw 64-bit pattern is lx.
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    // The overlay is volatile, presumably so the store and the reload are
    // really performed rather than folded away by the compiler.
    volatile union {
        unsigned __int64 l;
        double d;
    } pun;

    pun.l = lx;
    *dx = pun.d;
}
293
294 #if 0
295 int SIGNBIT_DP64(double x )
296 {
297 int hx;
298 _GET_HIGH_WORD(hx,x);
299 return((hx>>31));
300 }
301 #endif
302
303 #if _MSC_VER < 1900
304
305 /* fmax(x, y) returns the larger (more positive) of x and y.
306 NaNs are treated as missing values: if one argument is NaN,
307 the other argument is returned. If both arguments are NaN,
308 the first argument is returned. */
309
/* The NaN handling below relies on isnan(y) recognizing a NaN. That is
   equivalent to testing (y != y), which works so long as the compiler knows
   that (y != y) means that y is NaN; gcc does. */
// Returns the larger of x and y. A NaN y is treated as missing, so x is
// returned; a NaN x fails the comparison, so y is returned.
double fmax(double x, double y)
{
    // y is the answer only when it is a number and x is not >= y
    if (!isnan(y) && !(x >= y))
    {
        return y;
    }

    return x;
}
318
319
320 /* fmin(x, y) returns the smaller (more negative) of x and y.
321 NaNs are treated as missing values: if one argument is NaN,
322 the other argument is returned. If both arguments are NaN,
323 the first argument is returned. */
324
// Returns the smaller of x and y. A NaN y is treated as missing, so x is
// returned; a NaN x fails the comparison, so y is returned.
double fmin(double x, double y)
{
    // y is the answer only when it is a number and x is not <= y
    if (!isnan(y) && !(x <= y))
    {
        return y;
    }

    return x;
}
331
332
// Returns the larger of x and y. A NaN y is treated as missing, so x is
// returned; a NaN x fails the comparison, so y is returned.
float fmaxf(float x, float y)
{
    // y is the answer only when it is a number and x is not >= y
    if (!isnan(y) && !(x >= y))
    {
        return y;
    }

    return x;
}
339
340 /* fminf(x, y) returns the smaller (more negative) of x and y.
341 NaNs are treated as missing values: if one argument is NaN,
342 the other argument is returned. If both arguments are NaN,
343 the first argument is returned. */
344
// Returns the smaller of x and y. A NaN y is treated as missing, so x is
// returned; a NaN x fails the comparison, so y is returned.
float fminf(float x, float y)
{
    // y is the answer only when it is a number and x is not <= y
    if (!isnan(y) && !(x <= y))
    {
        return y;
    }

    return x;
}
351
// Returns x * 2^n.
//
// - Zero, infinity and NaN are returned unchanged.
// - Overflow yields an infinity carrying the sign of x; total underflow
//   yields a zero carrying the sign of x.
// - When long double occupies the same number of bytes as double (the MSVC
//   case) the work is delegated to ldexp(); n is first clamped to +/-2200,
//   which exceeds the whole exponent span of a double, so clamping cannot
//   change the result.
// - Otherwise the x87 80-bit extended layout is assumed and powers of two
//   are built directly in a union (64-bit significand with an explicit
//   integer bit, followed by a 16-bit sign+exponent word biased by 0x3fff).
//   The early-out bound of 33000 exceeds that format's exponent span
//   (16383 + 16445).
long double scalblnl(long double x, long n)
{
    union {
        long double d;
        struct
        {
            unsigned long long m;
            unsigned short sexp;
        } u;
    } u;

    // NaN is the only value unequal to itself; zero and infinity are the
    // only values equal to their own double.
    if (x != x || x + x == x) return x;

    if (sizeof(long double) == sizeof(double))
    {
        if (n > 2200) n = 2200;
        if (n < -2200) n = -2200;
        return ldexp((double)x, (int)n);
    }

    // x is finite and non-zero from here on.
    if (n < -33000) return x * 0.0L; // zero with the sign of x

    if (n > 33000)
    {
        return (x < 0.0L) ? -(long double)INFINITY : (long double)INFINITY;
    }

    u.d = 0.0L;
    u.u.m = 0x8000000000000000ULL; // significand 1.0 (integer bit only)

    if (n < 0)
    {
        // Scale in steps of 2^-1022, then apply the remainder.
        u.u.sexp = 0x3fff - 1022;
        while (n <= -1022)
        {
            x *= u.d;
            n += 1022;
        }
        u.u.sexp = (unsigned short)(0x3fff + n);
        x *= u.d;
        return x;
    }

    if (n > 0)
    {
        // Scale in steps of 2^1023, then apply the remainder.
        u.u.sexp = 0x3fff + 1023;
        while (n >= 1023)
        {
            x *= u.d;
            n -= 1023;
        }
        u.u.sexp = (unsigned short)(0x3fff + n);
        x *= u.d;
        return x;
    }

    return x;
}
396
397 ///////////////////////////////////////////////////////////////////
398 //
399 // log2
400 //
401 ///////////////////////////////////////////////////////////////////
402 const static cl_double log_e_base2 = 1.4426950408889634074;
403 const static cl_double log_10_base2 = 3.3219280948873623478;
404
405 // double log10(double x);
406
log2(double x)407 double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); }
408
log2l(long double x)409 long double log2l(long double x)
410 {
411 return 1.44269504088896340735992468100189214L * log(x);
412 }
413
// Rounds x toward zero to an integral value, keeping the sign of x.
double trunc(double x)
{
    long long whole;

    // Doubles with magnitude >= 2^52 carry no fraction bits, and NaN fails
    // the comparison; both are returned untouched.
    if (!(fabs(x) < 4503599627370496.0 /* 0x1.0p52 */)) return x;

    // The integer conversion discards the fraction; copysign restores the
    // sign for results that collapse to zero.
    whole = (long long)x;
    return copysign((double)whole, x);
}
426
// Rounds x toward zero to an integral value, keeping the sign of x.
float truncf(float x)
{
    int whole;

    // Floats with magnitude >= 2^23 carry no fraction bits, and NaN fails
    // the comparison; both are returned untouched.
    if (!(fabsf(x) < 8388608.0f /* 0x1.0p23f */)) return x;

    // The integer conversion discards the fraction; copysignf restores the
    // sign for results that collapse to zero.
    whole = (int)x;
    return copysignf((float)whole, x);
}
439
// Rounds x to the nearest integer, halfway cases away from zero, and
// returns it as a long. Results that do not fit are clamped to LONG_MAX or
// LONG_MIN rather than being passed to an out-of-range conversion.
long lround(double x)
{
    double absx = fabs(x);

    // Handled up front: adding 0.5 to the largest double below 0.5 would
    // round up to 1.0.
    if (absx < 0.5) return 0;

    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // add one half, then drop the fraction
        absx += 0.5;
        absx = (double)(long long)absx;
        x = copysign(absx, x);
    }

    if (x >= (double)LONG_MAX) return LONG_MAX;

    // Mirror of the upper clamp: converting a value below LONG_MIN to long
    // is undefined.
    if (x <= (double)LONG_MIN) return LONG_MIN;

    return (long)x;
}
458
// Rounds x to the nearest integer, halfway cases away from zero, and
// returns it as a long. Results that do not fit are clamped to LONG_MAX or
// LONG_MIN rather than being passed to an out-of-range conversion.
long lroundf(float x)
{
    float absx = fabsf(x);

    // Handled up front: adding 0.5f to the largest float below 0.5f would
    // round up to 1.0f.
    if (absx < 0.5f) return 0;

    if (absx < 8388608.0f /* 0x1.0p23f */)
    {
        // add one half, then drop the fraction
        absx += 0.5f;
        absx = (float)(int)absx;
        x = copysignf(absx, x);
    }

    if (x >= (float)LONG_MAX) return LONG_MAX;

    // Mirror of the upper clamp: converting a value below LONG_MIN to long
    // is undefined.
    if (x <= (float)LONG_MIN) return LONG_MIN;

    return (long)x;
}
477
// Rounds x to the nearest integral value, halfway cases away from zero,
// keeping the sign of x.
double round(double x)
{
    double mag = fabs(x);

    // Anything below one half rounds to a zero with the sign of x.
    if (mag < 0.5) return copysign(0.0, x);

    // Doubles with magnitude >= 2^52 carry no fraction bits, and NaN fails
    // the comparison; both are returned untouched.
    if (!(mag < 4503599627370496.0 /* 0x1.0p52 */)) return x;

    // add one half, then drop the fraction
    mag += 0.5;
    mag = (double)(long long)mag;

    return copysign(mag, x);
}
494
// Rounds x to the nearest integral value, halfway cases away from zero,
// keeping the sign of x.
float roundf(float x)
{
    float mag = fabsf(x);

    // Anything below one half rounds to a zero with the sign of x.
    if (mag < 0.5f) return copysignf(0.0f, x);

    // Floats with magnitude >= 2^23 carry no fraction bits, and NaN fails
    // the comparison; both are returned untouched.
    if (!(mag < 8388608.0f /* 0x1.0p23f */)) return x;

    // add one half, then drop the fraction
    mag += 0.5f;
    mag = (float)(int)mag;

    return copysignf(mag, x);
}
511
// Rounds x to the nearest integral value, halfway cases away from zero,
// keeping the sign of x.
//
// The "already integral" threshold is 2^(p-1) for a p-bit significand. p is
// chosen by size: 53 bits when long double is stored as a double (the MSVC
// case), otherwise the 64 bits of the x87 extended format are assumed.
// Using 2^63 with a 53-bit significand would let the +0.5 step disturb
// values that are already integers.
//
// The sign is restored with comparisons only, so the function does not
// depend on any other helper.
long double roundl(long double x)
{
    long double absx = fabsl(x);
    long double limit;

    // Anything below one half rounds to zero; x * 0 is a zero that carries
    // the sign of x (x is finite here).
    if (absx < 0.5L) return x * 0.0L;

    if (sizeof(long double) == sizeof(double))
    {
        limit = 4503599627370496.0L; /* 0x1.0p52L */
    }
    else
    {
        limit = 9223372036854775808.0L; /* 0x1.0p63L */
    }

    // already integral, infinite, or NaN (NaN fails the comparison)
    if (!(absx < limit)) return x;

    // add one half, then drop the fraction
    absx += 0.5L;
    absx = (long double)(unsigned long long)absx;

    return (x < 0.0L) ? -absx : absx;
}
528
cbrtf(float x)529 float cbrtf(float x)
530 {
531 float z = pow(fabs((double)x), 1.0 / 3.0);
532 return copysignf(z, x);
533 }
534
cbrt(double x)535 double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); }
536
// Rounds x to an integer using the add/subtract 2^52 technique (the
// addition itself performs the rounding) and returns it as a long. Values
// that do not fit are clamped to LONG_MAX or LONG_MIN rather than being
// passed to an out-of-range conversion.
long int lrint(double x)
{
    double absx = fabs(x);

    if (x >= (double)LONG_MAX) return LONG_MAX;

    // Mirror of the upper clamp: converting a value below LONG_MIN to long
    // is undefined.
    if (x <= (double)LONG_MIN) return LONG_MIN;

    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // A sign-matched 2^52 pushes the fraction bits out of the mantissa.
        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
        double rounded = x + magic;
        rounded -= magic;
        return (long int)rounded;
    }

    return (long int)x;
}
553
// Rounds x to an integer using the add/subtract 2^23 technique (the
// addition itself performs the rounding) and returns it as a long. Values
// that do not fit are clamped to LONG_MAX or LONG_MIN rather than being
// passed to an out-of-range conversion.
long int lrintf(float x)
{
    float absx = fabsf(x);

    if (x >= (float)LONG_MAX) return LONG_MAX;

    // Mirror of the upper clamp: converting a value below LONG_MIN to long
    // is undefined.
    if (x <= (float)LONG_MIN) return LONG_MIN;

    if (absx < 8388608.0f /* 0x1.0p23f */)
    {
        // A sign-matched 2^23 pushes the fraction bits out of the mantissa.
        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
        float rounded = x + magic;
        rounded -= magic;
        return (long int)rounded;
    }

    return (long int)x;
}
570
571 #endif // _MSC_VER < 1900
572
573 ///////////////////////////////////////////////////////////////////
574 //
575 // fenv functions
576 //
577 ///////////////////////////////////////////////////////////////////
578
579 #if _MSC_VER < 1800
fetestexcept(int excepts)580 int fetestexcept(int excepts)
581 {
582 unsigned int status = _statusfp();
583 return excepts
584 & (((status & _SW_INEXACT) ? FE_INEXACT : 0)
585 | ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)
586 | ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)
587 | ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0)
588 | ((status & _SW_INVALID) ? FE_INVALID : 0));
589 }
590
// Resets the floating-point status through _clearfp() and reports success.
// NOTE: `excepts` is not consulted; _clearfp() is called unconditionally,
// so the selection passed by the caller has no effect.
int feclearexcept(int excepts)
{
    (void)excepts; // intentionally unused
    _clearfp();

    return 0;
}
596 #endif
597
598 #endif // __INTEL_COMPILER
599
600 #if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
601
// Returns a quiet float NaN whose payload is taken from the decimal number
// in str (as parsed by atoi).
//
// The bit pattern is assembled in a union rather than by casting an integer
// pointer to float*, which violates the aliasing rules. The payload is
// limited to the 22 mantissa bits below the quiet bit so it cannot spill
// into the sign bit.
float nanf(const char* str)
{
    union {
        unsigned int u;
        float f;
    } bits;

    // 0x7fc00000: all exponent bits plus the quiet bit
    bits.u = 0x7fc00000U | ((unsigned int)atoi(str) & 0x003fffffU);

    return bits.f;
}
608
609
// Returns a quiet double NaN whose payload is taken from the decimal number
// in str (as parsed by atoi).
//
// The bit pattern is assembled in a union rather than by casting an integer
// pointer to double*, which violates the aliasing rules. The payload is
// limited to the 51 mantissa bits below the quiet bit so it cannot spill
// into the sign bit.
double nan(const char* str)
{
    union {
        unsigned long long u;
        double f;
    } bits;

    // 0x7ff8000000000000: all exponent bits plus the quiet bit
    bits.u = 0x7ff8000000000000ULL
        | ((unsigned long long)atoi(str) & 0x0007ffffffffffffULL);

    return bits.f;
}
616
// Returns a quiet long double NaN whose payload is taken from the decimal
// number in str (as parsed by atoi).
//
// The storage format is selected by size. When long double occupies the
// same number of bytes as double (the MSVC case) a binary64 quiet NaN is
// built. Otherwise the x87 80-bit extended layout is assumed, where a quiet
// NaN needs an all-ones exponent with both the explicit integer bit (63)
// and the quiet bit (62) of the significand set; with only the integer bit
// set and a zero payload the encoding would be Infinity.
long double nanl(const char* str)
{
    const unsigned long long payload = (unsigned long long)atoi(str);

    if (sizeof(long double) == sizeof(double))
    {
        union {
            unsigned long long u;
            double f;
        } d;

        d.u = 0x7ff8000000000000ULL | (payload & 0x0007ffffffffffffULL);

        return d.f;
    }
    else
    {
        union {
            long double f;
            struct
            {
                unsigned long long m;
                unsigned short sexp;
            } u;
        } u;

        u.f = 0.0L; // give every byte of the value a defined content
        u.u.sexp = 0x7fff;
        u.u.m = 0xc000000000000000ULL | (payload & 0x3fffffffffffffffULL);

        return u.f;
    }
}
633
634 #endif
635
636 ///////////////////////////////////////////////////////////////////
637 //
638 // misc functions
639 //
640 ///////////////////////////////////////////////////////////////////
641
642 /*
643 // This function is commented out because the Windows implementation should
644 never call munmap.
645 // If it is calling it, we have a bug. Please file a bugzilla.
646 int munmap(void *addr, size_t len)
647 {
648 // FIXME: this is not correct. munmap is like free()
649 // http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
650
651 return (int)VirtualAlloc( (LPVOID)addr, len,
652 MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
653 }
654 */
655
ReadTime(void)656 uint64_t ReadTime(void)
657 {
658 LARGE_INTEGER current;
659 QueryPerformanceCounter(¤t);
660 return (uint64_t)current.QuadPart;
661 }
662
// Converts the difference between two ReadTime() readings to nanoseconds.
// The counter frequency is queried on the first call and cached in a
// function-local static for later calls.
double SubtractTime(uint64_t endTime, uint64_t startTime)
{
    static double ticksPerSecond = 0.0;
    const uint64_t elapsedTicks = endTime - startTime;

    if (ticksPerSecond == 0.0)
    {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        ticksPerSecond = (double)frequency.QuadPart;
    }

    return (double)elapsedTicks / ticksPerSecond * 1e9;
}
676
// Returns 1 when the sign bit of x is set (including -0.0), otherwise 0.
int cf_signbit(double x)
{
    union {
        unsigned long long bits;
        double value;
    } pun;

    pun.value = x;

    // bit 63 is the sign bit of a 64-bit double
    return (int)(pun.bits >> 63);
}
686
// Returns 1 when the sign bit of x is set (including -0.0f), otherwise 0.
int cf_signbitf(float x)
{
    union {
        unsigned int bits;
        float value;
    } pun;

    pun.value = x;

    // bit 31 is the sign bit of a 32-bit float
    return (int)(pun.bits >> 31);
}
696
// Reinterprets the 32 bits of ix as a float (no numeric conversion).
float int2float(int32_t ix)
{
    union {
        int32_t i;
        float f;
    } pun;

    pun.i = ix;

    return pun.f;
}
706
// Reinterprets the 32 bits of fx as a signed integer (no numeric
// conversion).
int32_t float2int(float fx)
{
    union {
        int32_t i;
        float f;
    } pun;

    pun.f = fx;

    return pun.i;
}
716
717 #if !defined(_WIN64)
718 /** Returns the number of leading 0-bits in x,
719 starting at the most significant bit position.
720 If x is 0, the result is undefined.
721 */
// Counts the leading zero bits of pattern using _BitScanReverse.
// When the scan reports that no bit is set, the full width of int in bits
// is returned. Otherwise `index` (presumably the position of the highest
// set bit) is converted into a leading-zero count.
int __builtin_clz(unsigned int pattern)
{
    const int width = (int)(8 * sizeof(int));
    unsigned long index;

    if (!_BitScanReverse(&index, pattern))
    {
        return width;
    }

    return width - 1 - (int)index;
}
744 #else
// Counts the leading zero bits of a 32-bit pattern with a binary search:
// each step tests whether any bit at or above position `shift` is set and,
// if so, discards the lower half and lowers the count. Returns 32 for 0.
int __builtin_clz(unsigned int pattern)
{
    int count = 31;
    int shift;

    if (pattern == 0u)
    {
        return 32;
    }

    for (shift = 16; shift > 0; shift >>= 1)
    {
        if (pattern >= (1u << shift))
        {
            pattern >>= shift;
            count -= shift;
        }
    }

    return count;
}
779
780 #endif // !defined(_WIN64)
781
782 #include <intrin.h>
783 #include <emmintrin.h>
784
// Suspends the calling thread for about usec microseconds. Sleep() takes
// milliseconds, so the request is rounded up to the next whole millisecond.
// Always returns 0.
int usleep(int usec)
{
    const int millis = (usec + 999) / 1000;

    Sleep(millis);

    return 0;
}
790
// Suspends the calling thread for sec seconds by passing the equivalent
// number of milliseconds to Sleep(). Always returns 0.
unsigned int sleep(unsigned int sec)
{
    const unsigned int millis = sec * 1000;

    Sleep(millis);

    return 0;
}
796
797 #endif // defined( _MSC_VER )
798