1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include "basic_test_conversions.h"
19 #include <limits.h>
20 #include <string.h>
21
22 #include "harness/mt19937.h"
23
24 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
25 #include "fplib.h"
26 #endif
27
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
/*
 * Saturation flag and rounding mode for use with the qcom 64 bit to float
 * conversion library. Only defined when building with a GNU-compatible
 * compiler for ARM / AArch64.
 * NOTE(review): roundingMode is presumably declared by "fplib.h" -- confirm.
 */
bool qcom_sat;
roundingMode qcom_rm;
#endif
33
// Forward declaration: random64 takes an MTdata generator handle and returns a cl_ulong.
// NOTE(review): the definition is outside the section reviewed here -- presumably later in this file.
static inline cl_ulong random64( MTdata d );
35
36 #if defined (_WIN32)
37 #include <mmintrin.h>
38 #include <emmintrin.h>
39 #else // !_WIN32
40 #if defined (__SSE__ )
41 #include <xmmintrin.h>
42 #endif
43 #if defined (__SSE2__ )
44 #include <emmintrin.h>
45 #endif
46 #endif // _WIN32
47
48 const char *gTypeNames[ kTypeCount ] = {
49 "uchar", "char",
50 "ushort", "short",
51 "uint", "int",
52 "float", "double",
53 "ulong", "long"
54 };
55
56 const char *gRoundingModeNames[ kRoundingModeCount ] = {
57 "",
58 "_rte",
59 "_rtp",
60 "_rtn",
61 "_rtz"
62 };
63
// Name suffix selected by a 0/1 index: entry 0 is empty, entry 1 is "_sat".
const char *gSaturationNames[ 2 ] = {
    "",
    "_sat"
};
65
66 size_t gTypeSizes[ kTypeCount ] = {
67 sizeof( cl_uchar ), sizeof( cl_char ),
68 sizeof( cl_ushort ), sizeof( cl_short ),
69 sizeof( cl_uint ), sizeof( cl_int ),
70 sizeof( cl_float ), sizeof( cl_double ),
71 sizeof( cl_ulong ), sizeof( cl_long ),
72 };
73
74 long lrintf_clamped( float f );
lrintf_clamped(float f)75 long lrintf_clamped( float f )
76 {
77 static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
78
79 if( f >= -(float) LONG_MIN )
80 return LONG_MAX;
81
82 if( f <= (float) LONG_MIN )
83 return LONG_MIN;
84
85 // Round fractional values to integer in round towards nearest mode
86 if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
87 {
88 volatile float x = f;
89 float magicVal = magic[ f < 0 ];
90
91 #if defined( __SSE__ ) || defined (_WIN32)
92 // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
93 __m128 v = _mm_set_ss( x );
94 __m128 m = _mm_set_ss( magicVal );
95 v = _mm_add_ss( v, m );
96 v = _mm_sub_ss( v, m );
97 _mm_store_ss( (float*) &x, v );
98 #else
99 x += magicVal;
100 x -= magicVal;
101 #endif
102 f = x;
103 }
104
105 return (long) f;
106 }
107
108 long long llrintf_clamped( float f );
llrintf_clamped(float f)109 long long llrintf_clamped( float f )
110 {
111 static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
112
113 if( f >= -(float) LLONG_MIN )
114 return LLONG_MAX;
115
116 if( f <= (float) LLONG_MIN )
117 return LLONG_MIN;
118
119 // Round fractional values to integer in round towards nearest mode
120 if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
121 {
122 volatile float x = f;
123 float magicVal = magic[ f < 0 ];
124 #if defined( __SSE__ ) || defined (_WIN32)
125 // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
126 __m128 v = _mm_set_ss( x );
127 __m128 m = _mm_set_ss( magicVal );
128 v = _mm_add_ss( v, m );
129 v = _mm_sub_ss( v, m );
130 _mm_store_ss( (float*) &x, v );
131 #else
132 x += magicVal;
133 x -= magicVal;
134 #endif
135 f = x;
136 }
137
138 return (long long) f;
139 }
140
141 long lrint_clamped( double f );
lrint_clamped(double f)142 long lrint_clamped( double f )
143 {
144 static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
145
146 if( sizeof( long ) > 4 )
147 {
148 if( f >= -(double) LONG_MIN )
149 return LONG_MAX;
150 }
151 else
152 {
153 if( f >= LONG_MAX )
154 return LONG_MAX;
155 }
156
157 if( f <= (double) LONG_MIN )
158 return LONG_MIN;
159
160 // Round fractional values to integer in round towards nearest mode
161 if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
162 {
163 volatile double x = f;
164 double magicVal = magic[ f < 0 ];
165 #if defined( __SSE2__ ) || defined (_MSC_VER)
166 // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
167 __m128d v = _mm_set_sd( x );
168 __m128d m = _mm_set_sd( magicVal );
169 v = _mm_add_sd( v, m );
170 v = _mm_sub_sd( v, m );
171 _mm_store_sd( (double*) &x, v );
172 #else
173 x += magicVal;
174 x -= magicVal;
175 #endif
176 f = x;
177 }
178
179 return (long) f;
180 }
181
182 long long llrint_clamped( double f );
llrint_clamped(double f)183 long long llrint_clamped( double f )
184 {
185 static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
186
187 if( f >= -(double) LLONG_MIN )
188 return LLONG_MAX;
189
190 if( f <= (double) LLONG_MIN )
191 return LLONG_MIN;
192
193 // Round fractional values to integer in round towards nearest mode
194 if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
195 {
196 volatile double x = f;
197 double magicVal = magic[ f < 0 ];
198 #if defined( __SSE2__ ) || defined (_MSC_VER)
199 // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
200 __m128d v = _mm_set_sd( x );
201 __m128d m = _mm_set_sd( magicVal );
202 v = _mm_add_sd( v, m );
203 v = _mm_sub_sd( v, m );
204 _mm_store_sd( (double*) &x, v );
205 #else
206 x += magicVal;
207 x -= magicVal;
208 #endif
209 f = x;
210 }
211
212 return (long long) f;
213 }
214
215
216 /*
217 Names created as:
218
219 #include <stdio.h>
220
221 const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
222
223 int main( void )
224 {
225
226 int i,j;
227
228 for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
229 for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
230 {
231 if( j == i )
232 continue;
233
234 vlog( "void %s2%s( void *, void *);\n", names[i], names[j] );
235 }
236
237
238 return 0;
239 }
240 */
241
// File-local absolute-value helpers for float and double; both are defined further down in this file.
static float my_fabsf( float x );
static double my_fabs( double x );
244
245
246
// Forward declarations of the conversion routines without the _sat suffix, one per
// ordered pair of distinct scalar types, named <source>2<destination>.
// In the definitions later in this file the first argument is the output pointer
// and the second is the input pointer; each call converts a single element.
static void uchar2char( void *, void *);
static void uchar2ushort( void *, void *);
static void uchar2short( void *, void *);
static void uchar2uint( void *, void *);
static void uchar2int( void *, void *);
static void uchar2float( void *, void *);
static void uchar2double( void *, void *);
static void uchar2ulong( void *, void *);
static void uchar2long( void *, void *);
static void char2uchar( void *, void *);
static void char2ushort( void *, void *);
static void char2short( void *, void *);
static void char2uint( void *, void *);
static void char2int( void *, void *);
static void char2float( void *, void *);
static void char2double( void *, void *);
static void char2ulong( void *, void *);
static void char2long( void *, void *);
static void ushort2uchar( void *, void *);
static void ushort2char( void *, void *);
static void ushort2short( void *, void *);
static void ushort2uint( void *, void *);
static void ushort2int( void *, void *);
static void ushort2float( void *, void *);
static void ushort2double( void *, void *);
static void ushort2ulong( void *, void *);
static void ushort2long( void *, void *);
static void short2uchar( void *, void *);
static void short2char( void *, void *);
static void short2ushort( void *, void *);
static void short2uint( void *, void *);
static void short2int( void *, void *);
static void short2float( void *, void *);
static void short2double( void *, void *);
static void short2ulong( void *, void *);
static void short2long( void *, void *);
static void uint2uchar( void *, void *);
static void uint2char( void *, void *);
static void uint2ushort( void *, void *);
static void uint2short( void *, void *);
static void uint2int( void *, void *);
static void uint2float( void *, void *);
static void uint2double( void *, void *);
static void uint2ulong( void *, void *);
static void uint2long( void *, void *);
static void int2uchar( void *, void *);
static void int2char( void *, void *);
static void int2ushort( void *, void *);
static void int2short( void *, void *);
static void int2uint( void *, void *);
static void int2float( void *, void *);
static void int2double( void *, void *);
static void int2ulong( void *, void *);
static void int2long( void *, void *);
static void float2uchar( void *, void *);
static void float2char( void *, void *);
static void float2ushort( void *, void *);
static void float2short( void *, void *);
static void float2uint( void *, void *);
static void float2int( void *, void *);
static void float2double( void *, void *);
static void float2ulong( void *, void *);
static void float2long( void *, void *);
static void double2uchar( void *, void *);
static void double2char( void *, void *);
static void double2ushort( void *, void *);
static void double2short( void *, void *);
static void double2uint( void *, void *);
static void double2int( void *, void *);
static void double2float( void *, void *);
static void double2ulong( void *, void *);
static void double2long( void *, void *);
static void ulong2uchar( void *, void *);
static void ulong2char( void *, void *);
static void ulong2ushort( void *, void *);
static void ulong2short( void *, void *);
static void ulong2uint( void *, void *);
static void ulong2int( void *, void *);
static void ulong2float( void *, void *);
static void ulong2double( void *, void *);
static void ulong2long( void *, void *);
static void long2uchar( void *, void *);
static void long2char( void *, void *);
static void long2ushort( void *, void *);
static void long2short( void *, void *);
static void long2uint( void *, void *);
static void long2int( void *, void *);
static void long2float( void *, void *);
static void long2double( void *, void *);
static void long2ulong( void *, void *);
337
338 /*
339 Conversion list created as
340
341 #include <stdio.h>
342
343 const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
344
345 int main( void )
346 {
347
348 int i,j;
349
350 for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
351 {
352 vlog( "{ " );
353 for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
354 {
355 if( j == i )
356 vlog( " NULL, " );
357 else
358 {
359 char s[64];
360 sprintf( s, "%s2%s,", names[j], names[i] );
361 vlog( "%15s ", s );
362 }
363 }
364 vlog( "},\n" );
365 }
366
367 return 0;
368 }
369
370 */
371 /*
372 Convert gConversions[kTypeCount][kTypeCount] = {
373 { NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, },
374 { uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, },
375 { uchar2ushort, char2ushort, NULL, short2ushort, uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, },
376 { uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, },
377 { uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, },
378 { uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, },
379 { uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, },
380 { uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, },
381 { uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, },
382 { uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } };
383 */
384
// Forward declarations of the _sat-suffixed conversion routines, one per ordered
// pair of distinct scalar types, named <source>2<destination>_sat.
// NOTE(review): the definitions are outside the section reviewed here; the argument
// order is presumably (output pointer, input pointer) as for the unsuffixed routines -- confirm.
static void uchar2char_sat( void *, void *);
static void uchar2ushort_sat( void *, void *);
static void uchar2short_sat( void *, void *);
static void uchar2uint_sat( void *, void *);
static void uchar2int_sat( void *, void *);
static void uchar2float_sat( void *, void *);
static void uchar2double_sat( void *, void *);
static void uchar2ulong_sat( void *, void *);
static void uchar2long_sat( void *, void *);
static void char2uchar_sat( void *, void *);
static void char2ushort_sat( void *, void *);
static void char2short_sat( void *, void *);
static void char2uint_sat( void *, void *);
static void char2int_sat( void *, void *);
static void char2float_sat( void *, void *);
static void char2double_sat( void *, void *);
static void char2ulong_sat( void *, void *);
static void char2long_sat( void *, void *);
static void ushort2uchar_sat( void *, void *);
static void ushort2char_sat( void *, void *);
static void ushort2short_sat( void *, void *);
static void ushort2uint_sat( void *, void *);
static void ushort2int_sat( void *, void *);
static void ushort2float_sat( void *, void *);
static void ushort2double_sat( void *, void *);
static void ushort2ulong_sat( void *, void *);
static void ushort2long_sat( void *, void *);
static void short2uchar_sat( void *, void *);
static void short2char_sat( void *, void *);
static void short2ushort_sat( void *, void *);
static void short2uint_sat( void *, void *);
static void short2int_sat( void *, void *);
static void short2float_sat( void *, void *);
static void short2double_sat( void *, void *);
static void short2ulong_sat( void *, void *);
static void short2long_sat( void *, void *);
static void uint2uchar_sat( void *, void *);
static void uint2char_sat( void *, void *);
static void uint2ushort_sat( void *, void *);
static void uint2short_sat( void *, void *);
static void uint2int_sat( void *, void *);
static void uint2float_sat( void *, void *);
static void uint2double_sat( void *, void *);
static void uint2ulong_sat( void *, void *);
static void uint2long_sat( void *, void *);
static void int2uchar_sat( void *, void *);
static void int2char_sat( void *, void *);
static void int2ushort_sat( void *, void *);
static void int2short_sat( void *, void *);
static void int2uint_sat( void *, void *);
static void int2float_sat( void *, void *);
static void int2double_sat( void *, void *);
static void int2ulong_sat( void *, void *);
static void int2long_sat( void *, void *);
static void float2uchar_sat( void *, void *);
static void float2char_sat( void *, void *);
static void float2ushort_sat( void *, void *);
static void float2short_sat( void *, void *);
static void float2uint_sat( void *, void *);
static void float2int_sat( void *, void *);
static void float2double_sat( void *, void *);
static void float2ulong_sat( void *, void *);
static void float2long_sat( void *, void *);
static void double2uchar_sat( void *, void *);
static void double2char_sat( void *, void *);
static void double2ushort_sat( void *, void *);
static void double2short_sat( void *, void *);
static void double2uint_sat( void *, void *);
static void double2int_sat( void *, void *);
static void double2float_sat( void *, void *);
static void double2ulong_sat( void *, void *);
static void double2long_sat( void *, void *);
static void ulong2uchar_sat( void *, void *);
static void ulong2char_sat( void *, void *);
static void ulong2ushort_sat( void *, void *);
static void ulong2short_sat( void *, void *);
static void ulong2uint_sat( void *, void *);
static void ulong2int_sat( void *, void *);
static void ulong2float_sat( void *, void *);
static void ulong2double_sat( void *, void *);
static void ulong2long_sat( void *, void *);
static void long2uchar_sat( void *, void *);
static void long2char_sat( void *, void *);
static void long2ushort_sat( void *, void *);
static void long2short_sat( void *, void *);
static void long2uint_sat( void *, void *);
static void long2int_sat( void *, void *);
static void long2float_sat( void *, void *);
static void long2double_sat( void *, void *);
static void long2ulong_sat( void *, void *);
475 /*
476 #include <stdio.h>
477
478 const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
479
480 int main( void )
481 {
482
483 int i,j;
484
485 for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
486 {
487 vlog( "{ " );
488 for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
489 {
490 if( j == i )
491 vlog( " NULL, " );
492 else
493 {
494 char s[64];
495 sprintf( s, "%s2%s_sat,", names[j], names[i] );
496 vlog( "%18s ", s );
497 }
498 }
499 vlog( "},\n" );
500 }
501
502 return 0;
503 }
504
505 Convert gSaturatedConversions[kTypeCount][kTypeCount] = {
506 { NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, },
507 { uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, ulong2char_sat, long2char_sat, },
508 { uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, },
509 { uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, },
510 { uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, },
511 { uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, double2int_sat, ulong2int_sat, long2int_sat, },
512 { uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, },
513 { uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, },
514 { uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, },
515 { uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, }
516 };
517 */
518
519 /*
520 #include <stdio.h>
521
522 const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
523 const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" };
524
525 int main( void )
526 {
527
528 int i,j;
529
530 for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
531 for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
532 {
533 if( j == i )
534 continue;
535
536 switch( i )
537 {
538 case 6: //float
539 if( j == 7 )
540 vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
541 else
542 vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] );
543 break;
544 case 7: //double
545 if( j == 6 )
546 vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
547 else
548 vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] );
549 break;
550 default:
                vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
553 break;
554 }
555 }
556
557
558 return 0;
559 }
560 */
561
my_fabsf(float x)562 float my_fabsf( float x )
563 {
564 union{ cl_uint u; float f; }u;
565 u.f = x;
566 u.u &= 0x7fffffff;
567 return u.f;
568 }
569
my_fabs(double x)570 double my_fabs( double x )
571 {
572 union{ cl_ulong u; double f; }u;
573 u.f = x;
574 u.u &= 0x7fffffffffffffffULL;
575 return u.f;
576 }
577
578 static float my_rintf( float f );
my_rintf(float f)579 static float my_rintf( float f )
580 {
581 static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
582
583 // Round fractional values to integer in round towards nearest mode
584 if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
585 {
586 volatile float x = f;
587 float magicVal = magic[ f < 0 ];
588
589 #if defined( __SSE__ )
590 // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
591 __m128 v = _mm_set_ss( x );
592 __m128 m = _mm_set_ss( magicVal );
593 v = _mm_add_ss( v, m );
594 v = _mm_sub_ss( v, m );
595 _mm_store_ss( (float*) &x, v );
596 #else
597 x += magicVal;
598 x -= magicVal;
599 #endif
600 f = x;
601 }
602
603 return f;
604 }
605
uchar2char(void * out,void * in)606 static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2ushort(void * out,void * in)607 static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2short(void * out,void * in)608 static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2uint(void * out,void * in)609 static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2int(void * out,void * in)610 static void uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2float(void * out,void * in)611 static void uchar2float( void *out, void *in)
612 {
613 cl_uchar l = ((cl_uchar*) in)[0];
614 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
615 }
uchar2double(void * out,void * in)616 static void uchar2double( void *out, void *in)
617 {
618 cl_uchar l = ((cl_uchar*) in)[0];
619 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
620 }
uchar2ulong(void * out,void * in)621 static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2long(void * out,void * in)622 static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; }
char2uchar(void * out,void * in)623 static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; }
char2ushort(void * out,void * in)624 static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; }
char2short(void * out,void * in)625 static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; }
char2uint(void * out,void * in)626 static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; }
char2int(void * out,void * in)627 static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; }
char2float(void * out,void * in)628 static void char2float( void *out, void *in)
629 {
630 cl_char l = ((cl_char*) in)[0];
631 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
632 }
char2double(void * out,void * in)633 static void char2double( void *out, void *in)
634 {
635 cl_char l = ((cl_char*) in)[0];
636 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
637 }
char2ulong(void * out,void * in)638 static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; }
char2long(void * out,void * in)639 static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; }
ushort2uchar(void * out,void * in)640 static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2char(void * out,void * in)641 static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2short(void * out,void * in)642 static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2uint(void * out,void * in)643 static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2int(void * out,void * in)644 static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2float(void * out,void * in)645 static void ushort2float( void *out, void *in)
646 {
647 cl_ushort l = ((cl_ushort*) in)[0];
648 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
649 }
ushort2double(void * out,void * in)650 static void ushort2double( void *out, void *in)
651 {
652 cl_ushort l = ((cl_ushort*) in)[0];
653 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
654 }
ushort2ulong(void * out,void * in)655 static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2long(void * out,void * in)656 static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; }
short2uchar(void * out,void * in)657 static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; }
short2char(void * out,void * in)658 static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; }
short2ushort(void * out,void * in)659 static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; }
short2uint(void * out,void * in)660 static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; }
short2int(void * out,void * in)661 static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; }
short2float(void * out,void * in)662 static void short2float( void *out, void *in)
663 {
664 cl_short l = ((cl_short*) in)[0];
665 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
666 }
short2double(void * out,void * in)667 static void short2double( void *out, void *in)
668 {
669 cl_short l = ((cl_short*) in)[0];
670 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
671 }
short2ulong(void * out,void * in)672 static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; }
short2long(void * out,void * in)673 static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; }
uint2uchar(void * out,void * in)674 static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; }
uint2char(void * out,void * in)675 static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; }
uint2ushort(void * out,void * in)676 static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; }
uint2short(void * out,void * in)677 static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; }
uint2int(void * out,void * in)678 static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; }
// Convert the cl_uint read from `in` to a float stored at `out`.
// The rounding applied when the value is not exactly representable is whatever
// the compiler's integer-to-float conversion does at run time.
static void uint2float( void *out, void *in)
{
    // Use volatile to prevent optimization by Clang compiler
    volatile cl_uint l = ((cl_uint *)in)[0];
    ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
}
uint2double(void * out,void * in)685 static void uint2double( void *out, void *in)
686 {
687 cl_uint l = ((cl_uint*) in)[0];
688 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
689 }
uint2ulong(void * out,void * in)690 static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; }
uint2long(void * out,void * in)691 static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; }
int2uchar(void * out,void * in)692 static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; }
int2char(void * out,void * in)693 static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; }
int2ushort(void * out,void * in)694 static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; }
int2short(void * out,void * in)695 static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; }
int2uint(void * out,void * in)696 static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; }
// Convert the cl_int read from `in` to a float stored at `out`.
// The rounding applied when the value is not exactly representable is whatever
// the compiler's integer-to-float conversion does at run time.
static void int2float( void *out, void *in)
{
    // Use volatile to prevent optimization by Clang compiler
    volatile cl_int l = ((cl_int *)in)[0];
    ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
}
int2double(void * out,void * in)703 static void int2double( void *out, void *in)
704 {
705 cl_int l = ((cl_int*) in)[0];
706 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
707 }
int2ulong(void * out,void * in)708 static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; }
int2long(void * out,void * in)709 static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; }
float2uchar(void * out,void * in)710 static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2char(void * out,void * in)711 static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2ushort(void * out,void * in)712 static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2short(void * out,void * in)713 static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2uint(void * out,void * in)714 static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2int(void * out,void * in)715 static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); }
float2double(void * out,void * in)716 static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; }
// Round the cl_float read from `in` with my_rintf, then store it in the cl_ulong at `out`.
// Values outside the destination range are not clamped here.
// MSVC builds for x86 / x64 take a separate path that handles inputs >= 2^63 by hand.
static void float2ulong( void *out, void *in)
{
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
    // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int.
    // However, fistp stores it as a signed int, and some of the test values won't
    // fit into a signed int. (These test values are >= 2^63.) The result on VS2005
    // is that these end up silently (at least by default settings) clamped to
    // the max lowest ulong.
    //
    // Workaround: for x >= 2^63, subtract 2^63 in floating point, convert the
    // remainder (which now fits in a signed 64-bit int), then add 2^63 back
    // as an integer.
    cl_float x = my_rintf(((cl_float *)in)[0]);
    if (x >= 9223372036854775808.0f) {
        x -= 9223372036854775808.0f;
        ((cl_ulong*) out)[0] = x;
        ((cl_ulong*) out)[0] += 9223372036854775808ULL;
    } else {
        ((cl_ulong*) out)[0] = x;
    }
#else
    // Everywhere else the compiler's float-to-unsigned conversion is used directly.
    ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]);
#endif
}
737
float2long(void * out,void * in)738 static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); }
double2uchar(void * out,void * in)739 static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); }
double2char(void * out,void * in)740 static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); }
double2ushort(void * out,void * in)741 static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); }
double2short(void * out,void * in)742 static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); }
double2uint(void * out,void * in)743 static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); }
double2int(void * out,void * in)744 static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); }
double2float(void * out,void * in)745 static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; }
double2ulong(void * out,void * in)746 static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); }
double2long(void * out,void * in)747 static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); }
ulong2uchar(void * out,void * in)748 static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; }
ulong2char(void * out,void * in)749 static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; }
ulong2ushort(void * out,void * in)750 static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; }
ulong2short(void * out,void * in)751 static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; }
ulong2uint(void * out,void * in)752 static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_ulong*) in)[0]; }
ulong2int(void * out,void * in)753 static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
ulong2float(void * out,void * in)754 static void ulong2float( void *out, void *in)
755 {
756 #if defined(_MSC_VER) && defined(_M_X64)
757 cl_ulong l = ((cl_ulong*) in)[0];
758 float result;
759 cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
760 _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
761 ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
762 #else
763 cl_ulong l = ((cl_ulong*) in)[0];
764 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
765 /* ARM VFP doesn't have hardware instruction for converting from 64-bit
766 * integer to float types, hence GCC ARM uses the floating-point emulation
767 * code despite which -mfloat-abi setting it is. But the emulation code in
768 * libgcc.a has only one rounding mode (round to nearest even in this case)
769 * and ignores the user rounding mode setting in hardware.
770 * As a result setting rounding modes in hardware won't give correct
771 * rounding results for type covert from 64-bit integer to float using GCC
772 * for ARM compiler so for testing different rounding modes, we need to use
773 * alternative reference function. ARM64 does have an instruction, however
774 * we cannot guarantee the compiler will use it. On all ARM architechures
775 * use emulation to calculate reference.*/
776 ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
777 #else
778 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
779 #endif
780 #endif
781 }
ulong2double(void * out,void * in)782 static void ulong2double( void *out, void *in)
783 {
784 #if defined(_MSC_VER)
785 cl_ulong l = ((cl_ulong*) in)[0];
786 double result;
787
788 cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
789 #if defined(_M_X64)
790 _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
791 #else
792 result = sl;
793 #endif
794 ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
795 #else
796 // Use volatile to prevent optimization by Clang compiler
797 volatile cl_ulong l = ((cl_ulong *)in)[0];
798 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
799 #endif
800 }
ulong2long(void * out,void * in)801 static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; }
long2uchar(void * out,void * in)802 static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; }
long2char(void * out,void * in)803 static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; }
long2ushort(void * out,void * in)804 static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; }
long2short(void * out,void * in)805 static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; }
long2uint(void * out,void * in)806 static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; }
long2int(void * out,void * in)807 static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; }
long2float(void * out,void * in)808 static void long2float( void *out, void *in)
809 {
810 #if defined(_MSC_VER) && defined(_M_X64)
811 cl_long l = ((cl_long*) in)[0];
812 float result;
813
814 _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
815 ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
816 #else
817 cl_long l = ((cl_long*) in)[0];
818 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
819 /* ARM VFP doesn't have hardware instruction for converting from 64-bit
820 * integer to float types, hence GCC ARM uses the floating-point emulation
821 * code despite which -mfloat-abi setting it is. But the emulation code in
822 * libgcc.a has only one rounding mode (round to nearest even in this case)
823 * and ignores the user rounding mode setting in hardware.
824 * As a result setting rounding modes in hardware won't give correct
825 * rounding results for type covert from 64-bit integer to float using GCC
826 * for ARM compiler so for testing different rounding modes, we need to use
827 * alternative reference function. ARM64 does have an instruction, however
828 * we cannot guarantee the compiler will use it. On all ARM architechures
829 * use emulation to calculate reference.*/
830 ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
831 #else
832 ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
833 #endif
834 #endif
835 }
long2double(void * out,void * in)836 static void long2double( void *out, void *in)
837 {
838 #if defined(_MSC_VER) && defined(_M_X64)
839 cl_long l = ((cl_long*) in)[0];
840 double result;
841
842 _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
843 ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
844 #else
845 cl_long l = ((cl_long*) in)[0];
846 ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
847 #endif
848 }
long2ulong(void * out,void * in)849 static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; }
850
// CLAMP( lo, x, hi ): yields lo when x < lo, hi when x > hi, and x otherwise.
// This is a function-like macro: each argument is substituted textually and
// may be evaluated more than once, so pass only side-effect-free expressions.
#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
852
853 // Done by hand
uchar2char_sat(void * out,void * in)854 static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; }
uchar2ushort_sat(void * out,void * in)855 static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2short_sat(void * out,void * in)856 static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2uint_sat(void * out,void * in)857 static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2int_sat(void * out,void * in)858 static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2float_sat(void * out,void * in)859 static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
uchar2double_sat(void * out,void * in)860 static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
uchar2ulong_sat(void * out,void * in)861 static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; }
uchar2long_sat(void * out,void * in)862 static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; }
char2uchar_sat(void * out,void * in)863 static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 0 : c; }
char2ushort_sat(void * out,void * in)864 static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; }
char2short_sat(void * out,void * in)865 static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; }
char2uint_sat(void * out,void * in)866 static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; }
char2int_sat(void * out,void * in)867 static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; }
char2float_sat(void * out,void * in)868 static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; }
char2double_sat(void * out,void * in)869 static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; }
char2ulong_sat(void * out,void * in)870 static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 0 : c; }
char2long_sat(void * out,void * in)871 static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; }
ushort2uchar_sat(void * out,void * in)872 static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; }
ushort2char_sat(void * out,void * in)873 static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; }
ushort2short_sat(void * out,void * in)874 static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 0x7fFF : u; }
ushort2uint_sat(void * out,void * in)875 static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2int_sat(void * out,void * in)876 static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2float_sat(void * out,void * in)877 static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
ushort2double_sat(void * out,void * in)878 static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
ushort2ulong_sat(void * out,void * in)879 static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; }
ushort2long_sat(void * out,void * in)880 static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; }
short2uchar_sat(void * out,void * in)881 static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); }
short2char_sat(void * out,void * in)882 static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); }
short2ushort_sat(void * out,void * in)883 static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; }
short2uint_sat(void * out,void * in)884 static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 0 : s; }
short2int_sat(void * out,void * in)885 static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; }
short2float_sat(void * out,void * in)886 static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; }
short2double_sat(void * out,void * in)887 static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; }
short2ulong_sat(void * out,void * in)888 static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 0 : s; }
short2long_sat(void * out,void * in)889 static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; }
uint2uchar_sat(void * out,void * in)890 static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); }
uint2char_sat(void * out,void * in)891 static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); }
uint2ushort_sat(void * out,void * in)892 static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); }
uint2short_sat(void * out,void * in)893 static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); }
uint2int_sat(void * out,void * in)894 static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); }
uint2float_sat(void * out,void * in)895 static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for <rdar://problem/5965527>
uint2double_sat(void * out,void * in)896 static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
uint2ulong_sat(void * out,void * in)897 static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; }
uint2long_sat(void * out,void * in)898 static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; }
int2uchar_sat(void * out,void * in)899 static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); }
int2char_sat(void * out,void * in)900 static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); }
int2ushort_sat(void * out,void * in)901 static void int2ushort_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); }
int2short_sat(void * out,void * in)902 static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); }
int2uint_sat(void * out,void * in)903 static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); }
int2float_sat(void * out,void * in)904 static void int2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_int*) in)[0]; }
int2double_sat(void * out,void * in)905 static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; }
int2ulong_sat(void * out,void * in)906 static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; }
int2long_sat(void * out,void * in)907 static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; }
float2uchar_sat(void * out,void * in)908 static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); }
float2char_sat(void * out,void * in)909 static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); }
float2ushort_sat(void * out,void * in)910 static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); }
float2short_sat(void * out,void * in)911 static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); }
float2uint_sat(void * out,void * in)912 static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); }
float2int_sat(void * out,void * in)913 static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); }
float2double_sat(void * out,void * in)914 static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; }
float2ulong_sat(void * out,void * in)915 static void float2ulong_sat( void *out, void *in)
916 {
917 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
918 // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int.
919 // However, fistp stores it as a signed int, and some of the test values won't
920 // fit into a signed int. (These test values are >= 2^63.) The result on VS2005
921 // is that these end up silently (at least by default settings) clamped to
922 // the max lowest ulong.
923 cl_float x = my_rintf(((cl_float *)in)[0]);
924 if (x >= 18446744073709551616.0f) { // 2^64
925 ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL;
926 } else if (x < 0) {
927 ((cl_ulong*) out)[0] = 0;
928 } else if (x >= 9223372036854775808.0f) { // 2^63
929 x -= 9223372036854775808.0f;
930 ((cl_ulong*) out)[0] = x;
931 ((cl_ulong*) out)[0] += 9223372036854775808ULL;
932 } else {
933 ((cl_ulong*) out)[0] = x;
934 }
935 #else
936 float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f;
937 #endif
938 }
939 // The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least)
940 // if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right.
941 // Switching it to (cl_long) f seems to fix that.
float2long_sat(void * out,void * in)942 static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; }
double2uchar_sat(void * out,void * in)943 static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); }
double2char_sat(void * out,void * in)944 static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); }
double2ushort_sat(void * out,void * in)945 static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); }
double2short_sat(void * out,void * in)946 static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); }
double2uint_sat(void * out,void * in)947 static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); }
double2int_sat(void * out,void * in)948 static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); }
double2float_sat(void * out,void * in)949 static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = (cl_float) ((double*) in)[0]; }
double2ulong_sat(void * out,void * in)950 static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; }
double2long_sat(void * out,void * in)951 static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; }
ulong2uchar_sat(void * out,void * in)952 static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); }
ulong2char_sat(void * out,void * in)953 static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); }
ulong2ushort_sat(void * out,void * in)954 static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); }
ulong2short_sat(void * out,void * in)955 static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); }
ulong2uint_sat(void * out,void * in)956 static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); }
ulong2int_sat(void * out,void * in)957 static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); }
ulong2float_sat(void * out,void * in)958 static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
ulong2double_sat(void * out,void * in)959 static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
ulong2long_sat(void * out,void * in)960 static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); }
long2uchar_sat(void * out,void * in)961 static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); }
long2char_sat(void * out,void * in)962 static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); }
long2ushort_sat(void * out,void * in)963 static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); }
long2short_sat(void * out,void * in)964 static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); }
long2uint_sat(void * out,void * in)965 static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); }
long2int_sat(void * out,void * in)966 static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); }
long2float_sat(void * out,void * in)967 static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; }
long2double_sat(void * out,void * in)968 static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; }
long2ulong_sat(void * out,void * in)969 static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); }
970
971 /*
972 #include <stdio.h>
973
974 char *ground[] = { "",
975 "_rte",
976 "_rtp",
977 "_rtn",
978 "_rtz"
979 };
980
981 const char *gTypeNames[ ] = {
982 "uchar", "char",
983 "ushort", "short",
984 "uint", "int",
985 "float", "double",
986 "ulong", "long"
987 };
988
989
990 int main( void )
991 {
992 int i, j;
993
994 for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ )
995 for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ )
996 {
997 vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] );
998 vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] );
999 }
1000
1001 return 0;
1002
1003 }
1004 */
1005
1006
1007 float clampf_uchar( float );
1008 double clampd_uchar( double );
1009 float clampf_uchar_rte( float );
1010 double clampd_uchar_rte( double );
1011 float clampf_uchar_rtp( float );
1012 double clampd_uchar_rtp( double );
1013 float clampf_uchar_rtn( float );
1014 double clampd_uchar_rtn( double );
1015 float clampf_uchar_rtz( float );
1016 double clampd_uchar_rtz( double );
1017 float clampf_char( float );
1018 double clampd_char( double );
1019 float clampf_char_rte( float );
1020 double clampd_char_rte( double );
1021 float clampf_char_rtp( float );
1022 double clampd_char_rtp( double );
1023 float clampf_char_rtn( float );
1024 double clampd_char_rtn( double );
1025 float clampf_char_rtz( float );
1026 double clampd_char_rtz( double );
1027 float clampf_ushort( float );
1028 double clampd_ushort( double );
1029 float clampf_ushort_rte( float );
1030 double clampd_ushort_rte( double );
1031 float clampf_ushort_rtp( float );
1032 double clampd_ushort_rtp( double );
1033 float clampf_ushort_rtn( float );
1034 double clampd_ushort_rtn( double );
1035 float clampf_ushort_rtz( float );
1036 double clampd_ushort_rtz( double );
1037 float clampf_short( float );
1038 double clampd_short( double );
1039 float clampf_short_rte( float );
1040 double clampd_short_rte( double );
1041 float clampf_short_rtp( float );
1042 double clampd_short_rtp( double );
1043 float clampf_short_rtn( float );
1044 double clampd_short_rtn( double );
1045 float clampf_short_rtz( float );
1046 double clampd_short_rtz( double );
1047 float clampf_uint( float );
1048 double clampd_uint( double );
1049 float clampf_uint_rte( float );
1050 double clampd_uint_rte( double );
1051 float clampf_uint_rtp( float );
1052 double clampd_uint_rtp( double );
1053 float clampf_uint_rtn( float );
1054 double clampd_uint_rtn( double );
1055 float clampf_uint_rtz( float );
1056 double clampd_uint_rtz( double );
1057 float clampf_int( float );
1058 double clampd_int( double );
1059 float clampf_int_rte( float );
1060 double clampd_int_rte( double );
1061 float clampf_int_rtp( float );
1062 double clampd_int_rtp( double );
1063 float clampf_int_rtn( float );
1064 double clampd_int_rtn( double );
1065 float clampf_int_rtz( float );
1066 double clampd_int_rtz( double );
1067 float clampf_float( float );
1068 double clampd_float( double );
1069 float clampf_float_rte( float );
1070 double clampd_float_rte( double );
1071 float clampf_float_rtp( float );
1072 double clampd_float_rtp( double );
1073 float clampf_float_rtn( float );
1074 double clampd_float_rtn( double );
1075 float clampf_float_rtz( float );
1076 double clampd_float_rtz( double );
1077 float clampf_double( float );
1078 double clampd_double( double );
1079 float clampf_double_rte( float );
1080 double clampd_double_rte( double );
1081 float clampf_double_rtp( float );
1082 double clampd_double_rtp( double );
1083 float clampf_double_rtn( float );
1084 double clampd_double_rtn( double );
1085 float clampf_double_rtz( float );
1086 double clampd_double_rtz( double );
1087 float clampf_ulong( float );
1088 double clampd_ulong( double );
1089 float clampf_ulong_rte( float );
1090 double clampd_ulong_rte( double );
1091 float clampf_ulong_rtp( float );
1092 double clampd_ulong_rtp( double );
1093 float clampf_ulong_rtn( float );
1094 double clampd_ulong_rtn( double );
1095 float clampf_ulong_rtz( float );
1096 double clampd_ulong_rtz( double );
1097 float clampf_long( float );
1098 double clampd_long( double );
1099 float clampf_long_rte( float );
1100 double clampd_long_rte( double );
1101 float clampf_long_rtp( float );
1102 double clampd_long_rtp( double );
1103 float clampf_long_rtn( float );
1104 double clampd_long_rtn( double );
1105 float clampf_long_rtz( float );
1106 double clampd_long_rtz( double );
1107
1108 /*
1109 #include <stdio.h>
1110
1111 char *ground[] = { "",
1112 "_rte",
1113 "_rtp",
1114 "_rtn",
1115 "_rtz"
1116 };
1117
1118 const char *gTypeNames[ ] = {
1119 "uchar", "char",
1120 "ushort", "short",
1121 "uint", "int",
1122 "float", "double",
1123 "ulong", "long"
1124 };
1125
1126
1127 int main( void )
1128 {
1129 int i, j;
1130
1131 for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ )
1132 {
1133 vlog( "{\t" );
1134 for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ )
1135 vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] );
1136
1137 vlog( "\t},\n" );
1138 }
1139
1140 return 0;
1141
1142 }
1143 */
// Table of float input-clamping functions, indexed as
// gClampFloat[ destination type ][ rounding mode ].
// Rows follow the order of gTypeNames (uchar, char, ushort, short, uint, int,
// float, double, ulong, long); columns follow the order of gRoundingModeNames
// (default, _rte, _rtp, _rtn, _rtz).
clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = {
{ clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, },
{ clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, },
{ clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, },
{ clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, },
{ clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, },
{ clampf_int, clampf_int_rte, clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, },
{ clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, },
{ clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, },
{ clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, },
{ clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, }
};
1156
// Table of double input-clamping functions, indexed as
// gClampDouble[ destination type ][ rounding mode ].
// Rows follow the order of gTypeNames (uchar, char, ushort, short, uint, int,
// float, double, ulong, long); columns follow the order of gRoundingModeNames
// (default, _rte, _rtp, _rtn, _rtz).
clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = {
{ clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, },
{ clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, },
{ clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, },
{ clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, },
{ clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, },
{ clampd_int, clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, },
{ clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, },
{ clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, },
{ clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, },
{ clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, }
};
1169
// When _WIN32 is defined, make every __attribute__(...) expand to nothing so
// the attribute-annotated declarations that follow are seen without it.
#if defined (_WIN32)
#define __attribute__(X)
#endif
1173
1174 static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline));
1175 static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline));
1176
// Clamp v into [lo, hi] in single precision: first raise v to lo when it is
// below lo, then return the smaller of that value and hi. When the final
// `v < hi` comparison is false (including for a NaN v) the result is hi.
static inline float fclamp( float lo, float v, float hi )
{
    if( v < lo )
        v = lo;

    if( v < hi )
        return v;
    return hi;
}
// Clamp v into [lo, hi] in double precision: first raise v to lo when it is
// below lo, then return the smaller of that value and hi. When the final
// `v < hi` comparison is false (including for a NaN v) the result is hi.
static inline double dclamp( double lo, double v, double hi )
{
    if( v < lo )
        v = lo;

    if( v < hi )
        return v;
    return hi;
}
1179
1180 // Clamp unsaturated inputs into range so we don't get test errors:
clampf_uchar(float f)1181 float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); }
// uchar, unsuffixed variant (double input): confines f to
// [-0.5, 255.5 - 128*DBL_EPSILON] via dclamp.
double clampd_uchar( double f )
{
    const double lowest = -0.5;
    const double highest = 255.5 - 128.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// uchar, _rte variant (float input): same interval as clampf_uchar,
// [-0.5, 255.5 - 128*FLT_EPSILON].
float clampf_uchar_rte( float f )
{
    const float lowest = -0.5f;
    const float highest = 255.5f - 128.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// uchar, _rte variant (double input): same interval as clampd_uchar,
// [-0.5, 255.5 - 128*DBL_EPSILON].
double clampd_uchar_rte( double f )
{
    const double lowest = -0.5;
    const double highest = 255.5 - 128.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// uchar, _rtp variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 255] via fclamp.
float clampf_uchar_rtp( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = 255.0f;
    return fclamp( lowest, f, highest );
}
// uchar, _rtp variant (double input): confines f to
// [-1 + DBL_EPSILON/2, 255] via dclamp.
double clampd_uchar_rtp( double f )
{
    const double lowest = -1.0 + DBL_EPSILON/2.0;
    const double highest = 255.0;
    return dclamp( lowest, f, highest );
}
// uchar, _rtn variant (float input): confines f to
// [-0.0, 256 - 128*FLT_EPSILON]; inputs below zero come back as -0.0f.
float clampf_uchar_rtn( float f )
{
    const float lowest = -0.0f;
    const float highest = 256.0f - 128.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// uchar, _rtn variant (double input): confines f to
// [-0.0, 256 - 128*DBL_EPSILON]; inputs below zero come back as -0.0.
double clampd_uchar_rtn( double f )
{
    const double lowest = -0.0;
    const double highest = 256.0 - 128.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// uchar, _rtz variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 256 - 128*FLT_EPSILON] via fclamp.
float clampf_uchar_rtz( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = 256.0f - 128.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// uchar, _rtz variant (double input): confines f to
// [-1 + DBL_EPSILON/2, 256 - 128*DBL_EPSILON] via dclamp.
// The upper bound is written with double literals throughout, matching
// clampd_uchar_rtn; 128.0 and 128.0f denote the same value here.
double clampd_uchar_rtz( double f )
{
    const double lowest = -1.0 + DBL_EPSILON/2.0;
    const double highest = 256.0 - 128.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
1191
// char, unsuffixed variant (float input): confines f to
// [-128.5, 127.5 - 64*FLT_EPSILON] via fclamp.
float clampf_char( float f )
{
    const float lowest = -128.5f;
    const float highest = 127.5f - 64.f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// char, unsuffixed variant (double input): confines f to
// [-128.5, 127.5 - 64*DBL_EPSILON] via dclamp.
double clampd_char( double f )
{
    const double lowest = -128.5;
    const double highest = 127.5 - 64. * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// char, _rte variant (float input): same interval as clampf_char,
// [-128.5, 127.5 - 64*FLT_EPSILON].
float clampf_char_rte( float f )
{
    const float lowest = -128.5f;
    const float highest = 127.5f - 64.f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// char, _rte variant (double input): same interval as clampd_char,
// [-128.5, 127.5 - 64*DBL_EPSILON].
double clampd_char_rte( double f )
{
    const double lowest = -128.5;
    const double highest = 127.5 - 64. * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// char, _rtp variant (float input): confines f to
// [-129 + 128*FLT_EPSILON, 127] via fclamp.
float clampf_char_rtp( float f )
{
    const float lowest = -129.0f + 128.f*FLT_EPSILON;
    const float highest = 127.f;
    return fclamp( lowest, f, highest );
}
// char, _rtp variant (double input): confines f to
// [-129 + 128*DBL_EPSILON, 127] via dclamp.
double clampd_char_rtp( double f )
{
    const double lowest = -129.0 + 128.*DBL_EPSILON;
    const double highest = 127.;
    return dclamp( lowest, f, highest );
}
// char, _rtn variant (float input): confines f to
// [-128, 128 - 64*FLT_EPSILON] via fclamp.
float clampf_char_rtn( float f )
{
    const float lowest = -128.0f;
    const float highest = 128.f - 64.0f*FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// char, _rtn variant (double input): confines f to
// [-128, 128 - 64*DBL_EPSILON] via dclamp.
double clampd_char_rtn( double f )
{
    const double lowest = -128.0;
    const double highest = 128. - 64.0*DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// char, _rtz variant (float input): confines f to
// [-129 + 128*FLT_EPSILON, 128 - 64*FLT_EPSILON] via fclamp.
float clampf_char_rtz( float f )
{
    const float lowest = -129.0f + 128.f*FLT_EPSILON;
    const float highest = 128.f - 64.0f*FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// char, _rtz variant (double input): confines f to
// [-129 + 128*DBL_EPSILON, 128 - 64*DBL_EPSILON] via dclamp.
double clampd_char_rtz( double f )
{
    const double lowest = -129.0 + 128.*DBL_EPSILON;
    const double highest = 128. - 64.0*DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
1202
// ushort, unsuffixed variant (float input): confines f to
// [-0.5, 65535.5 - 32768*FLT_EPSILON] via fclamp.
float clampf_ushort( float f )
{
    const float lowest = -0.5f;
    const float highest = 65535.5f - 32768.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// ushort, unsuffixed variant (double input): confines f to
// [-0.5, 65535.5 - 32768*DBL_EPSILON] via dclamp.
double clampd_ushort( double f )
{
    const double lowest = -0.5;
    const double highest = 65535.5 - 32768.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// ushort, _rte variant (float input): same interval as clampf_ushort,
// [-0.5, 65535.5 - 32768*FLT_EPSILON].
float clampf_ushort_rte( float f )
{
    const float lowest = -0.5f;
    const float highest = 65535.5f - 32768.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// ushort, _rte variant (double input): same interval as clampd_ushort,
// [-0.5, 65535.5 - 32768*DBL_EPSILON].
double clampd_ushort_rte( double f )
{
    const double lowest = -0.5;
    const double highest = 65535.5 - 32768.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// ushort, _rtp variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 65535] via fclamp.
float clampf_ushort_rtp( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = 65535.0f;
    return fclamp( lowest, f, highest );
}
// ushort, _rtp variant (double input): confines f to
// [-1 + DBL_EPSILON/2, 65535] via dclamp.
double clampd_ushort_rtp( double f )
{
    const double lowest = -1.0 + DBL_EPSILON/2.0;
    const double highest = 65535.0;
    return dclamp( lowest, f, highest );
}
// ushort, _rtn variant (float input): confines f to
// [-0.0, 65536 - 32768*FLT_EPSILON]; inputs below zero come back as -0.0f.
float clampf_ushort_rtn( float f )
{
    const float lowest = -0.0f;
    const float highest = 65536.0f - 32768.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// ushort, _rtn variant (double input): confines f to
// [-0.0, 65536 - 32768*DBL_EPSILON]; inputs below zero come back as -0.0.
double clampd_ushort_rtn( double f )
{
    const double lowest = -0.0;
    const double highest = 65536.0 - 32768.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// ushort, _rtz variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 65536 - 32768*FLT_EPSILON] via fclamp.
float clampf_ushort_rtz( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = 65536.0f - 32768.0f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// ushort, _rtz variant (double input): confines f to
// [-1 + DBL_EPSILON/2, 65536 - 32768*DBL_EPSILON] via dclamp.
// The upper bound is written with double literals throughout, matching
// clampd_ushort_rtn; 32768.0 and 32768.0f denote the same value here.
double clampd_ushort_rtz( double f )
{
    const double lowest = -1.0 + DBL_EPSILON/2.0;
    const double highest = 65536.0 - 32768.0 * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
1213
// short, unsuffixed variant (float input): confines f to
// [-32768.5, 32767.5 - 16384*FLT_EPSILON] via fclamp.
float clampf_short( float f )
{
    const float lowest = -32768.5f;
    const float highest = 32767.5f - 16384.f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// short, unsuffixed variant (double input): confines f to
// [-32768.5, 32767.5 - 16384*DBL_EPSILON] via dclamp.
double clampd_short( double f )
{
    const double lowest = -32768.5;
    const double highest = 32767.5 - 16384. * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// short, _rte variant (float input): same interval as clampf_short,
// [-32768.5, 32767.5 - 16384*FLT_EPSILON].
float clampf_short_rte( float f )
{
    const float lowest = -32768.5f;
    const float highest = 32767.5f - 16384.f * FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// short, _rte variant (double input): same interval as clampd_short,
// [-32768.5, 32767.5 - 16384*DBL_EPSILON].
double clampd_short_rte( double f )
{
    const double lowest = -32768.5;
    const double highest = 32767.5 - 16384. * DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// short, _rtp variant (float input): confines f to
// [-32769 + 32768*FLT_EPSILON, 32767] via fclamp.
float clampf_short_rtp( float f )
{
    const float lowest = -32769.0f + 32768.f*FLT_EPSILON;
    const float highest = 32767.f;
    return fclamp( lowest, f, highest );
}
// short, _rtp variant (double input): confines f to
// [-32769 + 32768*DBL_EPSILON, 32767] via dclamp.
double clampd_short_rtp( double f )
{
    const double lowest = -32769.0 + 32768.*DBL_EPSILON;
    const double highest = 32767.;
    return dclamp( lowest, f, highest );
}
// short, _rtn variant (float input): confines f to
// [-32768, 32768 - 16384*FLT_EPSILON] via fclamp.
float clampf_short_rtn( float f )
{
    const float lowest = -32768.0f;
    const float highest = 32768.f - 16384.0f*FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// short, _rtn variant (double input): confines f to
// [-32768, 32768 - 16384*DBL_EPSILON] via dclamp.
double clampd_short_rtn( double f )
{
    const double lowest = -32768.0;
    const double highest = 32768. - 16384.0*DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
// short, _rtz variant (float input): confines f to
// [-32769 + 32768*FLT_EPSILON, 32768 - 16384*FLT_EPSILON] via fclamp.
float clampf_short_rtz( float f )
{
    const float lowest = -32769.0f + 32768.f*FLT_EPSILON;
    const float highest = 32768.f - 16384.0f*FLT_EPSILON;
    return fclamp( lowest, f, highest );
}
// short, _rtz variant (double input): confines f to
// [-32769 + 32768*DBL_EPSILON, 32768 - 16384*DBL_EPSILON] via dclamp.
double clampd_short_rtz( double f )
{
    const double lowest = -32769.0 + 32768.*DBL_EPSILON;
    const double highest = 32768. - 16384.0*DBL_EPSILON;
    return dclamp( lowest, f, highest );
}
1224
// uint, unsuffixed variant (float input): confines f to
// [-0.5, 0x1.fffffep31] via fclamp (the upper bound equals 2^32 - 256).
float clampf_uint( float f )
{
    const float lowest = -0.5f;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7);
    return fclamp( lowest, f, highest );
}
clampd_uint(double f)1226 double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); }
// uint, _rte variant (float input): same interval as clampf_uint,
// [-0.5, 0x1.fffffep31].
float clampf_uint_rte( float f )
{
    const float lowest = -0.5f;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7);
    return fclamp( lowest, f, highest );
}
clampd_uint_rte(double f)1228 double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); }
// uint, _rtp variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 0x1.fffffep31] via fclamp.
float clampf_uint_rtp( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7);
    return fclamp( lowest, f, highest );
}
clampd_uint_rtp(double f)1230 double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); }
// uint, _rtn variant (float input): confines f to
// [-0.0, 0x1.fffffep31]; inputs below zero come back as -0.0f.
float clampf_uint_rtn( float f )
{
    const float lowest = -0.0f;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7);
    return fclamp( lowest, f, highest );
}
// uint, _rtn variant (double input): confines f to
// [-0.0, 0x1.fffffffffffffp31]; inputs below zero come back as -0.0.
double clampd_uint_rtn( double f )
{
    const double lowest = -0.0;
    const double highest = MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21);
    return dclamp( lowest, f, highest );
}
// uint, _rtz variant (float input): confines f to
// [-1 + FLT_EPSILON/2, 0x1.fffffep31] via fclamp.
float clampf_uint_rtz( float f )
{
    const float lowest = -1.0f + FLT_EPSILON/2.0f;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7);
    return fclamp( lowest, f, highest );
}
// uint, _rtz variant (double input): confines f to
// [-1 + DBL_EPSILON/2, 0x1.fffffffffffffp31] via dclamp.
double clampd_uint_rtz( double f )
{
    const double lowest = -1.0 + DBL_EPSILON/2.0;
    const double highest = MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21);
    return dclamp( lowest, f, highest );
}
1235
// int, unsuffixed variant (float input): confines f to
// [INT_MIN, 0x1.fffffep30] via fclamp (the upper bound equals 2^31 - 128).
// INT_MIN is converted to float when it initialises the lower bound.
float clampf_int( float f )
{
    const float lowest = INT_MIN;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6);
    return fclamp( lowest, f, highest );
}
clampd_int(double f)1237 double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
// int, _rte variant (float input): same interval as clampf_int,
// [INT_MIN, 0x1.fffffep30].
float clampf_int_rte( float f )
{
    const float lowest = INT_MIN;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6);
    return fclamp( lowest, f, highest );
}
clampd_int_rte(double f)1239 double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
// int, _rtp variant (float input): same interval as clampf_int,
// [INT_MIN, 0x1.fffffep30].
float clampf_int_rtp( float f )
{
    const float lowest = INT_MIN;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6);
    return fclamp( lowest, f, highest );
}
clampd_int_rtp(double f)1241 double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX ); }
// int, _rtn variant (float input): same interval as clampf_int,
// [INT_MIN, 0x1.fffffep30].
float clampf_int_rtn( float f )
{
    const float lowest = INT_MIN;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6);
    return fclamp( lowest, f, highest );
}
clampd_int_rtn(double f)1243 double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
// int, _rtz variant (float input): same interval as clampf_int,
// [INT_MIN, 0x1.fffffep30].
float clampf_int_rtz( float f )
{
    const float lowest = INT_MIN;
    const float highest = MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6);
    return fclamp( lowest, f, highest );
}
clampd_int_rtz(double f)1245 double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
1246
// float, unsuffixed variant (float input): no limiting is applied.
float clampf_float( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, unsuffixed variant (double input): no limiting is applied.
double clampd_float( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rte variant (float input): no limiting is applied.
float clampf_float_rte( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rte variant (double input): no limiting is applied.
double clampd_float_rte( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtp variant (float input): no limiting is applied.
float clampf_float_rtp( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtp variant (double input): no limiting is applied.
double clampd_float_rtp( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtn variant (float input): no limiting is applied.
float clampf_float_rtn( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtn variant (double input): no limiting is applied.
double clampd_float_rtn( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtz variant (float input): no limiting is applied.
float clampf_float_rtz( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// float, _rtz variant (double input): no limiting is applied.
double clampd_float_rtz( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
1257
// double, unsuffixed variant (float input): no limiting is applied.
float clampf_double( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, unsuffixed variant (double input): no limiting is applied.
double clampd_double( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rte variant (float input): no limiting is applied.
float clampf_double_rte( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rte variant (double input): no limiting is applied.
double clampd_double_rte( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rtp variant (float input): no limiting is applied.
float clampf_double_rtp( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rtp variant (double input): no limiting is applied.
double clampd_double_rtp( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rtn variant (float input): no limiting is applied.
float clampf_double_rtn( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rtn variant (double input): no limiting is applied.
double clampd_double_rtn( double f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
// double, _rtz variant (float input): no limiting is applied.
float clampf_double_rtz( float f )
{
    // Pass-through: the input is returned unchanged.
    return f;
}
clampd_double_rtz(double f)1267