1 // Auto-generated file. Do not edit! 2 // Template: src/f32-f16-vcvt/scalar-bitcast.c.in 3 // Generator: tools/xngen 4 // 5 // Copyright 2021 Google LLC 6 // 7 // This source code is licensed under the BSD-style license found in the 8 // LICENSE file in the root directory of this source tree. 9 10 #include <assert.h> 11 12 #include <xnnpack/common.h> 13 #include <xnnpack/math.h> 14 #include <xnnpack/vcvt.h> 15 16 #include <fp16.h> 17 18 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x2(size_t n,const float * input,void * output,const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS (1)])19void xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x2( 20 size_t n, 21 const float* input, 22 void* output, 23 const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 24 { 25 assert(n != 0); 26 assert(n % sizeof(float) == 0); 27 assert(input != NULL); 28 assert(output != NULL); 29 30 const uint32_t vnonsign_mask = params->scalar_bitcast.nonsign_mask; 31 const uint32_t vexp_bias = params->scalar_bitcast.exp_bias; 32 const float vscale_to_inf = params->scalar_bitcast.scale_to_inf; 33 const uint32_t vexpw_max = params->scalar_bitcast.expw_max; 34 const float vscale_to_zero = params->scalar_bitcast.scale_to_zero; 35 const uint32_t vbias_min = params->scalar_bitcast.bias_min; 36 const uint16_t vexph_mask = params->scalar_bitcast.exph_mask; 37 const uint16_t vmanth_mask = params->scalar_bitcast.manth_mask; 38 const uint16_t vnanh = params->scalar_bitcast.nanh; 39 40 const uint32_t* i = (const uint32_t*) input; 41 uint16_t* o = (uint16_t*) output; 42 for (; n >= 2 * sizeof(float); n -= 2 * sizeof(float)) { 43 const uint32_t vw0 = i[0]; 44 const uint32_t vw1 = i[1]; 45 i += 2; 46 47 const uint32_t vnonsignw0 = vw0 & vnonsign_mask; 48 const uint32_t vnonsignw1 = vw1 & vnonsign_mask; 49 50 float vf0 = fp32_from_bits(vnonsignw0); 51 float vf1 = fp32_from_bits(vnonsignw1); 52 const uint32_t vsignw0 = vw0 ^ vnonsignw0; 53 const uint32_t vsignw1 = vw1 ^ vnonsignw1; 54 uint32_t vbias0 = vnonsignw0 + vexp_bias; 55 uint32_t vbias1 = vnonsignw1 + vexp_bias; 56 57 vf0 *= vscale_to_inf; 58 vf1 *= vscale_to_inf; 59 vbias0 &= vexpw_max; 60 vbias1 &= vexpw_max; 61 62 vf0 *= vscale_to_zero; 63 vf1 *= vscale_to_zero; 64 vbias0 = math_max_u32(vbias0, vbias_min); 65 vbias1 = math_max_u32(vbias1, vbias_min); 66 67 vf0 += fp32_from_bits(vbias0); 68 vf1 += fp32_from_bits(vbias1); 69 70 const uint32_t vbits0 = fp32_to_bits(vf0); 71 const uint32_t vbits1 = fp32_to_bits(vf1); 72 73 const uint16_t vexph0 = (uint16_t) (vbits0 >> 13) & vexph_mask; 74 const uint16_t vexph1 = (uint16_t) (vbits1 >> 13) & vexph_mask; 75 const uint16_t vmanth0 = (uint16_t) vbits0 & vmanth_mask; 76 const uint16_t vmanth1 = (uint16_t) vbits1 & vmanth_mask; 77 const uint16_t vsignh0 = (uint16_t) (vsignw0 >> 16); 78 const uint16_t vsignh1 = (uint16_t) (vsignw1 >> 16); 79 80 uint16_t vh0 = vexph0 + vmanth0; 81 uint16_t vh1 = vexph1 + vmanth1; 82 if XNN_UNPREDICTABLE(vnonsignw0 > vexpw_max) { 83 vh0 = vnanh; 84 } 85 if XNN_UNPREDICTABLE(vnonsignw1 > vexpw_max) { 86 vh1 = vnanh; 87 } 88 vh0 |= vsignh0; 89 vh1 |= vsignh1; 90 91 o[0] = vh0; 92 o[1] = vh1; 93 o += 2; 94 } 95 if XNN_UNLIKELY(n != 0) { 96 const uint32_t vw = *i; 97 98 const uint32_t vnonsignw = vw & vnonsign_mask; 99 100 float vf = fp32_from_bits(vnonsignw); 101 const uint32_t vsignw = vw ^ vnonsignw; 102 uint32_t vbias = vnonsignw + vexp_bias; 103 104 vf *= vscale_to_inf; 105 vbias &= vexpw_max; 106 107 vf *= vscale_to_zero; 108 vbias = math_max_u32(vbias, vbias_min); 109 110 vf += fp32_from_bits(vbias); 111 112 const uint32_t vbits = fp32_to_bits(vf); 113 114 const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 115 const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 116 const uint16_t vsignh = (uint16_t) (vsignw >> 16); 117 118 uint16_t vh = vexph + vmanth; 119 if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 120 vh = vnanh; 121 } 122 vh |= vsignh; 123 124 *o = vh; 125 } 126 } 127