// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#include <assert.h>

#include <xnnpack/common.h>
#include <xnnpack/vcvt.h>

#include <fp16.h>


// Scalar microkernel template: reads 16-bit elements from `input`, expands
// each one into a 32-bit word and stores it to `output`. The main loop is
// unrolled BATCH_TILE times by the generator.
//
//   n      - size of the input in BYTES (not elements); asserted to be
//            non-zero and a multiple of sizeof(uint16_t).
//   input  - source buffer, accessed as an array of uint16_t.
//   output - destination buffer; one 32-bit word is written per input
//            element, stored as a raw uint32_t bit pattern.
//   params - provides the masks, offsets and scale factors used below through
//            params->scalar. Their numeric values are set by the caller and
//            are not visible in this file.
//
// NOTE(review): judging by the kernel name and the <fp16.h> include, the input
// is presumably IEEE half-precision and the output IEEE single-precision;
// confirm against the code that initializes `params`.
// NOTE(review): fp32_from_bits()/fp32_to_bits() are assumed to reinterpret
// between uint32_t and float bit patterns without numeric conversion;
// confirm in the header that declares them.
void xnn_f16_f32_vcvt_ukernel__scalar_x${BATCH_TILE}(
    size_t n,
    const void* input,
    float* output,
    const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(n != 0);
  assert(n % sizeof(uint16_t) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Read every conversion constant once, before any loop.
  const uint32_t vsign_mask = params->scalar.sign_mask;
  const uint32_t vexp_offset = params->scalar.exp_offset;
  const float vexp_scale = params->scalar.exp_scale;
  const uint32_t vmagic_mask = params->scalar.magic_mask;
  const float vmagic_bias = params->scalar.magic_bias;
  const uint32_t vdenorm_cutoff = params->scalar.denorm_cutoff;

  // The input is walked as 16-bit elements. The output is written through a
  // uint32_t pointer because results are assembled as integer bit patterns.
  const uint16_t* i = (const uint16_t*) input;
  uint32_t* o = (uint32_t*) output;
  $if BATCH_TILE > 1:
    // Main loop: BATCH_TILE elements per iteration. Each step below is applied
    // to all BATCH_TILE elements before the next step starts.
    for (; n >= ${BATCH_TILE} * sizeof(uint16_t); n -= ${BATCH_TILE} * sizeof(uint16_t)) {
      $for N in range(BATCH_TILE):
        const uint16_t vh${N} = i[${N}];
      i += ${BATCH_TILE};

      // Move the 16 input bits into the upper half of a 32-bit word.
      $for N in range(BATCH_TILE):
        const uint32_t vw${N} = (uint32_t) vh${N} << 16;

      // Keep only the bits selected by sign_mask; OR-ed back in at the end.
      $for N in range(BATCH_TILE):
        const uint32_t vsign${N} = vw${N} & vsign_mask;

      // Unsigned doubling == shift left by one; the top bit is discarded.
      $for N in range(BATCH_TILE):
        const uint32_t v2w${N} = vw${N} + vw${N};

      // Candidate A: shift right by 4, add exp_offset as an integer, then
      // multiply by exp_scale as a float.
      $for N in range(BATCH_TILE):
        const uint32_t vnorm${N} = fp32_to_bits(fp32_from_bits((v2w${N} >> 4) + vexp_offset) * vexp_scale);

      // Candidate B: shift right by 17, OR in magic_mask, then subtract
      // magic_bias as a float.
      $for N in range(BATCH_TILE):
        const uint32_t vdenorm${N} = fp32_to_bits(fp32_from_bits((v2w${N} >> 17) | vmagic_mask) - vmagic_bias);

      // Candidate B is used when v2w is below denorm_cutoff, candidate A
      // otherwise; the sign bits are then merged in.
      $for N in range(BATCH_TILE):
        const uint32_t vf${N} = vsign${N} | (XNN_UNPREDICTABLE(v2w${N} < vdenorm_cutoff) ? vdenorm${N} : vnorm${N});

      $for N in range(BATCH_TILE):
        o[${N}] = vf${N};
      o += ${BATCH_TILE};
    }
  $if BATCH_TILE == 1:
    // No unrolled loop is generated for BATCH_TILE == 1, so this loop handles
    // every element. n != 0 is asserted on entry, which makes do-while safe.
    do {
      const uint16_t vh = *i++;

      // Same per-element sequence as described for the unrolled form:
      // widen, split off sign, double, build both candidates, select, merge.
      const uint32_t vw = (uint32_t) vh << 16;
      const uint32_t vsign = vw & vsign_mask;
      const uint32_t v2w = vw + vw;
      const uint32_t vnorm = fp32_to_bits(fp32_from_bits((v2w >> 4) + vexp_offset) * vexp_scale);
      const uint32_t vdenorm = fp32_to_bits(fp32_from_bits((v2w >> 17) | vmagic_mask) - vmagic_bias);
      const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm);

      *o++ = vf;

      n -= sizeof(uint16_t);
    } while (n != 0);
  $elif BATCH_TILE == 2:
    // After the 2-wide main loop at most one element can remain (n is a
    // multiple of sizeof(uint16_t)), so a single conditional pass suffices
    // and the pointers do not need to advance.
    if XNN_UNLIKELY(n != 0) {
      const uint16_t vh = *i;

      const uint32_t vw = (uint32_t) vh << 16;
      const uint32_t vsign = vw & vsign_mask;
      const uint32_t v2w = vw + vw;
      const uint32_t vnorm = fp32_to_bits(fp32_from_bits((v2w >> 4) + vexp_offset) * vexp_scale);
      const uint32_t vdenorm = fp32_to_bits(fp32_from_bits((v2w >> 17) | vmagic_mask) - vmagic_bias);
      const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm);

      *o = vf;
    }
  $else:
    // Remainder for BATCH_TILE > 2: fewer than BATCH_TILE elements are left
    // after the main loop; convert them one at a time.
    if XNN_UNLIKELY(n != 0) {
      do {
        const uint16_t vh = *i++;

        const uint32_t vw = (uint32_t) vh << 16;
        const uint32_t vsign = vw & vsign_mask;
        const uint32_t v2w = vw + vw;
        const uint32_t vnorm = fp32_to_bits(fp32_from_bits((v2w >> 4) + vexp_offset) * vexp_scale);
        const uint32_t vdenorm = fp32_to_bits(fp32_from_bits((v2w >> 17) | vmagic_mask) - vmagic_bias);
        const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm);

        *o++ = vf;

        n -= sizeof(uint16_t);
      } while (n != 0);
    }
}