/* ----------------------------------------------------------------------
 * Project: CMSIS DSP Library
 * Title: arm_vinverse_f16.c
 * Description: Fast vectorized inverse
 *
 * $Date: 23 April 2021
 * $Revision: V1.9.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
27 */ 28 29 #include "dsp/fast_math_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 #include "arm_common_tables.h" 34 35 #include "arm_vec_math_f16.h" 36 arm_vinverse_f16(const float16_t * pSrc,float16_t * pDst,uint32_t blockSize)37void arm_vinverse_f16( 38 const float16_t * pSrc, 39 float16_t * pDst, 40 uint32_t blockSize) 41 { 42 uint32_t blkCnt; 43 44 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 45 46 f16x8_t src; 47 f16x8_t dst; 48 49 blkCnt = blockSize >> 3; 50 51 while (blkCnt > 0U) 52 { 53 src = vld1q(pSrc); 54 dst = vrecip_hiprec_f16(src); 55 vst1q(pDst, dst); 56 57 pSrc += 8; 58 pDst += 8; 59 /* Decrement loop counter */ 60 blkCnt--; 61 } 62 63 blkCnt = blockSize & 7; 64 #else 65 blkCnt = blockSize; 66 #endif 67 68 while (blkCnt > 0U) 69 { 70 71 *pDst++ = 1.0f16 / (_Float16)*pSrc++; 72 73 /* Decrement loop counter */ 74 blkCnt--; 75 } 76 } 77 78 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 79 80