• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_vinverse_f16.c
4  * Description:  Fast vectorized inverse
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/fast_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 #include "arm_common_tables.h"
34 
35 #include "arm_vec_math_f16.h"
36 
/**
 * @brief         Element-wise reciprocal of a half-precision vector.
 *
 * Writes 1 / pSrc[i] to pDst[i] for every i in [0, blockSize).
 *
 * When ARM_MATH_MVE_FLOAT16 is defined and ARM_MATH_AUTOVECTORIZE is not,
 * the bulk of the data is handled eight elements at a time through
 * vrecip_hiprec_f16() (presumably provided by arm_vec_math_f16.h -- confirm),
 * and only the final (blockSize & 7) elements go through the scalar division.
 * Otherwise every element goes through the scalar division.
 *
 * No check for zero-valued inputs is performed.
 *
 * @param[in]     pSrc       points to the input vector
 * @param[out]    pDst       points to the output vector
 * @param[in]     blockSize  number of elements to process
 */
void arm_vinverse_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t blockSize)
{
   uint32_t remaining;

#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)

   f16x8_t vecIn;
   f16x8_t vecOut;

   /* Vector stage: one iteration per complete group of 8 half-floats */
   for (remaining = blockSize >> 3; remaining > 0U; remaining--)
   {
      vecIn  = vld1q(pSrc);
      vecOut = vrecip_hiprec_f16(vecIn);
      vst1q(pDst, vecOut);

      pSrc += 8;
      pDst += 8;
   }

   /* Whatever did not fill a whole vector is left for the scalar stage */
   remaining = blockSize & 7;
#else
   /* No vector stage: the scalar stage handles the whole block */
   remaining = blockSize;
#endif

   /* Scalar stage: plain half-precision division, one element at a time */
   for (; remaining > 0U; remaining--)
   {
      *pDst = 1.0f16 / (_Float16)*pSrc;

      pSrc++;
      pDst++;
   }
}
77 
78 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
79 
80