/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #ifndef MINDSPORE_NNACL_CAST_FP16_H_
17 #define MINDSPORE_NNACL_CAST_FP16_H_
18 
19 #include "nnacl/op_base.h"
20 #if defined(ENABLE_ARM) && defined(ENABLE_FP16)
21 #include <arm_neon.h>
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
// Converts `number` bool values from `input` into half-precision floats in
// `output`, one element at a time (false -> 0, true -> 1).
// A non-positive `number` leaves `output` untouched.
// NOTE(review): plain `inline` in C emits no external definition; confirm one
// is provided elsewhere for C translation units, or consider `static inline`.
inline void BoolToFloat16(const bool *input, float16_t *output, int number) {
  int idx = 0;
  while (idx < number) {
    output[idx] = (float16_t)input[idx];
    ++idx;
  }
}
32 
// Widens `number` unsigned 8-bit integers from `input` into half-precision
// floats written to `output`. Does nothing when `number` is not positive.
inline void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) {
  const uint8_t *src = input;
  float16_t *dst = output;
  for (int remaining = number; remaining > 0; --remaining) {
    *dst++ = (float16_t)(*src++);
  }
}
38 
// Converts `number` half-precision floats from `input` to 32-bit signed
// integers in `output` using a plain C cast (fraction is discarded).
// Does nothing when `number` is not positive.
inline void Float16ToInt32(const float16_t *input, int32_t *output, int number) {
  const float16_t *src = input;
  int32_t *dst = output;
  for (int remaining = number; remaining > 0; --remaining) {
    *dst++ = (int32_t)(*src++);
  }
}
44 
// Converts `number` half-precision floats from `input` to 64-bit signed
// integers in `output` using a plain C cast (fraction is discarded).
// Does nothing when `number` is not positive.
inline void Float16ToInt64(const float16_t *input, int64_t *output, int number) {
  int idx = 0;
  while (idx < number) {
    const float16_t value = input[idx];
    output[idx] = (int64_t)value;
    ++idx;
  }
}
50 
51 #ifdef ENABLE_ARM64
// Narrows `number` single-precision floats from `input` into half-precision
// floats in `output`.
//
// The bulk of the data is handled eight elements per iteration with NEON: two
// 4-lane float32 vectors are narrowed and packed into one 8-lane float16
// vector. Any leftover elements are converted with a scalar cast.
// `__restrict` means the caller must not pass overlapping buffers.
// NOTE(review): the mask below assumes C8NUM (defined outside this file) is 8.
inline void Float32ToFloat16(const float *__restrict input, float16_t *__restrict output, int number) {
  // Largest multiple of C8NUM that does not exceed `number`.
  const int vec_end = number & ~(C8NUM - 1);
  int pos = 0;
  while (pos < vec_end) {
    const float32x4_t lo_f32 = vld1q_f32(input + pos);
    const float32x4_t hi_f32 = vld1q_f32(input + pos + 4);
    const float16x4_t lo_f16 = vcvt_f16_f32(lo_f32);
    const float16x4_t hi_f16 = vcvt_f16_f32(hi_f32);
    vst1q_f16(output + pos, vcombine_f16(lo_f16, hi_f16));
    pos += C8NUM;
  }
  // Scalar tail for the elements that do not fill a whole vector.
  while (pos < number) {
    output[pos] = (float16_t)input[pos];
    ++pos;
  }
}
67 
Float16ToFloat32(const float16_t * __restrict input,float * __restrict output,int number)68 inline void Float16ToFloat32(const float16_t *__restrict input, float *__restrict output, int number) {
69   int count = number & ~(C8NUM - 1);
70   int i = 0;
71   for (; i < count; i += C8NUM) {
72     float16x8_t in = vld1q_f16(input + i);
73     float16x4_t in1 = vget_low_f16(in);
74     float16x4_t in2 = vget_high_f16(in);
75     float32x4_t out1 = vcvt_f32_f16(in1);
76     vst1q_f32(output + i, out1);
77     float32x4_t out2 = vcvt_f32_f16(in2);
78     vst1q_f32(output + i + 4, out2);
79   }
80   for (; i < number; ++i) {
81     output[i] = (float)input[i];
82   }
83 }
84 #else
85 void Float32ToFloat16(const float *input, float16_t *output, int number);
86 
87 void Float16ToFloat32(const float16_t *input, float *output, int number);
88 #endif
89 
90 #ifdef __cplusplus
91 }
92 #endif
93 #endif
94 #endif  // MINDSPORE_NNACL_CAST_FP16_H_
95