1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #ifndef MINDSPORE_NNACL_CAST_FP16_H_
17 #define MINDSPORE_NNACL_CAST_FP16_H_
18
19 #include "nnacl/op_base.h"
20 #if defined(ENABLE_ARM) && defined(ENABLE_FP16)
21 #include <arm_neon.h>
22
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26
/**
 * Element-wise cast of a bool buffer to float16.
 *
 * Each of the first `number` entries of `input` is converted with a plain C
 * cast and written to the same index of `output` (false -> 0, true -> 1).
 * Nothing is written when `number` is zero or negative. Pointers are used
 * as given; no NULL or bounds validation is performed.
 *
 * Declared `static inline`: a bare `inline` definition in a C header does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would end up as an unresolved symbol at link time.
 *
 * @param input   source values, at least `number` elements
 * @param output  destination buffer, at least `number` elements
 * @param number  count of elements to convert
 */
static inline void BoolToFloat16(const bool *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
32
/**
 * Element-wise cast of a uint8_t buffer to float16.
 *
 * Each of the first `number` entries of `input` is converted with a plain C
 * cast and written to the same index of `output`. Nothing is written when
 * `number` is zero or negative. Pointers are used as given; no NULL or
 * bounds validation is performed.
 *
 * Declared `static inline`: a bare `inline` definition in a C header does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would end up as an unresolved symbol at link time.
 *
 * @param input   source values, at least `number` elements
 * @param output  destination buffer, at least `number` elements
 * @param number  count of elements to convert
 */
static inline void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
38
/**
 * Element-wise cast of a float16 buffer to int32_t.
 *
 * Each of the first `number` entries of `input` is converted with a plain C
 * cast (fractional part discarded, i.e. truncation toward zero) and written
 * to the same index of `output`. Nothing is written when `number` is zero or
 * negative. Pointers are used as given; no NULL or bounds validation is
 * performed.
 *
 * NOTE(review): NaN and infinity cannot be represented as an integer, and the
 * C cast leaves the result undefined for them; callers that may pass such
 * values need to filter them beforehand.
 *
 * Declared `static inline`: a bare `inline` definition in a C header does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would end up as an unresolved symbol at link time.
 *
 * @param input   source values, at least `number` elements
 * @param output  destination buffer, at least `number` elements
 * @param number  count of elements to convert
 */
static inline void Float16ToInt32(const float16_t *input, int32_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int32_t)input[i];
  }
}
44
/**
 * Element-wise cast of a float16 buffer to int64_t.
 *
 * Each of the first `number` entries of `input` is converted with a plain C
 * cast (fractional part discarded, i.e. truncation toward zero) and written
 * to the same index of `output`. Nothing is written when `number` is zero or
 * negative. Pointers are used as given; no NULL or bounds validation is
 * performed.
 *
 * NOTE(review): NaN and infinity cannot be represented as an integer, and the
 * C cast leaves the result undefined for them; callers that may pass such
 * values need to filter them beforehand.
 *
 * Declared `static inline`: a bare `inline` definition in a C header does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would end up as an unresolved symbol at link time.
 *
 * @param input   source values, at least `number` elements
 * @param output  destination buffer, at least `number` elements
 * @param number  count of elements to convert
 */
static inline void Float16ToInt64(const float16_t *input, int64_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int64_t)input[i];
  }
}
50
51 #ifdef ENABLE_ARM64
/**
 * Converts `number` float values to float16 (ARM64 NEON path).
 *
 * The bulk of the buffer is handled C8NUM elements per iteration: two
 * 4-lane float32 vectors are loaded, each narrowed to a 4-lane float16
 * vector, and the pair is stored as one 8-lane float16 vector. Whatever is
 * left over is converted one element at a time with a C cast. Nothing is
 * written when `number` is zero or negative.
 *
 * Both pointers are `__restrict`-qualified, so the caller must not pass
 * overlapping buffers. No NULL or bounds validation is performed.
 *
 * Declared `static inline`: a bare `inline` definition in a C header does not
 * provide an external definition, so any call the compiler chooses not to
 * inline would end up as an unresolved symbol at link time.
 *
 * @param input   source floats, at least `number` elements
 * @param output  destination float16 buffer, at least `number` elements
 * @param number  count of elements to convert
 */
static inline void Float32ToFloat16(const float *__restrict input, float16_t *__restrict output, int number) {
  // Round `number` down to a multiple of C8NUM. The mask form relies on C8NUM
  // being a power of two; the loop body consumes 8 elements per step, so
  // C8NUM is presumably 8 (defined outside this header).
  int count = (number & ~(C8NUM - 1));
  int i = 0;
  for (; i < count; i += C8NUM) {
    float32x4_t in1 = vld1q_f32(input + i);       // lanes i .. i+3
    float16x4_t out1 = vcvt_f16_f32(in1);
    float32x4_t in2 = vld1q_f32(input + i + 4);   // lanes i+4 .. i+7
    float16x4_t out2 = vcvt_f16_f32(in2);
    float16x8_t out = vcombine_f16(out1, out2);   // low half = out1, high half = out2
    vst1q_f16(output + i, out);
  }
  // Scalar tail for the elements the vector loop did not cover.
  for (; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
67
Float16ToFloat32(const float16_t * __restrict input,float * __restrict output,int number)68 inline void Float16ToFloat32(const float16_t *__restrict input, float *__restrict output, int number) {
69 int count = number & ~(C8NUM - 1);
70 int i = 0;
71 for (; i < count; i += C8NUM) {
72 float16x8_t in = vld1q_f16(input + i);
73 float16x4_t in1 = vget_low_f16(in);
74 float16x4_t in2 = vget_high_f16(in);
75 float32x4_t out1 = vcvt_f32_f16(in1);
76 vst1q_f32(output + i, out1);
77 float32x4_t out2 = vcvt_f32_f16(in2);
78 vst1q_f32(output + i + 4, out2);
79 }
80 for (; i < number; ++i) {
81 output[i] = (float)input[i];
82 }
83 }
84 #else
/*
 * Prototype used when ENABLE_ARM64 is not defined; only the declaration is
 * visible in this header, the definition lives elsewhere.
 * NOTE(review): presumably converts `number` floats from `input` into float16
 * values in `output`, mirroring the ARM64 inline version of the same name;
 * confirm against the out-of-line definition.
 */
void Float32ToFloat16(const float *input, float16_t *output, int number);

/*
 * Prototype used when ENABLE_ARM64 is not defined; only the declaration is
 * visible in this header, the definition lives elsewhere.
 * NOTE(review): presumably converts `number` float16 values from `input` into
 * floats in `output`, mirroring the ARM64 inline version of the same name;
 * confirm against the out-of-line definition.
 */
void Float16ToFloat32(const float16_t *input, float *output, int number);
88 #endif
89
90 #ifdef __cplusplus
91 }
92 #endif
93 #endif
94 #endif // MINDSPORE_NNACL_CAST_FP16_H_
95