1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "simd_utils.h"
17 #include <algorithm>
18 #include <limits>
19 #include "audio_engine_log.h"
20
21 namespace OHOS {
22 namespace AudioStandard {
23 namespace HPAE {
#if USE_ARM_NEON == 1
// Number of float elements processed per float32x4_t load/store in the NEON
// code paths of this file; used as the stride between consecutive vector
// iterations. Only defined when the NEON paths are compiled in.
// NOTE(review): the identifier is spelled "ALIGIN" (presumably "ALIGN");
// it is left unchanged because the functions in this file reference it.
constexpr int ALIGIN_FLOAT_SIZE = 4;
#endif
SimdPointByPointAdd(size_t length,const float * inputLeft,const float * inputRight,float * output)27 void SimdPointByPointAdd(size_t length, const float* inputLeft, const float* inputRight, float* output)
28 {
29 CHECK_AND_RETURN_LOG(inputLeft, "inputLeft is nullptr");
30 CHECK_AND_RETURN_LOG(inputRight, "inputRight is nullptr");
31 CHECK_AND_RETURN_LOG(output, "output is nullptr");
32 #if USE_ARM_NEON == 1
33 if (length < ALIGIN_FLOAT_SIZE) {
34 for (size_t i = 0; i < length; i++) {
35 output[i] = inputLeft[i] + inputRight[i];
36 }
37 } else {
38 size_t procLen = length >> 2;
39 float32x4_t left32x4;
40 float32x4_t right32x4;
41 float32x4_t out32x4;
42 for (size_t i = 0; i < procLen; i++) {
43 left32x4 = vld1q_f32(inputLeft + i * ALIGIN_FLOAT_SIZE);
44 right32x4 = vld1q_f32(inputRight + i * ALIGIN_FLOAT_SIZE);
45 out32x4 = vaddq_f32(left32x4, right32x4);
46 vst1q_f32(output + i * ALIGIN_FLOAT_SIZE, out32x4);
47 }
48 size_t odd = length - procLen * ALIGIN_FLOAT_SIZE;
49 if (odd) {
50 for (size_t j = length - odd; j < length; j++) {
51 output[j] = inputLeft[j] + inputRight[j];
52 }
53 }
54 }
55 #else
56 for (size_t i = 0; i < length; i++) {
57 output[i] = inputLeft[i] + inputRight[i];
58 }
59 #endif
60 }
61
SimdPointByPointSub(size_t length,const float * inputLeft,const float * inputRight,float * output)62 void SimdPointByPointSub(size_t length, const float* inputLeft, const float* inputRight, float* output)
63 {
64 CHECK_AND_RETURN_LOG(inputLeft, "inputLeft is nullptr");
65 CHECK_AND_RETURN_LOG(inputRight, "inputRight is nullptr");
66 CHECK_AND_RETURN_LOG(output, "output is nullptr");
67 #if USE_ARM_NEON == 1
68 if (length < ALIGIN_FLOAT_SIZE) {
69 for (size_t i = 0; i < length; i++) {
70 output[i] = inputLeft[i] - inputRight[i];
71 }
72 } else {
73 size_t procLen = length >> 2;
74 float32x4_t left32x4;
75 float32x4_t right32x4;
76 float32x4_t out32x4;
77 for (size_t i = 0; i < procLen; i++) {
78 left32x4 = vld1q_f32(inputLeft + i * ALIGIN_FLOAT_SIZE);
79 right32x4 = vld1q_f32(inputRight + i * ALIGIN_FLOAT_SIZE);
80 out32x4 = vsubq_f32(left32x4, right32x4);
81 vst1q_f32(output + i * ALIGIN_FLOAT_SIZE, out32x4);
82 }
83 size_t odd = length - procLen * ALIGIN_FLOAT_SIZE;
84 if (odd) {
85 for (size_t j = length - odd; j < length; j++) {
86 output[j] = inputLeft[j] - inputRight[j];
87 }
88 }
89 }
90 #else
91 for (size_t i = 0; i < length; i++) {
92 output[i] = inputLeft[i] - inputRight[i];
93 }
94 #endif
95 }
96
SimdPointByPointMul(size_t length,const float * inputLeft,const float * inputRight,float * output)97 void SimdPointByPointMul(size_t length, const float* inputLeft, const float* inputRight, float* output)
98 {
99 CHECK_AND_RETURN_LOG(inputLeft, "inputLeft is nullptr");
100 CHECK_AND_RETURN_LOG(inputRight, "inputRight is nullptr");
101 CHECK_AND_RETURN_LOG(output, "output is nullptr");
102 #if USE_ARM_NEON == 1
103 if (length < ALIGIN_FLOAT_SIZE) {
104 for (size_t i = 0; i < length; i++) {
105 output[i] = inputLeft[i] * inputRight[i];
106 }
107 } else {
108 size_t procLen = length >> 2;
109 float32x4_t left32x4;
110 float32x4_t right32x4;
111 float32x4_t out32x4;
112 for (size_t i = 0; i < procLen; i++) {
113 left32x4 = vld1q_f32(inputLeft + i * ALIGIN_FLOAT_SIZE);
114 right32x4 = vld1q_f32(inputRight + i * ALIGIN_FLOAT_SIZE);
115 out32x4 = vmulq_f32(left32x4, right32x4);
116 vst1q_f32(output + i * ALIGIN_FLOAT_SIZE, out32x4);
117 }
118 size_t odd = length - procLen * ALIGIN_FLOAT_SIZE;
119 if (odd) {
120 for (size_t j = length - odd; j < length; j++) {
121 output[j] = inputLeft[j] * inputRight[j];
122 }
123 }
124 }
125 #else
126 for (size_t i = 0; i < length; i++) {
127 output[i] = inputLeft[i] * inputRight[i];
128 }
129 #endif
130 }
131
132 } // namespace HPAE
133 } // namespace AudioStandard
134 } // namespace OHOS
135