/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This file contains the function WebRtcSpl_CrossCorrelation().
 * The description header can be found in signal_processing_library.h
 */

#include "signal_processing_library.h"

/*
 * Computes dim_cross_correlation cross-correlation values between the fixed
 * sequence seq1 and a window that slides over seq2.
 *
 * Portable and _ARM_OPT_ implementations, for i in [0, dim_cross_correlation):
 *
 *     cross_correlation[i] =
 *         sum over j in [0, dim_seq) of
 *             WEBRTC_SPL_MUL_16_16_RSFT(seq1[j], seq2[step_seq2 * i + j],
 *                                       right_shifts)
 *
 * (_ARM_OPT_ uses WEBRTC_SPL_MUL_16_16 instead when right_shifts == 0.)
 *
 * _XSCALE_OPT_ implementation: the products are accumulated in coprocessor
 * accumulator 0 and the *sum* is shifted right by right_shifts once, so its
 * results can differ from the portable loop when right_shifts != 0.  The fast
 * path is only taken when the packed inputs fit its fixed scratch buffers and
 * step_seq2 is +1 or -1; every other input uses the portable loop.
 *
 * Parameters:
 *   cross_correlation      - output; dim_cross_correlation values are written.
 *   seq1                   - fixed sequence; dim_seq samples are read.
 *   seq2                   - sliding sequence; window i starts at
 *                            seq2 + step_seq2 * i and spans dim_seq samples.
 *   dim_seq                - number of samples multiplied per output value.
 *   dim_cross_correlation  - number of output values.
 *   right_shifts           - right shift applied as described above.
 *   step_seq2              - offset, in samples, between consecutive windows.
 *
 * NOTE(review): the XScale packing loops read one or two samples past the
 * logical end of seq1/seq2 (the extra halves are masked to zero afterwards);
 * callers presumably provide that slack - confirm against the call sites.
 */
void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation, WebRtc_Word16* seq1,
                                WebRtc_Word16* seq2, WebRtc_Word16 dim_seq,
                                WebRtc_Word16 dim_cross_correlation,
                                WebRtc_Word16 right_shifts,
                                WebRtc_Word16 step_seq2)
{
    int i, j;
    WebRtc_Word16* seq1Ptr;
    WebRtc_Word16* seq2Ptr;
    WebRtc_Word32* CrossCorrPtr;
#ifdef _XSCALE_OPT_
    // Capacity, in packed 32-bit words, of each XScale scratch buffer.
    enum { kXscaleScratchWords = 250 };
#endif
#ifdef _ARM_OPT_
    // Largest multiple of 8 that does not exceed dim_seq (unrolled part).
    WebRtc_Word16 dim_seq8 = (dim_seq >> 3) << 3;
#endif

#ifdef _XSCALE_OPT_

#ifdef _WIN32
#pragma message("NOTE: _XSCALE_OPT_ optimizations are used (overrides _ARM_OPT_ and requires /QRxscale compiler flag)")
#endif

    // The fast path below writes packed words up to index
    // ((dim_seq + dim_cross_correlation) >> 1) + 2 of its scratch buffers,
    // indexes them with (packed length - 1), and walks seq2 / the output with
    // a unit stride.  Anything outside those limits would corrupt the stack or
    // produce wrong results, so such inputs use the portable loop instead.
    if ((dim_seq > 0) && (dim_cross_correlation > 0)
        && ((step_seq2 == 1) || (step_seq2 == -1))
        && (((((int)dim_seq + (int)dim_cross_correlation) >> 1) + 3)
            <= kXscaleScratchWords))
    {
        __int64 macc40;

        int iseq1[kXscaleScratchWords];
        int iseq2[kXscaleScratchWords];
        int iseq3[kXscaleScratchWords];
        int* iseq1Ptr;
        int* iseq2Ptr;
        int* iseq3Ptr;
        int len, i_len;

        // Pack seq1 two samples per word: low half = even sample,
        // high half = odd sample.
        seq1Ptr = seq1;
        iseq1Ptr = iseq1;
        for (i = 0; i < ((dim_seq + 1) >> 1); i++)
        {
            *iseq1Ptr = (unsigned short)*seq1Ptr++;
            *iseq1Ptr++ |= (WebRtc_Word32)*seq1Ptr++ << 16;
        }

        // With an odd dim_seq the last high half came from beyond the
        // sequence; clear it.
        if (dim_seq % 2)
        {
            *(iseq1Ptr - 1) &= 0x0000ffff;
        }

        // Three zero words of padding: the multiply-accumulate loop consumes
        // seq1 in groups of four packed words.
        *iseq1Ptr = 0;
        iseq1Ptr++;
        *iseq1Ptr = 0;
        iseq1Ptr++;
        *iseq1Ptr = 0;

        // For a negative step start from the lowest seq2 address and fill the
        // output from its last element backwards.
        if (step_seq2 < 0)
        {
            seq2Ptr = seq2 - dim_cross_correlation + 1;
            CrossCorrPtr = &cross_correlation[dim_cross_correlation - 1];
        }
        else
        {
            seq2Ptr = seq2;
            CrossCorrPtr = cross_correlation;
        }

        // Number of seq2 samples touched by all windows, and its packed size.
        len = dim_seq + dim_cross_correlation - 1;
        i_len = (len + 1) >> 1;
        iseq2Ptr = iseq2;

        // iseq2 holds seq2 packed from sample 0 (used for even lags), iseq3
        // holds it packed from sample 1 (used for odd lags).
        iseq3Ptr = iseq3;
        for (i = 0; i < i_len; i++)
        {
            *iseq2Ptr = (unsigned short)*seq2Ptr++;
            *iseq3Ptr = (unsigned short)*seq2Ptr;
            *iseq2Ptr++ |= (WebRtc_Word32)*seq2Ptr++ << 16;
            *iseq3Ptr++ |= (WebRtc_Word32)*seq2Ptr << 16;
        }

        // Clear the halves of the last packed words that lie beyond the
        // len samples.
        if (len % 2)
        {
            iseq2[i_len - 1] &= 0x0000ffff;
            iseq3[i_len - 1] = 0;
        }
        else
        {
            iseq3[i_len - 1] &= 0x0000ffff;
        }

        // Zero padding matching the padding of iseq1.
        iseq2[i_len] = 0;
        iseq3[i_len] = 0;
        iseq2[i_len + 1] = 0;
        iseq3[i_len + 1] = 0;
        iseq2[i_len + 2] = 0;
        iseq3[i_len + 2] = 0;

        // Set pointer to start value
        iseq2Ptr = iseq2;
        iseq3Ptr = iseq3;

        // Each inner iteration consumes four packed words (eight samples).
        i_len = (dim_seq + 7) >> 3;
        for (i = 0; i < dim_cross_correlation; i++)
        {
            iseq1Ptr = iseq1;

            // Reset coprocessor accumulator 0.
            macc40 = 0;
            _WriteCoProcessor(macc40, 0);

            if ((i & 1))
            {
                // Odd lag: use the copy of seq2 packed from sample 1.
                iseq3Ptr = iseq3 + (i >> 1);
                for (j = i_len; j > 0; j--)
                {
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                }
            }
            else
            {
                // Even lag: use the copy of seq2 packed from sample 0.
                iseq2Ptr = iseq2 + (i >> 1);
                for (j = i_len; j > 0; j--)
                {
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                    _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                }
            }

            // The accumulated sum is scaled once, after the accumulation.
            macc40 = _ReadCoProcessor(0);
            *CrossCorrPtr = (WebRtc_Word32)(macc40 >> right_shifts);
            CrossCorrPtr += step_seq2;
        }
        return;
    }
#endif // #ifdef _XSCALE_OPT_

    CrossCorrPtr = cross_correlation;

    for (i = 0; i < dim_cross_correlation; i++)
    {
        WebRtc_Word32 corr = 0;

        // Set the pointer to the static vector and the pointer to the
        // sliding vector for this lag.
        seq1Ptr = seq1;
        seq2Ptr = seq2 + (step_seq2 * i);

#ifndef _ARM_OPT_
#ifdef _WIN32
#pragma message("NOTE: default implementation is used")
#endif
        // Perform the cross correlation
        for (j = 0; j < dim_seq; j++)
        {
            corr += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts);
            seq1Ptr++;
            seq2Ptr++;
        }
#else
#ifdef _WIN32
#pragma message("NOTE: _ARM_OPT_ optimizations are used")
#endif
        if (right_shifts == 0)
        {
            // Unrolled by eight; no shift is needed, so the plain multiply
            // macro is used.
            for (j = 0; j < dim_seq8; j = j + 8)
            {
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[0]), (seq2Ptr[0]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[1]), (seq2Ptr[1]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[2]), (seq2Ptr[2]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[3]), (seq2Ptr[3]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[4]), (seq2Ptr[4]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[5]), (seq2Ptr[5]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[6]), (seq2Ptr[6]));
                corr += WEBRTC_SPL_MUL_16_16((seq1Ptr[7]), (seq2Ptr[7]));
                seq1Ptr += 8;
                seq2Ptr += 8;
            }

            // Remaining (dim_seq % 8) samples.
            for (j = dim_seq8; j < dim_seq; j++)
            {
                corr += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
            }
        }
        else // right_shifts != 0
        {
            // Unrolled by eight; every product is shifted before it is added.
            for (j = 0; j < dim_seq8; j = j + 8)
            {
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[0]), (seq2Ptr[0]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[1]), (seq2Ptr[1]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[2]), (seq2Ptr[2]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[3]), (seq2Ptr[3]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[4]), (seq2Ptr[4]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[5]), (seq2Ptr[5]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[6]), (seq2Ptr[6]), right_shifts);
                corr += WEBRTC_SPL_MUL_16_16_RSFT((seq1Ptr[7]), (seq2Ptr[7]), right_shifts);
                seq1Ptr += 8;
                seq2Ptr += 8;
            }

            // Remaining (dim_seq % 8) samples.
            for (j = dim_seq8; j < dim_seq; j++)
            {
                corr += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts);
                seq1Ptr++;
                seq2Ptr++;
            }
        }
#endif
        *CrossCorrPtr = corr;
        CrossCorrPtr++;
    }
}
