• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 /*
13  * This file contains the function WebRtcSpl_CrossCorrelation().
14  * The description header can be found in signal_processing_library.h
15  *
16  */
17 
18 #include "signal_processing_library.h"
19 
/*
 * Computes dim_cross_correlation cross-correlation values between the fixed
 * vector seq1 and a window that slides over seq2. Every value is a sum over
 * dim_seq sample products.
 *
 * Default and _ARM_OPT_ builds, for i in [0, dim_cross_correlation):
 *
 *   cross_correlation[i] =
 *       sum over j in [0, dim_seq) of
 *           WEBRTC_SPL_MUL_16_16_RSFT(seq1[j], seq2[step_seq2 * i + j],
 *                                     right_shifts)
 *
 *   (the _ARM_OPT_ build uses WEBRTC_SPL_MUL_16_16 when right_shifts == 0).
 *
 * _XSCALE_OPT_ build: both inputs are first packed two 16-bit samples per int
 * into fixed 250-word stack arrays. Only the sign of step_seq2 is used to pick
 * the start of the seq2 window; step_seq2 itself is the stride applied to the
 * output pointer. The accumulated sum is shifted right by right_shifts once,
 * after accumulation, rather than per product.
 * NOTE(review): the scratch arrays are not bounds-checked. The highest index
 * written is ((dim_seq + dim_cross_correlation) >> 1) + 2, so callers of this
 * build must keep that below 250 - confirm against callers.
 *
 * Parameters:
 *   cross_correlation     - output vector (see above for the entries written)
 *   seq1                  - fixed input vector, dim_seq samples are read
 *   seq2                  - sliding input vector
 *   dim_seq               - number of samples per correlation value
 *   dim_cross_correlation - number of correlation values to compute
 *   right_shifts          - right shift applied to the products / the sum
 *   step_seq2             - per-output step of the seq2 window
 */
void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation, WebRtc_Word16* seq1,
                                WebRtc_Word16* seq2, WebRtc_Word16 dim_seq,
                                WebRtc_Word16 dim_cross_correlation,
                                WebRtc_Word16 right_shifts,
                                WebRtc_Word16 step_seq2)
{
    int i, j;
    WebRtc_Word16* seq1Ptr;
    WebRtc_Word16* seq2Ptr;
    WebRtc_Word32* CrossCorrPtr;

#ifdef _XSCALE_OPT_

#ifdef _WIN32
#pragma message("NOTE: _XSCALE_OPT_ optimizations are used (overrides _ARM_OPT_ and requires /QRxscale compiler flag)")
#endif

    __int64 macc40;

    // Scratch buffers holding two 16-bit samples per int:
    //   iseq1: seq1 pairs             (seq1[2k]   | seq1[2k+1] << 16)
    //   iseq2: seq2 pairs, even phase (s[2k]      | s[2k+1]    << 16)
    //   iseq3: seq2 pairs, odd phase  (s[2k+1]    | s[2k+2]    << 16)
    int iseq1[250];
    int iseq2[250];
    int iseq3[250];
    int * iseq1Ptr;
    int * iseq2Ptr;
    int * iseq3Ptr;
    int len, i_len;

    // Pack the complete sample pairs of seq1.
    seq1Ptr = seq1;
    iseq1Ptr = iseq1;
    for (i = 0; i < (dim_seq >> 1); i++)
    {
        *iseq1Ptr = (unsigned short)*seq1Ptr++;
        *iseq1Ptr++ |= (WebRtc_Word32)*seq1Ptr++ << 16;
    }

    if (dim_seq % 2)
    {
        // Odd length: the last sample sits alone in the low half-word. It is
        // stored directly so that seq1[dim_seq] (one past the end) is never read.
        *iseq1Ptr++ = (unsigned short)*seq1Ptr;
    }

    // Zero padding: the multiply-accumulate loop below consumes four packed
    // words per iteration, so it can run up to three words past the data.
    *iseq1Ptr = 0;
    iseq1Ptr++;
    *iseq1Ptr = 0;
    iseq1Ptr++;
    *iseq1Ptr = 0;

    if (step_seq2 < 0)
    {
        // Negative step: start the window dim_cross_correlation - 1 samples
        // before seq2 and fill the output from its last entry backwards.
        seq2Ptr = seq2 - dim_cross_correlation + 1;
        CrossCorrPtr = &cross_correlation[dim_cross_correlation - 1];
    }
    else
    {
        seq2Ptr = seq2;
        CrossCorrPtr = cross_correlation;
    }

    // Number of seq2 samples covered by all windows, and the packed word count.
    len = dim_seq + dim_cross_correlation - 1;
    i_len = (len + 1) >> 1;
    iseq2Ptr = iseq2;
    iseq3Ptr = iseq3;

    // Pack every word except the last one; these only touch samples
    // with index <= len - 1.
    for (i = 0; i < i_len - 1; i++)
    {
        *iseq2Ptr = (unsigned short)*seq2Ptr++;
        *iseq3Ptr = (unsigned short)*seq2Ptr;
        *iseq2Ptr++ |= (WebRtc_Word32)*seq2Ptr++ << 16;
        *iseq3Ptr++ |= (WebRtc_Word32)*seq2Ptr << 16;
    }

    // Build the last packed word explicitly so that no sample at index >= len
    // is read; the half-words that would hold such samples are zero.
    if (i_len > 0)
    {
        if (len % 2)
        {
            // Odd len: one sample left; the odd-phase word is entirely padding.
            *iseq2Ptr = (unsigned short)*seq2Ptr;
            *iseq3Ptr = 0;
        }
        else
        {
            // Even len: two samples left; the odd-phase word keeps only the
            // final sample in its low half.
            *iseq2Ptr = (unsigned short)*seq2Ptr++;
            *iseq3Ptr = (unsigned short)*seq2Ptr;
            *iseq2Ptr |= (WebRtc_Word32)*seq2Ptr << 16;
        }
    }

    // Zero padding after the packed data (same reason as for iseq1).
    iseq2[i_len] = 0;
    iseq3[i_len] = 0;
    iseq2[i_len + 1] = 0;
    iseq3[i_len + 1] = 0;
    iseq2[i_len + 2] = 0;
    iseq3[i_len + 2] = 0;

    // Set pointer to start value
    iseq2Ptr = iseq2;
    iseq3Ptr = iseq3;

    // Iterations of the 4-word (8-sample) multiply-accumulate loop, rounded up.
    i_len = (dim_seq + 7) >> 3;
    for (i = 0; i < dim_cross_correlation; i++)
    {
        iseq1Ptr = iseq1;

        // Clear coprocessor accumulator 0 before each correlation value.
        // NOTE(review): _WriteCoProcessor / _ReadCoProcessor /
        // _SmulAddPack_2SW_ACC are compiler intrinsics; presumably a packed
        // dual 16x16 multiply-accumulate - confirm against the toolchain docs.
        macc40 = 0;

        _WriteCoProcessor(macc40, 0);

        if ((i & 1))
        {
            // Odd lag: use the odd-phase packing of seq2.
            iseq3Ptr = iseq3 + (i >> 1);
            for (j = i_len; j > 0; j--)
            {
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++);
            }
        }
        else
        {
            // Even lag: use the even-phase packing of seq2.
            iseq2Ptr = iseq2 + (i >> 1);
            for (j = i_len; j > 0; j--)
            {
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
                _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++);
            }

        }

        // Read back the sum and scale it once.
        macc40 = _ReadCoProcessor(0);
        *CrossCorrPtr = (WebRtc_Word32)(macc40 >> right_shifts);
        CrossCorrPtr += step_seq2;
    }
#else // #ifdef _XSCALE_OPT_
#ifdef _ARM_OPT_
    // dim_seq rounded down to a multiple of 8 for the unrolled loops.
    WebRtc_Word16 dim_seq8 = (dim_seq >> 3) << 3;
#endif

    CrossCorrPtr = cross_correlation;

    for (i = 0; i < dim_cross_correlation; i++)
    {
        // Set the pointer to the static vector, set the pointer to the sliding vector
        // and initialize cross_correlation
        seq1Ptr = seq1;
        seq2Ptr = seq2 + (step_seq2 * i);
        (*CrossCorrPtr) = 0;

        // The pointer increments below are kept as separate statements rather
        // than folded into the macro arguments, because the macros are defined
        // outside this file and may evaluate their arguments more than once.

#ifndef _ARM_OPT_
#ifdef _WIN32
#pragma message("NOTE: default implementation is used")
#endif
        // Perform the cross correlation
        for (j = 0; j < dim_seq; j++)
        {
            (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts);
            seq1Ptr++;
            seq2Ptr++;
        }
#else
#ifdef _WIN32
#pragma message("NOTE: _ARM_OPT_ optimizations are used")
#endif
        if (right_shifts == 0)
        {
            // Perform the optimized cross correlation (8 products per iteration,
            // no shift needed)
            for (j = 0; j < dim_seq8; j = j + 8)
            {
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
            }

            // Remaining dim_seq % 8 samples
            for (j = dim_seq8; j < dim_seq; j++)
            {
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr));
                seq1Ptr++;
                seq2Ptr++;
            }
        }
        else // right_shifts != 0

        {
            // Perform the optimized cross correlation (8 shifted products per
            // iteration)
            for (j = 0; j < dim_seq8; j = j + 8)
            {
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
            }

            // Remaining dim_seq % 8 samples
            for (j = dim_seq8; j < dim_seq; j++)
            {
                (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr),
                                                             right_shifts);
                seq1Ptr++;
                seq2Ptr++;
            }
        }
#endif
        CrossCorrPtr++;
    }
#endif
}
268