/*
 * Copyright (c) 2012
 * MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Authors:  Djordje Pesut (djordje@mips.com)
 *           Mirjana Vulin (mvulin@mips.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * MIPS-optimized (inline assembly) AAC Spectral Band Replication functions.
 * Reference: libavcodec/aacsbr.c
 */

#include "libavcodec/aac.h"
#include "libavcodec/aacsbr.h"
#include "libavutil/mips/asmdefs.h"

#define ENVELOPE_ADJUSTMENT_OFFSET 2

#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
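/*
 * Low-frequency generation: copies the analysis QMF output W into X_low.
 * A scalar sketch of what the unrolled assembly below implements (my
 * reading of the pointer arithmetic; the generic version lives in
 * libavcodec/aacsbr.c):
 *
 *     memset(X_low, 0, sizeof(float[32][40][2]));
 *     for (k = 0; k < sbr->kx[1]; k++)
 *         for (i = 0; i < 32; i++) {
 *             X_low[k][i + 8][0] = W[buf_idx][i][k][0];
 *             X_low[k][i + 8][1] = W[buf_idx][i][k][1];
 *         }
 *     for (k = 0; k < sbr->kx[0]; k++)
 *         for (i = 0; i < 8; i++) {
 *             X_low[k][i][0] = W[1 - buf_idx][i + 24][k][0];
 *             X_low[k][i][1] = W[1 - buf_idx][i + 24][k][1];
 *         }
 */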
static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
                           float X_low[32][40][2], const float W[2][32][32][2],
                           int buf_idx)
{
    int i, k;
    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
    float *p_x_low = &X_low[0][8][0];
    float *p_w = (float*)&W[buf_idx][0][0][0];
    float *p_x1_low = &X_low[0][0][0];
    float *p_w1 = (float*)&W[1 - buf_idx][24][0][0];

    float *loop_end = p_x1_low + 2560;

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                                          \n\t"
        "sw $0, 0(%[p_x1_low])                       \n\t"
        "sw $0, 4(%[p_x1_low])                       \n\t"
        "sw $0, 8(%[p_x1_low])                       \n\t"
        "sw $0, 12(%[p_x1_low])                      \n\t"
        "sw $0, 16(%[p_x1_low])                      \n\t"
        "sw $0, 20(%[p_x1_low])                      \n\t"
        "sw $0, 24(%[p_x1_low])                      \n\t"
        "sw $0, 28(%[p_x1_low])                      \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32      \n\t"
        "bne %[p_x1_low], %[loop_end], 1b            \n\t"
        PTR_ADDIU "%[p_x1_low], %[p_x1_low], -10240  \n\t"

        : [p_x1_low]"+r"(p_x1_low)
        : [loop_end]"r"(loop_end)
        : "memory"
    );

    for (k = 0; k < sbr->kx[1]; k++) {
        for (i = 0; i < 32; i += 4) {
            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw %[temp0],   0(%[p_w])              \n\t"
                "lw %[temp1],   4(%[p_w])              \n\t"
                "lw %[temp2], 256(%[p_w])              \n\t"
                "lw %[temp3], 260(%[p_w])              \n\t"
                "lw %[temp4], 512(%[p_w])              \n\t"
                "lw %[temp5], 516(%[p_w])              \n\t"
                "lw %[temp6], 768(%[p_w])              \n\t"
                "lw %[temp7], 772(%[p_w])              \n\t"
                "sw %[temp0],  0(%[p_x_low])           \n\t"
                "sw %[temp1],  4(%[p_x_low])           \n\t"
                "sw %[temp2],  8(%[p_x_low])           \n\t"
                "sw %[temp3], 12(%[p_x_low])           \n\t"
                "sw %[temp4], 16(%[p_x_low])           \n\t"
                "sw %[temp5], 20(%[p_x_low])           \n\t"
                "sw %[temp6], 24(%[p_x_low])           \n\t"
                "sw %[temp7], 28(%[p_x_low])           \n\t"
                PTR_ADDIU "%[p_x_low], %[p_x_low], 32  \n\t"
                PTR_ADDIU "%[p_w], %[p_w], 1024        \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
                :
                : "memory"
            );
        }
        p_x_low += 16;
        p_w -= 2046;
    }

    for (k = 0; k < sbr->kx[0]; k++) {
        for (i = 0; i < 2; i++) {

            /* loop unrolled 4 times */
            __asm__ volatile (
                "lw %[temp0],   0(%[p_w1])               \n\t"
                "lw %[temp1],   4(%[p_w1])               \n\t"
                "lw %[temp2], 256(%[p_w1])               \n\t"
                "lw %[temp3], 260(%[p_w1])               \n\t"
                "lw %[temp4], 512(%[p_w1])               \n\t"
                "lw %[temp5], 516(%[p_w1])               \n\t"
                "lw %[temp6], 768(%[p_w1])               \n\t"
                "lw %[temp7], 772(%[p_w1])               \n\t"
                "sw %[temp0],  0(%[p_x1_low])            \n\t"
                "sw %[temp1],  4(%[p_x1_low])            \n\t"
                "sw %[temp2],  8(%[p_x1_low])            \n\t"
                "sw %[temp3], 12(%[p_x1_low])            \n\t"
                "sw %[temp4], 16(%[p_x1_low])            \n\t"
                "sw %[temp5], 20(%[p_x1_low])            \n\t"
                "sw %[temp6], 24(%[p_x1_low])            \n\t"
                "sw %[temp7], 28(%[p_x1_low])            \n\t"
                PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32  \n\t"
                PTR_ADDIU "%[p_w1], %[p_w1], 1024        \n\t"

                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
                :
                : "memory"
            );
        }
        p_x1_low += 64;
        p_w1 -= 510;
    }
    return 0;
}

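/*
 * X generation: merges the low band (X_low) and the reconstructed high
 * band (Y0 from the previous frame, Y1 from the current one) into X,
 * stored as separate real (X[0]) and imaginary (X[1]) planes. A scalar
 * sketch of the assembly below, assuming the same i_Temp split as the
 * generic version in libavcodec/aacsbr.c:
 *
 *     memset(X, 0, sizeof(float[2][38][64]));
 *     for (k = 0; k < sbr->kx[0]; k++)             // old low subbands
 *         for (i = 0; i < i_Temp; i++) {
 *             X[0][i][k] = X_low[k][i + 2][0];
 *             X[1][i][k] = X_low[k][i + 2][1];
 *         }
 *     for (; k < sbr->kx[0] + sbr->m[0]; k++)      // old high subbands
 *         for (i = 0; i < i_Temp; i++) {
 *             X[0][i][k] = Y0[i + 32][k][0];
 *             X[1][i][k] = Y0[i + 32][k][1];
 *         }
 *     // ... and analogously with X_low[k][i + 2] / Y1[i][k] for
 *     // i in [i_Temp, 38) and [i_Temp, 32), using kx[1] and m[1].
 */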
static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
                          const float Y0[38][64][2], const float Y1[38][64][2],
                          const float X_low[32][40][2], int ch)
{
    int k, i;
    const int i_f = 32;
    int temp0, temp1, temp2, temp3;
    const float *X_low1, *Y01, *Y11;
    float *x1 = &X[0][0][0];
    float *j = x1 + 4864;
    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);

    /* loop unrolled 8 times */
    __asm__ volatile (
        "1:                              \n\t"
        "sw $0, 0(%[x1])                 \n\t"
        "sw $0, 4(%[x1])                 \n\t"
        "sw $0, 8(%[x1])                 \n\t"
        "sw $0, 12(%[x1])                \n\t"
        "sw $0, 16(%[x1])                \n\t"
        "sw $0, 20(%[x1])                \n\t"
        "sw $0, 24(%[x1])                \n\t"
        "sw $0, 28(%[x1])                \n\t"
        PTR_ADDIU "%[x1], %[x1], 32      \n\t"
        "bne %[x1], %[j], 1b             \n\t"
        PTR_ADDIU "%[x1], %[x1], -19456  \n\t"

        : [x1]"+r"(x1)
        : [j]"r"(j)
        : "memory"
    );

    if (i_Temp != 0) {

        X_low1 = &X_low[0][2][0];

        for (k = 0; k < sbr->kx[0]; k++) {

            __asm__ volatile (
                "move %[i], $zero                   \n\t"
                "2:                                 \n\t"
                "lw %[temp0], 0(%[X_low1])          \n\t"
                "lw %[temp1], 4(%[X_low1])          \n\t"
                "sw %[temp0], 0(%[x1])              \n\t"
                "sw %[temp1], 9728(%[x1])           \n\t"
                PTR_ADDIU "%[x1], %[x1], 256        \n\t"
                PTR_ADDIU "%[X_low1], %[X_low1], 8  \n\t"
                "addiu %[i], %[i], 1                \n\t"
                "bne %[i], %[i_Temp], 2b            \n\t"

                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1 -= (i_Temp << 6) - 1;
            X_low1 -= (i_Temp << 1) - 80;
        }

        x1 = &X[0][0][k];
        Y01 = (float*)&Y0[32][k][0];

        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
            __asm__ volatile (
                "move %[i], $zero               \n\t"
                "3:                             \n\t"
                "lw %[temp0], 0(%[Y01])         \n\t"
                "lw %[temp1], 4(%[Y01])         \n\t"
                "sw %[temp0], 0(%[x1])          \n\t"
                "sw %[temp1], 9728(%[x1])       \n\t"
                PTR_ADDIU "%[x1], %[x1], 256    \n\t"
                PTR_ADDIU "%[Y01], %[Y01], 512  \n\t"
                "addiu %[i], %[i], 1            \n\t"
                "bne %[i], %[i_Temp], 3b        \n\t"

                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
                : [i_Temp]"r"(i_Temp)
                : "memory"
            );
            x1 -= (i_Temp << 6) - 1;
            Y01 -= (i_Temp << 7) - 2;
        }
    }

    x1 = &X[0][i_Temp][0];
    X_low1 = &X_low[0][i_Temp + 2][0];
    temp3 = 38;

    for (k = 0; k < sbr->kx[1]; k++) {

        __asm__ volatile (
            "move %[i], %[i_Temp]               \n\t"
            "4:                                 \n\t"
            "lw %[temp0], 0(%[X_low1])          \n\t"
            "lw %[temp1], 4(%[X_low1])          \n\t"
            "sw %[temp0], 0(%[x1])              \n\t"
            "sw %[temp1], 9728(%[x1])           \n\t"
            PTR_ADDIU "%[x1], %[x1], 256        \n\t"
            PTR_ADDIU "%[X_low1], %[X_low1], 8  \n\t"
            "addiu %[i], %[i], 1                \n\t"
            "bne %[i], %[temp3], 4b             \n\t"

            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
              [temp2]"=&r"(temp2)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
            : "memory"
        );
        x1 -= ((38 - i_Temp) << 6) - 1;
        X_low1 -= ((38 - i_Temp) << 1) - 80;
    }

    x1 = &X[0][i_Temp][k];
    Y11 = &Y1[i_Temp][k][0];
    temp2 = 32;

    for (; k < sbr->kx[1] + sbr->m[1]; k++) {

        __asm__ volatile (
            "move %[i], %[i_Temp]           \n\t"
            "5:                             \n\t"
            "lw %[temp0], 0(%[Y11])         \n\t"
            "lw %[temp1], 4(%[Y11])         \n\t"
            "sw %[temp0], 0(%[x1])          \n\t"
            "sw %[temp1], 9728(%[x1])       \n\t"
            PTR_ADDIU "%[x1], %[x1], 256    \n\t"
            PTR_ADDIU "%[Y11], %[Y11], 512  \n\t"
            "addiu %[i], %[i], 1            \n\t"
            "bne %[i], %[temp2], 5b         \n\t"

            : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
              [temp2]"r"(temp2)
            : "memory"
        );

        x1 -= ((32 - i_Temp) << 6) - 1;
        Y11 -= ((32 - i_Temp) << 7) - 2;
    }
    return 0;
}

#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
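/*
 * HF assembly: scales the transposed high band X_high by the gain envelope
 * and adds noise or sinusoidal components, mirroring the generic version in
 * libavcodec/aacsbr.c. The gains/noise levels of each time slot are first
 * staged in the g_temp/q_temp history buffers so that, when smoothing is
 * enabled (h_SL == 4), g_filt/q_filt can be computed with the 5-tap
 * h_smooth filter over the current and four previous slots.
 */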
static void sbr_hf_assemble_mips(float Y1[38][64][2],
                                 const float X_high[64][40][2],
                                 SpectralBandReplication *sbr, SBRData *ch_data,
                                 const int e_a[2])
{
    int e, i, j, m;
    const int h_SL = 4 * !sbr->bs_smoothing_mode;
    const int kx = sbr->kx[1];
    const int m_max = sbr->m[1];
    static const float h_smooth[5] = {
        0.33333333333333,
        0.30150283239582,
        0.21816949906249,
        0.11516383427084,
        0.03183050093751,
    };

    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
    int indexnoise = ch_data->f_indexnoise;
    int indexsine = ch_data->f_indexsine;
    float *g_temp1, *q_temp1, *pok, *pok1;
    float temp1, temp2, temp3, temp4;
    int size = m_max;

    if (sbr->reset) {
        for (i = 0; i < h_SL; i++) {
            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
        }
    } else if (h_SL) {
        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            g_temp1 = g_temp[h_SL + i];
            pok = sbr->gain[e];
            q_temp1 = q_temp[h_SL + i];
            pok1 = sbr->q_m[e];

            /* loop unrolled 4 times */
            for (j = 0; j < (size >> 2); j++) {
                __asm__ volatile (
                    "lw %[temp1],  0(%[pok])               \n\t"
                    "lw %[temp2],  4(%[pok])               \n\t"
                    "lw %[temp3],  8(%[pok])               \n\t"
                    "lw %[temp4], 12(%[pok])               \n\t"
                    "sw %[temp1],  0(%[g_temp1])           \n\t"
                    "sw %[temp2],  4(%[g_temp1])           \n\t"
                    "sw %[temp3],  8(%[g_temp1])           \n\t"
                    "sw %[temp4], 12(%[g_temp1])           \n\t"
                    "lw %[temp1],  0(%[pok1])              \n\t"
                    "lw %[temp2],  4(%[pok1])              \n\t"
                    "lw %[temp3],  8(%[pok1])              \n\t"
                    "lw %[temp4], 12(%[pok1])              \n\t"
                    "sw %[temp1],  0(%[q_temp1])           \n\t"
                    "sw %[temp2],  4(%[q_temp1])           \n\t"
                    "sw %[temp3],  8(%[q_temp1])           \n\t"
                    "sw %[temp4], 12(%[q_temp1])           \n\t"
                    PTR_ADDIU "%[pok], %[pok], 16          \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1], 16  \n\t"
                    PTR_ADDIU "%[pok1], %[pok1], 16        \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1], 16  \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }

            for (j = 0; j < (size & 3); j++) {
                __asm__ volatile (
                    "lw %[temp1], 0(%[pok])               \n\t"
                    "lw %[temp2], 0(%[pok1])              \n\t"
                    "sw %[temp1], 0(%[g_temp1])           \n\t"
                    "sw %[temp2], 0(%[q_temp1])           \n\t"
                    PTR_ADDIU "%[pok], %[pok], 4          \n\t"
                    PTR_ADDIU "%[g_temp1], %[g_temp1], 4  \n\t"
                    PTR_ADDIU "%[pok1], %[pok1], 4        \n\t"
                    PTR_ADDIU "%[q_temp1], %[q_temp1], 4  \n\t"

                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
                    :
                    : "memory"
                );
            }
        }
    }

    for (e = 0; e < ch_data->bs_num_env; e++) {
        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
            float *g_filt, *q_filt;

            if (h_SL && e != e_a[0] && e != e_a[1]) {
                g_filt = g_filt_tab;
                q_filt = q_filt_tab;

                for (m = 0; m < m_max; m++) {
                    const int idx1 = i + h_SL;
                    g_filt[m] = 0.0f;
                    q_filt[m] = 0.0f;

                    for (j = 0; j <= h_SL; j++) {
                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
                    }
                }
            } else {
                g_filt = g_temp[i + h_SL];
                q_filt = q_temp[i];
            }

            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
                               i + ENVELOPE_ADJUSTMENT_OFFSET);

            if (e != e_a[0] && e != e_a[1]) {
                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
                                                   q_filt, indexnoise,
                                                   kx, m_max);
            } else {
                int idx = indexsine & 1;
                int A = (1 - ((indexsine + (kx & 1)) & 2));
                int B = (A ^ (-idx)) + idx;
                float *out = &Y1[i][kx][idx];
                float *in = sbr->s_m[e];
                float temp0, temp1, temp2, temp3, temp4, temp5;
                float A_f = (float)A;
                float B_f = (float)B;

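                /* Add two sinusoids per iteration with alternating signs;
                 * scalar equivalent of the assembly below:
                 *     Y1[i][m + kx    ][idx] += in[m    ] * A;
                 *     Y1[i][m + kx + 1][idx] += in[m + 1] * B;
                 */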
                for (m = 0; m + 1 < m_max; m += 2) {

                    temp2 = out[0];
                    temp3 = out[2];

                    __asm__ volatile (
                        "lwc1 %[temp0], 0(%[in])                      \n\t"
                        "lwc1 %[temp1], 4(%[in])                      \n\t"
                        "madd.s %[temp4], %[temp2], %[temp0], %[A_f]  \n\t"
                        "madd.s %[temp5], %[temp3], %[temp1], %[B_f]  \n\t"
                        "swc1 %[temp4], 0(%[out])                     \n\t"
                        "swc1 %[temp5], 8(%[out])                     \n\t"
                        PTR_ADDIU "%[in], %[in], 8                    \n\t"
                        PTR_ADDIU "%[out], %[out], 16                 \n\t"

                        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
                          [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
                          [in]"+r"(in), [out]"+r"(out)
                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
                          [temp3]"f"(temp3)
                        : "memory"
                    );
                }
                if (m_max & 1)
                    out[2*m] += in[m] * A;
            }
            indexnoise = (indexnoise + m_max) & 0x1ff;
            indexsine = (indexsine + 1) & 3;
        }
    }
    ch_data->f_indexnoise = indexnoise;
    ch_data->f_indexsine = indexsine;
}

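/*
 * Per-subband second-order linear prediction used for inverse filtering of
 * the low band. For each k < k0 the autocorrelation phi is computed by
 * dsp->autocorrelate() and the predictor coefficients alpha0/alpha1 are
 * solved from it. Reading the loads in the first asm block against the
 * flattened phi layout, it evaluates:
 *
 *     dk = phi[2][1][0] * phi[1][0][0] -
 *          (phi[1][1][0] * phi[1][1][0] +
 *           phi[1][1][1] * phi[1][1][1]) / 1.000001f;
 *
 * Both coefficient pairs are cleared to zero when either squared magnitude
 * reaches 16.0f, i.e. when the predictor would be unstable.
 */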
static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
                                       float (*alpha0)[2], float (*alpha1)[2],
                                       const float X_low[32][40][2], int k0)
{
    int k;
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;

    c = 1.000001f;

    for (k = 0; k < k0; k++) {
        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
        float dk;
        phi1 = &phi[0][0][0];
        alpha_1 = &alpha1[k][0];
        alpha_0 = &alpha0[k][0];
        dsp->autocorrelate(X_low[k], phi);

        __asm__ volatile (
            "lwc1 %[temp0], 40(%[phi1])                   \n\t"
            "lwc1 %[temp1], 16(%[phi1])                   \n\t"
            "lwc1 %[temp2], 24(%[phi1])                   \n\t"
            "lwc1 %[temp3], 28(%[phi1])                   \n\t"
            "mul.s %[dk], %[temp0], %[temp1]              \n\t"
            "lwc1 %[temp4], 0(%[phi1])                    \n\t"
            "mul.s %[res2], %[temp2], %[temp2]            \n\t"
            "lwc1 %[temp5], 4(%[phi1])                    \n\t"
            "madd.s %[res2], %[res2], %[temp3], %[temp3]  \n\t"
            "lwc1 %[temp6], 8(%[phi1])                    \n\t"
            "div.s %[res2], %[res2], %[c]                 \n\t"
            "lwc1 %[temp0], 12(%[phi1])                   \n\t"
            "sub.s %[dk], %[dk], %[res2]                  \n\t"

            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
            : [phi1]"r"(phi1), [c]"f"(c)
            : "memory"
        );

        if (!dk) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
        } else {
            __asm__ volatile (
                "mul.s %[temp_real], %[temp4], %[temp2]                  \n\t"
                "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3]  \n\t"
                "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1]  \n\t"
                "mul.s %[temp_im], %[temp4], %[temp3]                    \n\t"
                "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2]       \n\t"
                "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1]      \n\t"
                "div.s %[temp_real], %[temp_real], %[dk]                 \n\t"
                "div.s %[temp_im], %[temp_im], %[dk]                     \n\t"
                "swc1 %[temp_real], 0(%[alpha_1])                        \n\t"
                "swc1 %[temp_im], 4(%[alpha_1])                          \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im)
                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
                  [temp5]"f"(temp5), [temp6]"f"(temp6),
                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
                : "memory"
            );
        }

        if (!phi1[4]) {
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        } else {
            __asm__ volatile (
                "lwc1 %[temp6], 0(%[alpha_1])                           \n\t"
                "lwc1 %[temp7], 4(%[alpha_1])                           \n\t"
                "mul.s %[temp_real], %[temp6], %[temp2]                 \n\t"
                "add.s %[temp_real], %[temp_real], %[temp4]             \n\t"
                "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3]  \n\t"
                "mul.s %[temp_im], %[temp7], %[temp2]                   \n\t"
                "add.s %[temp_im], %[temp_im], %[temp5]                 \n\t"
                "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3]     \n\t"
                "div.s %[temp_real], %[temp_real], %[temp1]             \n\t"
                "div.s %[temp_im], %[temp_im], %[temp1]                 \n\t"
                "neg.s %[temp_real], %[temp_real]                       \n\t"
                "neg.s %[temp_im], %[temp_im]                           \n\t"
                "swc1 %[temp_real], 0(%[alpha_0])                       \n\t"
                "swc1 %[temp_im], 4(%[alpha_0])                         \n\t"

                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
                  [res1]"=&f"(res1), [res2]"=&f"(res2)
                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
                : "memory"
            );
        }

        __asm__ volatile (
            "lwc1 %[temp1], 0(%[alpha_1])                     \n\t"
            "lwc1 %[temp2], 4(%[alpha_1])                     \n\t"
            "lwc1 %[temp_real], 0(%[alpha_0])                 \n\t"
            "lwc1 %[temp_im], 4(%[alpha_0])                   \n\t"
            "mul.s %[res1], %[temp1], %[temp1]                \n\t"
            "madd.s %[res1], %[res1], %[temp2], %[temp2]      \n\t"
            "mul.s %[res2], %[temp_real], %[temp_real]        \n\t"
            "madd.s %[res2], %[res2], %[temp_im], %[temp_im]  \n\t"

            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [res1]"=&f"(res1), [res2]"=&f"(res2)
            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
            : "memory"
        );

        if (res1 >= 16.0f || res2 >= 16.0f) {
            alpha_1[0] = 0;
            alpha_1[1] = 0;
            alpha_0[0] = 0;
            alpha_0[1] = 0;
        }
    }
}
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */

void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
{
#if HAVE_INLINE_ASM
#if HAVE_MIPSFPU
    c->sbr_lf_gen = sbr_lf_gen_mips;
    c->sbr_x_gen = sbr_x_gen_mips;
#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
    c->sbr_hf_assemble = sbr_hf_assemble_mips;
#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
}