1 /*
2 * Copyright (c) 2012
3 * MIPS Technologies, Inc., California.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
31 *
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
33 *
34 * This file is part of FFmpeg.
35 *
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
40 *
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
45 *
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
49 */
50
51 /**
52 * @file
53 * Reference: libavcodec/aaccoder.c
54 */
55
56 #include "libavutil/libm.h"
57
58 #include <float.h>
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 #include "libavcodec/aacenctab.h"
66 #include "libavcodec/aacenc_utils.h"
67
68 #if HAVE_INLINE_ASM
69 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * One entry of a band coding path.
 *
 * NOTE(review): the code that uses this struct is outside the visible
 * section, so the field descriptions below are inferred from the names
 * only -- confirm against the users of this type.
 */
typedef struct BandCodingPath {
    int   prev_idx; /* presumably the index of the preceding path entry */
    float cost;     /* presumably the cost accumulated along the path */
    int   run;      /* presumably the run length associated with this entry */
} BandCodingPath;
75
/**
 * Number of non-zero components of a 4-tuple with components in 0..2.
 *
 * Entry ((a * 3 + b) * 3 + c) * 3 + d holds (a != 0) + (b != 0) + (c != 0) + (d != 0).
 * Each row below covers one (a, b) pair, i.e. nine consecutive (c, d) combinations.
 * NOTE(review): presumably the sign-bit count for the unsigned quad codebooks;
 * the lookups are outside the visible section.
 */
static const uint8_t uquad_sign_bits[81] = {
    /* a = 0, b = 0 */ 0, 1, 1, 1, 2, 2, 1, 2, 2,
    /* a = 0, b = 1 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a = 0, b = 2 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,

    /* a = 1, b = 0 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a = 1, b = 1 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,
    /* a = 1, b = 2 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,

    /* a = 2, b = 0 */ 1, 2, 2, 2, 3, 3, 2, 3, 3,
    /* a = 2, b = 1 */ 2, 3, 3, 3, 4, 4, 3, 4, 4,
    /* a = 2, b = 2 */ 2, 3, 3, 3, 4, 4, 3, 4, 4
};
87
/**
 * Number of non-zero components of a pair with components in 0..7.
 *
 * Entry 8 * a + b holds (a != 0) + (b != 0); each row below is one value of a.
 * NOTE(review): presumably the sign-bit count for the unsigned pair codebooks
 * with maximum value 7; the lookups are outside the visible section.
 */
static const uint8_t upair7_sign_bits[64] = {
    /* a = 0 */ 0, 1, 1, 1, 1, 1, 1, 1,
    /* a = 1 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 2 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 3 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 4 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 5 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 6 */ 1, 2, 2, 2, 2, 2, 2, 2,
    /* a = 7 */ 1, 2, 2, 2, 2, 2, 2, 2
};
98
/**
 * Number of non-zero components of a pair with components in 0..12.
 *
 * Entry 13 * a + b holds (a != 0) + (b != 0); each row below is one value of a.
 * NOTE(review): presumably the sign-bit count for the unsigned pair codebooks
 * with maximum value 12; the lookups are outside the visible section.
 */
static const uint8_t upair12_sign_bits[169] = {
    /* a =  0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a =  1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
114
/**
 * Number of non-zero components of a pair with components in 0..16.
 *
 * Entry 17 * a + b holds (a != 0) + (b != 0); each row below is one value of a.
 * NOTE(review): presumably the sign-bit count for the escape codebook;
 * the lookups are outside the visible section.
 */
static const uint8_t esc_sign_bits[289] = {
    /* a =  0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* a =  1 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  3 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  4 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  5 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  6 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  7 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  8 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a =  9 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 10 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 11 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 12 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 13 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 14 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 15 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* a = 16 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
134
/**
 * Functions derived from the template function and optimized for quantizing and encoding a band
 */
/**
 * Quantize a band and write it with a signed 4-tuple codebook.
 *
 * Every group of four coefficients is quantized, reduced to values in
 * {-1, 0, 1}, merged into one codebook index and emitted with a single
 * put_bits() call.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loop advances in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function; ROUND_STANDARD is always applied
 */
static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
                                                     PutBitContext *pb, const float *in, float *out,
                                                     const float *scaled, int size, int scale_idx,
                                                     int cb, const float lambda, const float uplim,
                                                     int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    const uint8_t  *p_bits  = (const uint8_t  *)ff_aac_spectral_bits[cb - 1];
    const uint16_t *p_codes = (const uint16_t *)ff_aac_spectral_codes[cb - 1];
    const float    *p_vec   = (const float    *)ff_aac_codebook_vectors[cb - 1];
    float qenergy = 0.0f;
    int i;

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;

    for (i = 0; i < size; i += 4) {
        /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
        const int *in_int = (const int *)&in[i];
        int qc1 = (int)(scaled[i    ] * Q34 + ROUND_STANDARD);
        int qc2 = (int)(scaled[i + 1] * Q34 + ROUND_STANDARD);
        int qc3 = (int)(scaled[i + 2] * Q34 + ROUND_STANDARD);
        int qc4 = (int)(scaled[i + 3] * Q34 + ROUND_STANDARD);
        int t0, t1, t2, t3, t4, t5, t6, t7;
        int curidx;

        /*
         * qcN = (qcN > 0), then negated when the sign bit of the matching
         * input coefficient is set.
         */
        __asm__ volatile (
            ".set push                      \n\t"
            ".set noreorder                 \n\t"

            "slt  %[qc1], $zero, %[qc1]     \n\t"
            "slt  %[qc2], $zero, %[qc2]     \n\t"
            "slt  %[qc3], $zero, %[qc3]     \n\t"
            "slt  %[qc4], $zero, %[qc4]     \n\t"
            "lw   %[t0], 0(%[in_int])       \n\t"
            "lw   %[t1], 4(%[in_int])       \n\t"
            "lw   %[t2], 8(%[in_int])       \n\t"
            "lw   %[t3], 12(%[in_int])      \n\t"
            "srl  %[t0], %[t0], 31          \n\t"
            "srl  %[t1], %[t1], 31          \n\t"
            "srl  %[t2], %[t2], 31          \n\t"
            "srl  %[t3], %[t3], 31          \n\t"
            "subu %[t4], $zero, %[qc1]      \n\t"
            "subu %[t5], $zero, %[qc2]      \n\t"
            "subu %[t6], $zero, %[qc3]      \n\t"
            "subu %[t7], $zero, %[qc4]      \n\t"
            "movn %[qc1], %[t4], %[t0]      \n\t"
            "movn %[qc2], %[t5], %[t1]      \n\t"
            "movn %[qc3], %[t6], %[t2]      \n\t"
            "movn %[qc4], %[t7], %[t3]      \n\t"

            ".set pop                       \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 index of the tuple; +40 shifts the range [-40, 40] to [0, 80]. */
        curidx = ((qc1 * 3 + qc2) * 3 + qc3) * 3 + qc4 + 40;

        put_bits(pb, p_bits[curidx], p_codes[curidx]);

        if (out || energy) {
            const float *vec = &p_vec[curidx * 4];
            const float e1 = vec[0] * IQ;
            const float e2 = vec[1] * IQ;
            const float e3 = vec[2] * IQ;
            const float e4 = vec[3] * IQ;

            if (out) {
                out[i    ] = e1;
                out[i + 1] = e2;
                out[i + 2] = e3;
                out[i + 3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }

    if (energy)
        *energy = qenergy;
}
233
/**
 * Quantize a band and write it with an unsigned 4-tuple codebook.
 *
 * Every group of four coefficients is quantized, clamped to at most 2,
 * merged into one codebook index and emitted together with one sign bit
 * per non-zero value in a single put_bits() call.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loop advances in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function; ROUND_STANDARD is always applied
 */
static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
                                                     PutBitContext *pb, const float *in, float *out,
                                                     const float *scaled, int size, int scale_idx,
                                                     int cb, const float lambda, const float uplim,
                                                     int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    const uint8_t  *p_bits  = (const uint8_t  *)ff_aac_spectral_bits[cb - 1];
    const uint16_t *p_codes = (const uint16_t *)ff_aac_spectral_codes[cb - 1];
    const float    *p_vec   = (const float    *)ff_aac_codebook_vectors[cb - 1];
    float qenergy = 0.0f;
    int i;

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;

    for (i = 0; i < size; i += 4) {
        /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
        const int *in_int = (const int *)&in[i];
        int qc1 = (int)(scaled[i    ] * Q34 + ROUND_STANDARD);
        int qc2 = (int)(scaled[i + 1] * Q34 + ROUND_STANDARD);
        int qc3 = (int)(scaled[i + 2] * Q34 + ROUND_STANDARD);
        int qc4 = (int)(scaled[i + 3] * Q34 + ROUND_STANDARD);
        int curidx, sign, count;
        int t0, t1, t2, t3, t4;
        unsigned int v_codes;
        uint8_t v_bits;

        /*
         * Clamp each qcN to 2, then build 'sign' by appending one bit per
         * non-zero qcN (set when the input is negative) and 'count' as the
         * number of non-zero qcN.
         */
        __asm__ volatile (
            ".set push                              \n\t"
            ".set noreorder                         \n\t"

            "ori    %[t4],      $zero,      2       \n\t"
            "ori    %[sign],    $zero,      0       \n\t"
            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
            "lw     %[t0],      0(%[in_int])        \n\t"
            "lw     %[t1],      4(%[in_int])        \n\t"
            "lw     %[t2],      8(%[in_int])        \n\t"
            "lw     %[t3],      12(%[in_int])       \n\t"
            "slt    %[t0],      %[t0],      $zero   \n\t"
            "movn   %[sign],    %[t0],      %[qc1]  \n\t"
            "slt    %[t1],      %[t1],      $zero   \n\t"
            "slt    %[t2],      %[t2],      $zero   \n\t"
            "slt    %[t3],      %[t3],      $zero   \n\t"
            "sll    %[t0],      %[sign],    1       \n\t"
            "or     %[t0],      %[t0],      %[t1]   \n\t"
            "movn   %[sign],    %[t0],      %[qc2]  \n\t"
            "slt    %[t4],      $zero,      %[qc1]  \n\t"
            "slt    %[t1],      $zero,      %[qc2]  \n\t"
            "slt    %[count],   $zero,      %[qc3]  \n\t"
            "sll    %[t0],      %[sign],    1       \n\t"
            "or     %[t0],      %[t0],      %[t2]   \n\t"
            "movn   %[sign],    %[t0],      %[qc3]  \n\t"
            "slt    %[t2],      $zero,      %[qc4]  \n\t"
            "addu   %[count],   %[count],   %[t4]   \n\t"
            "addu   %[count],   %[count],   %[t1]   \n\t"
            "sll    %[t0],      %[sign],    1       \n\t"
            "or     %[t0],      %[t0],      %[t3]   \n\t"
            "movn   %[sign],    %[t0],      %[qc4]  \n\t"
            "addu   %[count],   %[count],   %[t2]   \n\t"

            ".set pop                               \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign]"=&r"(sign), [count]"=&r"(count),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 index of the (unsigned) tuple. */
        curidx = ((qc1 * 3 + qc2) * 3 + qc3) * 3 + qc4;

        /* Codeword followed by the 'count' sign bits. */
        v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
        v_bits  = p_bits[curidx] + count;
        put_bits(pb, v_bits, v_codes);

        if (out || energy) {
            const float *vec = &p_vec[curidx * 4];
            const float e1 = copysignf(vec[0] * IQ, in[i    ]);
            const float e2 = copysignf(vec[1] * IQ, in[i + 1]);
            const float e3 = copysignf(vec[2] * IQ, in[i + 2]);
            const float e4 = copysignf(vec[3] * IQ, in[i + 3]);

            if (out) {
                out[i    ] = e1;
                out[i + 1] = e2;
                out[i + 2] = e3;
                out[i + 3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }

    if (energy)
        *energy = qenergy;
}
348
/**
 * Quantize a band and write it with a signed pair codebook.
 *
 * Four coefficients are handled per iteration as two pairs. Each value is
 * quantized, clamped to at most 4 and given the sign of its input; the two
 * resulting codewords are concatenated and emitted with one put_bits() call.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loop advances in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function; ROUND_STANDARD is always applied
 */
static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
                                                     PutBitContext *pb, const float *in, float *out,
                                                     const float *scaled, int size, int scale_idx,
                                                     int cb, const float lambda, const float uplim,
                                                     int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    const uint8_t  *p_bits  = (const uint8_t  *)ff_aac_spectral_bits[cb - 1];
    const uint16_t *p_codes = (const uint16_t *)ff_aac_spectral_codes[cb - 1];
    const float    *p_vec   = (const float    *)ff_aac_codebook_vectors[cb - 1];
    float qenergy = 0.0f;
    int i;

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;

    for (i = 0; i < size; i += 4) {
        /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
        const int *in_int = (const int *)&in[i];
        int qc1 = (int)(scaled[i    ] * Q34 + ROUND_STANDARD);
        int qc2 = (int)(scaled[i + 1] * Q34 + ROUND_STANDARD);
        int qc3 = (int)(scaled[i + 2] * Q34 + ROUND_STANDARD);
        int qc4 = (int)(scaled[i + 3] * Q34 + ROUND_STANDARD);
        int t0, t1, t2, t3, t4, t5, t6, t7;
        int curidx, curidx2;
        unsigned int v_codes;
        uint8_t v_bits;

        /* Clamp each qcN to 4, then negate it when the input sign bit is set. */
        __asm__ volatile (
            ".set push                      \n\t"
            ".set noreorder                 \n\t"

            "ori  %[t4], $zero, 4           \n\t"
            "slt  %[t0], %[t4], %[qc1]      \n\t"
            "slt  %[t1], %[t4], %[qc2]      \n\t"
            "slt  %[t2], %[t4], %[qc3]      \n\t"
            "slt  %[t3], %[t4], %[qc4]      \n\t"
            "movn %[qc1], %[t4], %[t0]      \n\t"
            "movn %[qc2], %[t4], %[t1]      \n\t"
            "movn %[qc3], %[t4], %[t2]      \n\t"
            "movn %[qc4], %[t4], %[t3]      \n\t"
            "lw   %[t0], 0(%[in_int])       \n\t"
            "lw   %[t1], 4(%[in_int])       \n\t"
            "lw   %[t2], 8(%[in_int])       \n\t"
            "lw   %[t3], 12(%[in_int])      \n\t"
            "srl  %[t0], %[t0], 31          \n\t"
            "srl  %[t1], %[t1], 31          \n\t"
            "srl  %[t2], %[t2], 31          \n\t"
            "srl  %[t3], %[t3], 31          \n\t"
            "subu %[t4], $zero, %[qc1]      \n\t"
            "subu %[t5], $zero, %[qc2]      \n\t"
            "subu %[t6], $zero, %[qc3]      \n\t"
            "subu %[t7], $zero, %[qc4]      \n\t"
            "movn %[qc1], %[t4], %[t0]      \n\t"
            "movn %[qc2], %[t5], %[t1]      \n\t"
            "movn %[qc3], %[t6], %[t2]      \n\t"
            "movn %[qc4], %[t7], %[t3]      \n\t"

            ".set pop                       \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-9 pair indices; +40 shifts the range [-40, 40] to [0, 80]. */
        curidx  = 9 * qc1 + qc2 + 40;
        curidx2 = 9 * qc3 + qc4 + 40;

        /* First codeword in the high bits, second codeword right after it. */
        v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
        v_bits  = p_bits[curidx] + p_bits[curidx2];
        put_bits(pb, v_bits, v_codes);

        if (out || energy) {
            const float *vec1 = &p_vec[curidx  * 2];
            const float *vec2 = &p_vec[curidx2 * 2];
            const float e1 = vec1[0] * IQ;
            const float e2 = vec1[1] * IQ;
            const float e3 = vec2[0] * IQ;
            const float e4 = vec2[1] * IQ;

            if (out) {
                out[i    ] = e1;
                out[i + 1] = e2;
                out[i + 2] = e3;
                out[i + 3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }

    if (energy)
        *energy = qenergy;
}
451
/**
 * Quantize a band and write it with an unsigned pair codebook (maximum value 7).
 *
 * Four coefficients are handled per iteration as two pairs. Each value is
 * quantized and clamped to at most 7; each pair is emitted as its codeword
 * followed by one sign bit per non-zero value.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loop advances in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function; ROUND_STANDARD is always applied
 */
static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                                      PutBitContext *pb, const float *in, float *out,
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
                                                      int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    float qenergy = 0.0f;

    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;
    for (i = 0; i < size; i += 4) {
        int curidx1, curidx2, sign1, count1, sign2, count2;
        /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
        int *in_int = (int *)&in[i];
        uint8_t v_bits;
        unsigned int v_codes;
        int t0, t1, t2, t3, t4;
        const float *vec1, *vec2;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Clamp each qcN to 7, then build per pair the sign bits (one bit per
         * non-zero qcN, set when the input is negative) and their count.
         */
        __asm__ volatile (
            ".set push                              \n\t"
            ".set noreorder                         \n\t"

            "ori    %[t4],      $zero,      7       \n\t"
            "ori    %[sign1],   $zero,      0       \n\t"
            "ori    %[sign2],   $zero,      0       \n\t"
            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
            "lw     %[t0],      0(%[in_int])        \n\t"
            "lw     %[t1],      4(%[in_int])        \n\t"
            "lw     %[t2],      8(%[in_int])        \n\t"
            "lw     %[t3],      12(%[in_int])       \n\t"
            "slt    %[t0],      %[t0],      $zero   \n\t"
            "movn   %[sign1],   %[t0],      %[qc1]  \n\t"
            "slt    %[t2],      %[t2],      $zero   \n\t"
            "movn   %[sign2],   %[t2],      %[qc3]  \n\t"
            "slt    %[t1],      %[t1],      $zero   \n\t"
            "sll    %[t0],      %[sign1],   1       \n\t"
            "or     %[t0],      %[t0],      %[t1]   \n\t"
            "movn   %[sign1],   %[t0],      %[qc2]  \n\t"
            "slt    %[t3],      %[t3],      $zero   \n\t"
            "sll    %[t0],      %[sign2],   1       \n\t"
            "or     %[t0],      %[t0],      %[t3]   \n\t"
            "movn   %[sign2],   %[t0],      %[qc4]  \n\t"
            "slt    %[count1],  $zero,      %[qc1]  \n\t"
            "slt    %[t1],      $zero,      %[qc2]  \n\t"
            "slt    %[count2],  $zero,      %[qc3]  \n\t"
            "slt    %[t2],      $zero,      %[qc4]  \n\t"
            "addu   %[count1],  %[count1],  %[t1]   \n\t"
            "addu   %[count2],  %[count2],  %[t2]   \n\t"

            ".set pop                               \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            /*
             * Only "memory" is clobbered: every temporary is a named output
             * operand, so no hard register needs to be listed here.
             */
            : "memory"
        );

        /* Base-8 index of the first pair. */
        curidx1  = 8 * qc1;
        curidx1 += qc2;

        v_codes = (p_codes[curidx1] << count1) | sign1;
        v_bits  = p_bits[curidx1] + count1;
        put_bits(pb, v_bits, v_codes);

        /* Base-8 index of the second pair. */
        curidx2  = 8 * qc3;
        curidx2 += qc4;

        v_codes = (p_codes[curidx2] << count2) | sign2;
        v_bits  = p_bits[curidx2] + count2;
        put_bits(pb, v_bits, v_codes);

        if (out || energy) {
            float e1,e2,e3,e4;
            vec1 = &p_vec[curidx1*2];
            vec2 = &p_vec[curidx2*2];
            e1 = copysignf(vec1[0] * IQ, in[i+0]);
            e2 = copysignf(vec1[1] * IQ, in[i+1]);
            e3 = copysignf(vec2[0] * IQ, in[i+2]);
            e4 = copysignf(vec2[1] * IQ, in[i+3]);
            if (out) {
                out[i+0] = e1;
                out[i+1] = e2;
                out[i+2] = e3;
                out[i+3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }
    if (energy)
        *energy = qenergy;
}
569
/**
 * Quantize a band and write it with an unsigned pair codebook (maximum value 12).
 *
 * Four coefficients are handled per iteration as two pairs. Each value is
 * quantized and clamped to at most 12; each pair is emitted as its codeword
 * followed by one sign bit per non-zero value.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loop advances in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  unused in this function; ROUND_STANDARD is always applied
 */
static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                                       PutBitContext *pb, const float *in, float *out,
                                                       const float *scaled, int size, int scale_idx,
                                                       int cb, const float lambda, const float uplim,
                                                       int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    const uint8_t  *p_bits  = (const uint8_t  *)ff_aac_spectral_bits[cb - 1];
    const uint16_t *p_codes = (const uint16_t *)ff_aac_spectral_codes[cb - 1];
    const float    *p_vec   = (const float    *)ff_aac_codebook_vectors[cb - 1];
    float qenergy = 0.0f;
    int i;

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;

    for (i = 0; i < size; i += 4) {
        /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
        const int *in_int = (const int *)&in[i];
        int qc1 = (int)(scaled[i    ] * Q34 + ROUND_STANDARD);
        int qc2 = (int)(scaled[i + 1] * Q34 + ROUND_STANDARD);
        int qc3 = (int)(scaled[i + 2] * Q34 + ROUND_STANDARD);
        int qc4 = (int)(scaled[i + 3] * Q34 + ROUND_STANDARD);
        int curidx1, curidx2, sign1, count1, sign2, count2;
        int t0, t1, t2, t3, t4;
        unsigned int v_codes;
        uint8_t v_bits;

        /*
         * Clamp each qcN to 12, then build per pair the sign bits (one bit
         * per non-zero qcN, set when the input is negative) and their count.
         */
        __asm__ volatile (
            ".set push                              \n\t"
            ".set noreorder                         \n\t"

            "ori    %[t4],      $zero,      12      \n\t"
            "ori    %[sign1],   $zero,      0       \n\t"
            "ori    %[sign2],   $zero,      0       \n\t"
            "slt    %[t0],      %[t4],      %[qc1]  \n\t"
            "slt    %[t1],      %[t4],      %[qc2]  \n\t"
            "slt    %[t2],      %[t4],      %[qc3]  \n\t"
            "slt    %[t3],      %[t4],      %[qc4]  \n\t"
            "movn   %[qc1],     %[t4],      %[t0]   \n\t"
            "movn   %[qc2],     %[t4],      %[t1]   \n\t"
            "movn   %[qc3],     %[t4],      %[t2]   \n\t"
            "movn   %[qc4],     %[t4],      %[t3]   \n\t"
            "lw     %[t0],      0(%[in_int])        \n\t"
            "lw     %[t1],      4(%[in_int])        \n\t"
            "lw     %[t2],      8(%[in_int])        \n\t"
            "lw     %[t3],      12(%[in_int])       \n\t"
            "slt    %[t0],      %[t0],      $zero   \n\t"
            "movn   %[sign1],   %[t0],      %[qc1]  \n\t"
            "slt    %[t2],      %[t2],      $zero   \n\t"
            "movn   %[sign2],   %[t2],      %[qc3]  \n\t"
            "slt    %[t1],      %[t1],      $zero   \n\t"
            "sll    %[t0],      %[sign1],   1       \n\t"
            "or     %[t0],      %[t0],      %[t1]   \n\t"
            "movn   %[sign1],   %[t0],      %[qc2]  \n\t"
            "slt    %[t3],      %[t3],      $zero   \n\t"
            "sll    %[t0],      %[sign2],   1       \n\t"
            "or     %[t0],      %[t0],      %[t3]   \n\t"
            "movn   %[sign2],   %[t0],      %[qc4]  \n\t"
            "slt    %[count1],  $zero,      %[qc1]  \n\t"
            "slt    %[t1],      $zero,      %[qc2]  \n\t"
            "slt    %[count2],  $zero,      %[qc3]  \n\t"
            "slt    %[t2],      $zero,      %[qc4]  \n\t"
            "addu   %[count1],  %[count1],  %[t1]   \n\t"
            "addu   %[count2],  %[count2],  %[t2]   \n\t"

            ".set pop                               \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
              [sign2]"=&r"(sign2), [count2]"=&r"(count2),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-13 indices of the two pairs. */
        curidx1 = 13 * qc1 + qc2;
        curidx2 = 13 * qc3 + qc4;

        /* First pair: codeword followed by its sign bits. */
        v_codes = (p_codes[curidx1] << count1) | sign1;
        v_bits  = p_bits[curidx1] + count1;
        put_bits(pb, v_bits, v_codes);

        /* Second pair: codeword followed by its sign bits. */
        v_codes = (p_codes[curidx2] << count2) | sign2;
        v_bits  = p_bits[curidx2] + count2;
        put_bits(pb, v_bits, v_codes);

        if (out || energy) {
            const float *vec1 = &p_vec[curidx1 * 2];
            const float *vec2 = &p_vec[curidx2 * 2];
            const float e1 = copysignf(vec1[0] * IQ, in[i    ]);
            const float e2 = copysignf(vec1[1] * IQ, in[i + 1]);
            const float e3 = copysignf(vec2[0] * IQ, in[i + 2]);
            const float e4 = copysignf(vec2[1] * IQ, in[i + 3]);

            if (out) {
                out[i    ] = e1;
                out[i + 1] = e2;
                out[i + 2] = e3;
                out[i + 3] = e4;
            }
            if (energy)
                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
        }
    }

    if (energy)
        *energy = qenergy;
}
686
/**
 * Quantize a band and write it with the escape pair codebook.
 *
 * Four coefficients are handled per iteration as two pairs. Each value is
 * quantized with the caller-supplied rounding offset and clamped to at most
 * 16 for the codebook index; each pair is emitted as its codeword followed by
 * one sign bit per non-zero value. When cb >= 11, every component whose
 * codebook vector entry equals 64.0f is additionally followed by an escape
 * sequence built from the unclamped magnitude.
 *
 * @param s         encoder context; s->scoefs is overwritten by abs_pow34_v()
 * @param pb        bit writer receiving the codewords
 * @param in        input coefficients, consumed four at a time
 * @param out       if non-NULL, receives the dequantized coefficients
 * @param scaled    value passed in is ignored; reassigned to s->scoefs
 * @param size      number of coefficients (the loops advance in steps of 4)
 * @param scale_idx scalefactor index selecting the Q34 / IQ table entries
 * @param cb        codebook number; the tables of entry cb - 1 are used
 * @param lambda    unused in this function
 * @param uplim     unused in this function
 * @param bits      unused in this function
 * @param energy    if non-NULL, receives the sum of squared dequantized values
 * @param ROUNDING  rounding offset added before the float-to-int conversion
 */
static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
                                                   PutBitContext *pb, const float *in, float *out,
                                                   const float *scaled, int size, int scale_idx,
                                                   int cb, const float lambda, const float uplim,
                                                   int *bits, float *energy, const float ROUNDING)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    const uint8_t  *p_bits    = (const uint8_t  *)ff_aac_spectral_bits[cb - 1];
    const uint16_t *p_codes   = (const uint16_t *)ff_aac_spectral_codes[cb - 1];
    const float    *p_vectors = (const float    *)ff_aac_codebook_vectors[cb - 1];
    float qenergy = 0.0f;
    int i;

    abs_pow34_v(s->scoefs, in, size);
    scaled = s->scoefs;

    if (cb < 11) {
        /* No escape sequences: dequantized values come from the codebook vectors. */
        for (i = 0; i < size; i += 4) {
            /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
            const int *in_int = (const int *)&in[i];
            int qc1 = (int)(scaled[i    ] * Q34 + ROUNDING);
            int qc2 = (int)(scaled[i + 1] * Q34 + ROUNDING);
            int qc3 = (int)(scaled[i + 2] * Q34 + ROUNDING);
            int qc4 = (int)(scaled[i + 3] * Q34 + ROUNDING);
            int curidx, curidx2, sign1, count1, sign2, count2;
            int t0, t1, t2, t3, t4;
            unsigned int v_codes;
            uint8_t v_bits;

            /*
             * Clamp each qcN to 16, then build per pair the sign bits (one bit
             * per non-zero qcN, set when the input is negative) and their count.
             */
            __asm__ volatile (
                ".set push                                  \n\t"
                ".set noreorder                             \n\t"

                "ori        %[t4],      $zero,      16      \n\t"
                "ori        %[sign1],   $zero,      0       \n\t"
                "ori        %[sign2],   $zero,      0       \n\t"
                "slt        %[t0],      %[t4],      %[qc1]  \n\t"
                "slt        %[t1],      %[t4],      %[qc2]  \n\t"
                "slt        %[t2],      %[t4],      %[qc3]  \n\t"
                "slt        %[t3],      %[t4],      %[qc4]  \n\t"
                "movn       %[qc1],     %[t4],      %[t0]   \n\t"
                "movn       %[qc2],     %[t4],      %[t1]   \n\t"
                "movn       %[qc3],     %[t4],      %[t2]   \n\t"
                "movn       %[qc4],     %[t4],      %[t3]   \n\t"
                "lw         %[t0],      0(%[in_int])        \n\t"
                "lw         %[t1],      4(%[in_int])        \n\t"
                "lw         %[t2],      8(%[in_int])        \n\t"
                "lw         %[t3],      12(%[in_int])       \n\t"
                "slt        %[t0],      %[t0],      $zero   \n\t"
                "movn       %[sign1],   %[t0],      %[qc1]  \n\t"
                "slt        %[t2],      %[t2],      $zero   \n\t"
                "movn       %[sign2],   %[t2],      %[qc3]  \n\t"
                "slt        %[t1],      %[t1],      $zero   \n\t"
                "sll        %[t0],      %[sign1],   1       \n\t"
                "or         %[t0],      %[t0],      %[t1]   \n\t"
                "movn       %[sign1],   %[t0],      %[qc2]  \n\t"
                "slt        %[t3],      %[t3],      $zero   \n\t"
                "sll        %[t0],      %[sign2],   1       \n\t"
                "or         %[t0],      %[t0],      %[t3]   \n\t"
                "movn       %[sign2],   %[t0],      %[qc4]  \n\t"
                "slt        %[count1],  $zero,      %[qc1]  \n\t"
                "slt        %[t1],      $zero,      %[qc2]  \n\t"
                "slt        %[count2],  $zero,      %[qc3]  \n\t"
                "slt        %[t2],      $zero,      %[qc4]  \n\t"
                "addu       %[count1],  %[count1],  %[t1]   \n\t"
                "addu       %[count2],  %[count2],  %[t2]   \n\t"

                ".set pop                                   \n\t"

                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
                  [t4]"=&r"(t4)
                : [in_int]"r"(in_int)
                : "memory"
            );

            /* Base-17 indices of the two pairs. */
            curidx  = 17 * qc1 + qc2;
            curidx2 = 17 * qc3 + qc4;

            v_codes = (p_codes[curidx] << count1) | sign1;
            v_bits  = p_bits[curidx] + count1;
            put_bits(pb, v_bits, v_codes);

            v_codes = (p_codes[curidx2] << count2) | sign2;
            v_bits  = p_bits[curidx2] + count2;
            put_bits(pb, v_bits, v_codes);

            if (out || energy) {
                const float *vec1 = &p_vectors[curidx  * 2];
                const float *vec2 = &p_vectors[curidx2 * 2];
                const float e1 = copysignf(vec1[0] * IQ, in[i    ]);
                const float e2 = copysignf(vec1[1] * IQ, in[i + 1]);
                const float e3 = copysignf(vec2[0] * IQ, in[i + 2]);
                const float e4 = copysignf(vec2[1] * IQ, in[i + 3]);

                if (out) {
                    out[i    ] = e1;
                    out[i + 1] = e2;
                    out[i + 2] = e3;
                    out[i + 3] = e4;
                }
                if (energy)
                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
            }
        }
    } else {
        /* Escape sequences enabled: dequantized values come from c1..c4. */
        for (i = 0; i < size; i += 4) {
            /* The asm reads the raw IEEE bit patterns of in[i..i+3] to get the sign bits. */
            const int *in_int = (const int *)&in[i];
            int qc1 = (int)(scaled[i    ] * Q34 + ROUNDING);
            int qc2 = (int)(scaled[i + 1] * Q34 + ROUNDING);
            int qc3 = (int)(scaled[i + 2] * Q34 + ROUNDING);
            int qc4 = (int)(scaled[i + 3] * Q34 + ROUNDING);
            int curidx, curidx2, sign1, count1, sign2, count2;
            int c1, c2, c3, c4;
            int t0, t1, t2, t3, t4;
            unsigned int v_codes;
            uint8_t v_bits;

            /*
             * cN keeps the magnitude before the clamp to 16: shll_s.w by 18
             * followed by srl by 18.
             * NOTE(review): presumably this limits cN to 13 bits via the
             * saturating shift -- confirm against the MIPS DSP ASE manual.
             * The remainder is identical to the cb < 11 case.
             */
            __asm__ volatile (
                ".set push                                  \n\t"
                ".set noreorder                             \n\t"

                "ori        %[t4],      $zero,      16      \n\t"
                "ori        %[sign1],   $zero,      0       \n\t"
                "ori        %[sign2],   $zero,      0       \n\t"
                "shll_s.w   %[c1],      %[qc1],     18      \n\t"
                "shll_s.w   %[c2],      %[qc2],     18      \n\t"
                "shll_s.w   %[c3],      %[qc3],     18      \n\t"
                "shll_s.w   %[c4],      %[qc4],     18      \n\t"
                "srl        %[c1],      %[c1],      18      \n\t"
                "srl        %[c2],      %[c2],      18      \n\t"
                "srl        %[c3],      %[c3],      18      \n\t"
                "srl        %[c4],      %[c4],      18      \n\t"
                "slt        %[t0],      %[t4],      %[qc1]  \n\t"
                "slt        %[t1],      %[t4],      %[qc2]  \n\t"
                "slt        %[t2],      %[t4],      %[qc3]  \n\t"
                "slt        %[t3],      %[t4],      %[qc4]  \n\t"
                "movn       %[qc1],     %[t4],      %[t0]   \n\t"
                "movn       %[qc2],     %[t4],      %[t1]   \n\t"
                "movn       %[qc3],     %[t4],      %[t2]   \n\t"
                "movn       %[qc4],     %[t4],      %[t3]   \n\t"
                "lw         %[t0],      0(%[in_int])        \n\t"
                "lw         %[t1],      4(%[in_int])        \n\t"
                "lw         %[t2],      8(%[in_int])        \n\t"
                "lw         %[t3],      12(%[in_int])       \n\t"
                "slt        %[t0],      %[t0],      $zero   \n\t"
                "movn       %[sign1],   %[t0],      %[qc1]  \n\t"
                "slt        %[t2],      %[t2],      $zero   \n\t"
                "movn       %[sign2],   %[t2],      %[qc3]  \n\t"
                "slt        %[t1],      %[t1],      $zero   \n\t"
                "sll        %[t0],      %[sign1],   1       \n\t"
                "or         %[t0],      %[t0],      %[t1]   \n\t"
                "movn       %[sign1],   %[t0],      %[qc2]  \n\t"
                "slt        %[t3],      %[t3],      $zero   \n\t"
                "sll        %[t0],      %[sign2],   1       \n\t"
                "or         %[t0],      %[t0],      %[t3]   \n\t"
                "movn       %[sign2],   %[t0],      %[qc4]  \n\t"
                "slt        %[count1],  $zero,      %[qc1]  \n\t"
                "slt        %[t1],      $zero,      %[qc2]  \n\t"
                "slt        %[count2],  $zero,      %[qc3]  \n\t"
                "slt        %[t2],      $zero,      %[qc4]  \n\t"
                "addu       %[count1],  %[count1],  %[t1]   \n\t"
                "addu       %[count2],  %[count2],  %[t2]   \n\t"

                ".set pop                                   \n\t"

                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
                  [c1]"=&r"(c1), [c2]"=&r"(c2),
                  [c3]"=&r"(c3), [c4]"=&r"(c4),
                  [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
                  [t4]"=&r"(t4)
                : [in_int]"r"(in_int)
                : "memory"
            );

            /* Base-17 indices of the two pairs. */
            curidx  = 17 * qc1 + qc2;
            curidx2 = 17 * qc3 + qc4;

            /* First pair: codeword + sign bits, then its escape sequences. */
            v_codes = (p_codes[curidx] << count1) | sign1;
            v_bits  = p_bits[curidx] + count1;
            put_bits(pb, v_bits, v_codes);

            if (p_vectors[curidx * 2] == 64.0f) {
                const int len = av_log2(c1);
                /* (len - 3)-bit prefix followed by the low len bits of c1 */
                v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }
            if (p_vectors[curidx * 2 + 1] == 64.0f) {
                const int len = av_log2(c2);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }

            /* Second pair: codeword + sign bits, then its escape sequences. */
            v_codes = (p_codes[curidx2] << count2) | sign2;
            v_bits  = p_bits[curidx2] + count2;
            put_bits(pb, v_bits, v_codes);

            if (p_vectors[curidx2 * 2] == 64.0f) {
                const int len = av_log2(c3);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }
            if (p_vectors[curidx2 * 2 + 1] == 64.0f) {
                const int len = av_log2(c4);
                v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
                put_bits(pb, len * 2 - 3, v_codes);
            }

            if (out || energy) {
                /* cN * cbrtf(cN) is cN raised to the power 4/3 */
                const float e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i    ]);
                const float e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i + 1]);
                const float e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i + 2]);
                const float e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i + 3]);

                if (out) {
                    out[i    ] = e1;
                    out[i + 1] = e2;
                    out[i + 2] = e3;
                    out[i + 3] = e4;
                }
                if (energy)
                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
            }
        }
    }

    if (energy)
        *energy = qenergy;
}
931
/**
 * Placeholder used for the dispatch-table slot of codebook 12, which does
 * not exist. It must never be called: the body is an unconditional
 * av_assert0(0). All parameters are unused.
 */
static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
                                                    PutBitContext *pb, const float *in, float *out,
                                                    const float *scaled, int size, int scale_idx,
                                                    int cb, const float lambda, const float uplim,
                                                    int *bits, float *energy, const float ROUNDING)
{
    av_assert0(0);
}
939
/**
 * Encoder variant that emits no spectral data.
 *
 * Nothing is written through pb. The optional outputs are cleared instead:
 * *bits receives 0, *energy receives 0.0f and out[] is filled with 0.0f.
 *
 * @param out    optional destination buffer, cleared four floats at a time
 * @param size   number of coefficients in the band
 * @param bits   optional, receives 0
 * @param energy optional, receives 0.0f
 *
 * s, pb, in, scaled, scale_idx, cb, lambda, uplim and ROUNDING are unused.
 */
static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
                                                    PutBitContext *pb,
                                                    const float *in,
                                                    float *out,
                                                    const float *scaled,
                                                    int size,
                                                    int scale_idx,
                                                    int cb,
                                                    const float lambda,
                                                    const float uplim,
                                                    int *bits,
                                                    float *energy,
                                                    const float ROUNDING)
{
    if (bits)
        *bits = 0;

    if (out) {
        int pos = 0;

        /* Whole groups of four are written; a size that is not a multiple
         * of four is effectively rounded up to the next group. */
        while (pos < size) {
            float *quad = out + pos;

            quad[0] = 0.0f;
            quad[1] = 0.0f;
            quad[2] = 0.0f;
            quad[3] = 0.0f;
            pos += 4;
        }
    }

    if (energy)
        *energy = 0.0f;
}
959
/*
 * Table of band encoders with 16 entries (indices 0..15). The
 * quantize_and_encode_band_cost() macro indexes it directly with the
 * codebook number cb, without any range check.
 */
static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
                                                         PutBitContext *pb, const float *in, float *out,
                                                         const float *scaled, int size, int scale_idx,
                                                         int cb, const float lambda, const float uplim,
                                                         int *bits, float *energy, const float ROUNDING) = {
    quantize_and_encode_band_cost_ZERO_mips,    /* index 0  */
    quantize_and_encode_band_cost_SQUAD_mips,   /* index 1  */
    quantize_and_encode_band_cost_SQUAD_mips,   /* index 2  */
    quantize_and_encode_band_cost_UQUAD_mips,   /* index 3  */
    quantize_and_encode_band_cost_UQUAD_mips,   /* index 4  */
    quantize_and_encode_band_cost_SPAIR_mips,   /* index 5  */
    quantize_and_encode_band_cost_SPAIR_mips,   /* index 6  */
    quantize_and_encode_band_cost_UPAIR7_mips,  /* index 7  */
    quantize_and_encode_band_cost_UPAIR7_mips,  /* index 8  */
    quantize_and_encode_band_cost_UPAIR12_mips, /* index 9  */
    quantize_and_encode_band_cost_UPAIR12_mips, /* index 10 */
    quantize_and_encode_band_cost_ESC_mips,     /* index 11 */
    quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
    quantize_and_encode_band_cost_ZERO_mips,    /* index 13 */
    quantize_and_encode_band_cost_ZERO_mips,    /* index 14 */
    quantize_and_encode_band_cost_ZERO_mips,    /* index 15 */
};
982
/*
 * Dispatch to the encoder stored at quantize_and_encode_band_cost_arr[cb],
 * forwarding all thirteen arguments unchanged. cb is expanded twice (once as
 * the table index, once as an argument), so it must not have side effects.
 */
#define quantize_and_encode_band_cost(                                  \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING)  \
    quantize_and_encode_band_cost_arr[cb](                              \
                                s, pb, in, out, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits, energy, ROUNDING)
989
/**
 * Hand one band to the encoder selected by cb.
 *
 * The call is forwarded through quantize_and_encode_band_cost() with no
 * pre-scaled input (scaled == NULL), an upper limit of INFINITY and without
 * requesting the bit count or the energy.
 *
 * @param rtz non-zero selects ROUND_TO_ZERO, zero selects ROUND_STANDARD
 */
static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
                                          const float *in, float *out, int size, int scale_idx,
                                          int cb, const float lambda, int rtz)
{
    const float rounding = rtz ? ROUND_TO_ZERO : ROUND_STANDARD;

    quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb,
                                  lambda, INFINITY, NULL, NULL, rounding);
}
997
998 /**
999 * Functions developed from template function and optimized for getting the number of bits
1000 */
/**
 * Bit counter for bands that carry no spectral data.
 *
 * All arguments are ignored (in particular *bits is not written) and the
 * reported size is always zero.
 *
 * @return 0
 */
static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
                                        PutBitContext *pb,
                                        const float *in,
                                        const float *scaled,
                                        int size,
                                        int scale_idx,
                                        int cb,
                                        const float lambda,
                                        const float uplim,
                                        int *bits)
{
    return 0.0f;
}
1009
/**
 * Placeholder bit counter for a codebook number that has no implementation.
 *
 * Every argument is ignored. The function evaluates av_assert0(0); the
 * return statement only exists to satisfy the signature.
 *
 * @return 0
 */
static float get_band_numbits_NONE_mips(struct AACEncContext *s,
                                        PutBitContext *pb,
                                        const float *in,
                                        const float *scaled,
                                        int size,
                                        int scale_idx,
                                        int cb,
                                        const float lambda,
                                        const float uplim,
                                        int *bits)
{
    av_assert0(0);
    return 0.0f;
}
1019
/**
 * Count the code bits of a band for a signed quad codebook.
 *
 * Coefficients are handled four at a time. Each one is quantized from
 * scaled[] with ROUND_STANDARD, reduced to -1, 0 or 1 using the sign bit of
 * the matching in[] value, and the four results form one index into the
 * ff_aac_spectral_bits[cb-1] length table.
 *
 * @param in        band coefficients; only their sign bits are read
 * @param scaled    values multiplied by the quantizer scale
 *                  (NOTE(review): presumably |in|^(3/4) - confirm with caller)
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated code length in bits, converted to float
 *
 * s, pb, lambda, uplim and bits are not used.
 */
static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx;
        /* Integer view of in[i..i+3]; only dereferenced inside the asm below. */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* qcN = (0 < qcN): every positive value becomes 1, the rest 0. */
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            /* Load the raw bits of the four inputs and isolate bit 31 (sign). */
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            /* t4..t7 = -qcN; movn selects the negated value when the sign bit is set. */
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 index of the four values; +40 (27+9+3+1) shifts each digit
         * from the range -1..1 to 0..2. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;

        curbits += p_bits[curidx];
    }
    return curbits;
}
1091
/**
 * Count the code bits of a band for an unsigned quad codebook.
 *
 * Coefficients are handled four at a time. Each one is quantized from
 * scaled[] with ROUND_STANDARD and limited to at most 2. The four values
 * form a base-3 index used for both ff_aac_spectral_bits[cb-1] and
 * uquad_sign_bits[]; the two table values are added to the total.
 *
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used.
 */
static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx;
        int t0, t1, t2, t3, t4;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 2; tN = (2 < qcN); movn replaces qcN by 2 when it exceeds 2. */
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Base-3 index built from the four limited magnitudes. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;

        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx];
    }
    return curbits;
}
1149
/**
 * Count the code bits of a band for a signed pair codebook.
 *
 * Coefficients are handled four at a time, as two pairs. Each one is
 * quantized from scaled[] with ROUND_STANDARD, limited to at most 4 and
 * negated when the sign bit of the matching in[] value is set. Each pair
 * yields one index into ff_aac_spectral_bits[cb-1].
 *
 * @param in        band coefficients; only their sign bits are read
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated code length in bits, converted to float
 *
 * s, pb, lambda, uplim and bits are not used.
 */
static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
                                         int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        /* Integer view of in[i..i+3]; only dereferenced inside the asm below. */
        int *in_int = (int *)&in[i];
        int t0, t1, t2, t3, t4, t5, t6, t7;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 4; tN = (4 < qcN); movn replaces qcN by 4 when it exceeds 4. */
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            /* Load the raw bits of the four inputs and isolate bit 31 (sign). */
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            /* t4..t7 = -qcN (t4 is reused here); movn selects the negated
             * value when the sign bit is set. */
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-9 pair index; +40 (4*9+4) shifts each digit from -4..4 to 0..8. */
        curidx = 9 * qc1;
        curidx += qc2 + 40;

        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;

        curbits += p_bits[curidx] + p_bits[curidx2];
    }
    return curbits;
}
1223
/**
 * Count the code bits of a band for an unsigned pair codebook with a
 * maximum magnitude of 7.
 *
 * Coefficients are handled four at a time, as two pairs. Each one is
 * quantized from scaled[] with ROUND_STANDARD and limited to at most 7.
 * Each pair yields a base-8 index used for both ff_aac_spectral_bits[cb-1]
 * and upair7_sign_bits[]; the table values are added to the total.
 *
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used.
 */
static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
                                          int cb, const float lambda, const float uplim,
                                          int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 7; tN = (7 < qcN); movn replaces qcN by 7 when it exceeds 7. */
            "ori %[t4], $zero, 7 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* One base-8 index per pair. */
        curidx = 8 * qc1;
        curidx += qc2;

        curidx2 = 8 * qc3;
        curidx2 += qc4;

        curbits += p_bits[curidx] +
                   upair7_sign_bits[curidx] +
                   p_bits[curidx2] +
                   upair7_sign_bits[curidx2];
    }
    return curbits;
}
1281
/**
 * Count the code bits of a band for an unsigned pair codebook with a
 * maximum magnitude of 12.
 *
 * Coefficients are handled four at a time, as two pairs. Each one is
 * quantized from scaled[] with ROUND_STANDARD and limited to at most 12.
 * Each pair yields a base-13 index used for both ff_aac_spectral_bits[cb-1]
 * and upair12_sign_bits[]; the table values are added to the total.
 *
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used.
 */
static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
                                           int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int t0, t1, t2, t3, t4;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 12; tN = (12 < qcN); movn replaces qcN by 12 when it exceeds 12. */
            "ori %[t4], $zero, 12 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* One base-13 index per pair. */
        curidx = 13 * qc1;
        curidx += qc2;

        curidx2 = 13 * qc3;
        curidx2 += qc4;

        curbits += p_bits[curidx] +
                   p_bits[curidx2] +
                   upair12_sign_bits[curidx] +
                   upair12_sign_bits[curidx2];
    }
    return curbits;
}
1339
/**
 * Count the code bits of a band for the escape codebook.
 *
 * Coefficients are handled four at a time, as two pairs. Each one is
 * quantized from scaled[] with ROUND_STANDARD. Values above 15 are replaced
 * by 16 for the table lookup and additionally cost 2*floor(log2(c)) - 3
 * bits, where c is the (capped) quantized value. This is the same length
 * expression (len * 2 - 3) that the ESC encoder in this file passes to
 * put_bits(). Each pair yields a base-17 index used for both
 * ff_aac_spectral_bits[cb-1] and esc_sign_bits[].
 *
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the ff_aac_pow34sf_tab entry used as scale
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits
 * @return accumulated bit count, converted to float
 *
 * s, pb, in, lambda, uplim and bits are not used.
 */
static float get_band_numbits_ESC_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    int i;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];

    for (i = 0; i < size; i += 4) {
        int curidx, curidx2;
        int cond0, cond1, cond2, cond3;
        int c1, c2, c3, c4;
        int t4, t5;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 15 (escape threshold), t5 = 16 (table value for escapes). */
            "ori %[t4], $zero, 15 \n\t"
            "ori %[t5], $zero, 16 \n\t"
            /* NOTE(review): shll_s.w looks like the DSP ASE saturating left
             * shift; combined with the logical right shift by 18 it would
             * cap cN at 13 bits (8191) - confirm against the ISA manual. */
            "shll_s.w %[c1], %[qc1], 18 \n\t"
            "shll_s.w %[c2], %[qc2], 18 \n\t"
            "shll_s.w %[c3], %[qc3], 18 \n\t"
            "shll_s.w %[c4], %[qc4], 18 \n\t"
            "srl %[c1], %[c1], 18 \n\t"
            "srl %[c2], %[c2], 18 \n\t"
            "srl %[c3], %[c3], 18 \n\t"
            "srl %[c4], %[c4], 18 \n\t"
            /* condN = (15 < qcN); qcN becomes 16 when the condition holds. */
            "slt %[cond0], %[t4], %[qc1] \n\t"
            "slt %[cond1], %[t4], %[qc2] \n\t"
            "slt %[cond2], %[t4], %[qc3] \n\t"
            "slt %[cond3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t5], %[cond0] \n\t"
            "movn %[qc2], %[t5], %[cond1] \n\t"
            "movn %[qc3], %[t5], %[cond2] \n\t"
            "movn %[qc4], %[t5], %[cond3] \n\t"
            /* cN = 31 - clz(cN): position of the highest set bit. */
            "ori %[t5], $zero, 31 \n\t"
            "clz %[c1], %[c1] \n\t"
            "clz %[c2], %[c2] \n\t"
            "clz %[c3], %[c3] \n\t"
            "clz %[c4], %[c4] \n\t"
            "subu %[c1], %[t5], %[c1] \n\t"
            "subu %[c2], %[t5], %[c2] \n\t"
            "subu %[c3], %[t5], %[c3] \n\t"
            "subu %[c4], %[t5], %[c4] \n\t"
            /* cN = 2 * cN - 3: length of the escape sequence in bits. */
            "sll %[c1], %[c1], 1 \n\t"
            "sll %[c2], %[c2], 1 \n\t"
            "sll %[c3], %[c3], 1 \n\t"
            "sll %[c4], %[c4], 1 \n\t"
            "addiu %[c1], %[c1], -3 \n\t"
            "addiu %[c2], %[c2], -3 \n\t"
            "addiu %[c3], %[c3], -3 \n\t"
            "addiu %[c4], %[c4], -3 \n\t"
            /* condN = -condN gives an all-ones or all-zero mask, so the
             * escape length is kept only for values that exceeded 15. */
            "subu %[cond0], $zero, %[cond0] \n\t"
            "subu %[cond1], $zero, %[cond1] \n\t"
            "subu %[cond2], $zero, %[cond2] \n\t"
            "subu %[cond3], $zero, %[cond3] \n\t"
            "and %[c1], %[c1], %[cond0] \n\t"
            "and %[c2], %[c2], %[cond1] \n\t"
            "and %[c3], %[c3], %[cond2] \n\t"
            "and %[c4], %[c4], %[cond3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
              [c1]"=&r"(c1), [c2]"=&r"(c2),
              [c3]"=&r"(c3), [c4]"=&r"(c4),
              [t4]"=&r"(t4), [t5]"=&r"(t5)
        );

        /* One base-17 index per pair (values 0..16). */
        curidx = 17 * qc1;
        curidx += qc2;

        curidx2 = 17 * qc3;
        curidx2 += qc4;

        curbits += p_bits[curidx];
        curbits += esc_sign_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += esc_sign_bits[curidx2];

        /* Escape sequence lengths (zero for values that did not escape). */
        curbits += c1;
        curbits += c2;
        curbits += c3;
        curbits += c4;
    }
    return curbits;
}
1441
/*
 * Table of bit-count functions with 16 entries (indices 0..15). The
 * get_band_numbits() macro indexes it directly with the codebook number cb,
 * without any range check.
 */
static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
                                             PutBitContext *pb, const float *in,
                                             const float *scaled, int size, int scale_idx,
                                             int cb, const float lambda, const float uplim,
                                             int *bits) = {
    get_band_numbits_ZERO_mips,    /* index 0  */
    get_band_numbits_SQUAD_mips,   /* index 1  */
    get_band_numbits_SQUAD_mips,   /* index 2  */
    get_band_numbits_UQUAD_mips,   /* index 3  */
    get_band_numbits_UQUAD_mips,   /* index 4  */
    get_band_numbits_SPAIR_mips,   /* index 5  */
    get_band_numbits_SPAIR_mips,   /* index 6  */
    get_band_numbits_UPAIR7_mips,  /* index 7  */
    get_band_numbits_UPAIR7_mips,  /* index 8  */
    get_band_numbits_UPAIR12_mips, /* index 9  */
    get_band_numbits_UPAIR12_mips, /* index 10 */
    get_band_numbits_ESC_mips,     /* index 11 */
    get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
    get_band_numbits_ZERO_mips,    /* index 13 */
    get_band_numbits_ZERO_mips,    /* index 14 */
    get_band_numbits_ZERO_mips,    /* index 15 */
};
1464
/*
 * Dispatch to the bit counter stored at get_band_numbits_arr[cb], forwarding
 * all ten arguments unchanged. cb is expanded twice (once as the table
 * index, once as an argument), so it must not have side effects.
 */
#define get_band_numbits(                                  \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits)                    \
    get_band_numbits_arr[cb](                              \
                                s, pb, in, scaled, size, scale_idx, cb, \
                                lambda, uplim, bits)
1471
/**
 * Return the number of bits needed to code a band with codebook cb.
 *
 * The work is delegated to get_band_numbits() with a NULL PutBitContext.
 *
 * NOTE(review): energy and rtz are not used here, and none of the bit
 * counters visible in this file write through bits, so *bits and *energy
 * appear to be left untouched - confirm that no caller relies on them.
 *
 * @return the value produced by the selected bit counter
 */
static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
                                     int *bits, float *energy, int rtz)
{
    const float numbits = get_band_numbits(s, NULL, in, scaled, size, scale_idx,
                                           cb, lambda, uplim, bits);

    return numbits;
}
1479
1480 /**
1481 * Functions developed from template function and optimized for getting the band cost
1482 */
1483 #if HAVE_MIPSFPU
/**
 * Cost of a band that is coded without any spectral data.
 *
 * The distortion is the sum of the squared input coefficients, accumulated
 * four at a time. No bits are spent and no energy is kept.
 *
 * @param in     band coefficients
 * @param size   number of coefficients, consumed in groups of four
 * @param lambda factor applied to the accumulated distortion
 * @param bits   optional, receives 0
 * @param energy optional, receives 0.0f
 * @return accumulated squared input multiplied by lambda
 *
 * s, pb, scaled, scale_idx, cb and uplim are not used.
 */
static float get_band_cost_ZERO_mips(struct AACEncContext *s,
                                     PutBitContext *pb,
                                     const float *in,
                                     const float *scaled,
                                     int size,
                                     int scale_idx,
                                     int cb,
                                     const float lambda,
                                     const float uplim,
                                     int *bits,
                                     float *energy)
{
    float cost = 0;
    int pos = 0;

    /* The additions stay in index order so the float result is unchanged. */
    while (pos < size) {
        const float *quad = in + pos;

        cost += quad[0] * quad[0];
        cost += quad[1] * quad[1];
        cost += quad[2] * quad[2];
        cost += quad[3] * quad[3];
        pos += 4;
    }

    if (bits)
        *bits = 0;
    if (energy)
        *energy = 0.0f;

    return cost * lambda;
}
1505
/**
 * Placeholder cost function for a codebook number that has no implementation.
 *
 * Every argument is ignored. The function evaluates av_assert0(0); the
 * return statement only exists to satisfy the signature.
 *
 * @return 0
 */
static float get_band_cost_NONE_mips(struct AACEncContext *s,
                                     PutBitContext *pb,
                                     const float *in,
                                     const float *scaled,
                                     int size,
                                     int scale_idx,
                                     int cb,
                                     const float lambda,
                                     const float uplim,
                                     int *bits,
                                     float *energy)
{
    av_assert0(0);
    return 0.0f;
}
1515
/**
 * Rate-distortion cost of a band for a signed quad codebook.
 *
 * Coefficients are handled four at a time. Each one is quantized from
 * scaled[] with ROUND_STANDARD, reduced to -1, 0 or 1 using the sign bit of
 * the matching in[] value, and the four results select one codebook entry.
 * The entry supplies the code length and the four reconstruction values;
 * the distortion is the squared difference between in[] and those values
 * multiplied by IQ.
 *
 * @param in        band coefficients
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the entries of ff_aac_pow34sf_tab (quantizer
 *                  scale) and ff_aac_pow2sf_tab (IQ)
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits and
 *                  ff_aac_codebook_vectors
 * @param lambda    factor applied to the accumulated distortion
 * @param bits      optional, receives the accumulated code length
 * @param energy    optional, receives the summed squared codebook values
 *                  multiplied by IQ*IQ
 * @return distortion * lambda + code length
 *
 * s, pb and uplim are not used.
 */
static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    /* Despite its name, p_codes points at the codebook vector table. */
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        /* Integer and float views of in[i..i+3], dereferenced only in the asm. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* qcN = (0 < qcN): every positive value becomes 1, the rest 0. */
            "slt %[qc1], $zero, %[qc1] \n\t"
            "slt %[qc2], $zero, %[qc2] \n\t"
            "slt %[qc3], $zero, %[qc3] \n\t"
            "slt %[qc4], $zero, %[qc4] \n\t"
            /* Load the raw bits of the four inputs and isolate bit 31 (sign). */
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            /* t4..t7 = -qcN; movn selects the negated value when the sign bit is set. */
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-3 index of the four values; +40 (27+9+3+1) shifts each digit
         * from the range -1..1 to 0..2. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;
        curidx += 40;

        curbits += p_bits[curidx];
        /* Four reconstruction values per codebook entry. */
        vec = &p_codes[curidx*4];

        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                 + vec[2]*vec[2] + vec[3]*vec[3];

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* Even registers hold in[], odd registers hold the codebook values. */
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 8(%[vec]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 12(%[vec]) \n\t"
            /* nmsub.s fd, fr, fs, ft yields fr - fs*ft: diN = in - vec * IQ. */
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"

            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
                + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
1634
/**
 * Rate-distortion cost of a band for an unsigned quad codebook.
 *
 * Coefficients are handled four at a time. Each one is quantized from
 * scaled[] with ROUND_STANDARD and limited to at most 2; the four values
 * select one codebook entry. The entry supplies the code length (plus the
 * matching uquad_sign_bits[] value) and the four reconstruction values;
 * the distortion is the squared difference between |in[]| and those values
 * multiplied by IQ.
 *
 * @param in        band coefficients; their absolute values are compared
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the entries of ff_aac_pow34sf_tab (quantizer
 *                  scale) and ff_aac_pow2sf_tab (IQ)
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits and
 *                  ff_aac_codebook_vectors
 * @param lambda    factor applied to the accumulated distortion
 * @param bits      optional, receives the accumulated bit count
 * @param energy    optional, receives the summed squared codebook values
 *                  multiplied by IQ*IQ
 * @return distortion * lambda + bit count
 *
 * s, pb and uplim are not used.
 */
static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int curbits = 0;
    int qc1, qc2, qc3, qc4;

    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
    /* Despite its name, p_codes points at the codebook vector table. */
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec;
        int curidx;
        /* Float view of in[i..i+3], dereferenced only in the asm. */
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 2; tN = (2 < qcN); movn replaces qcN by 2 when it exceeds 2. */
            "ori %[t4], $zero, 2 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Base-3 index built from the four limited magnitudes. */
        curidx = qc1;
        curidx *= 3;
        curidx += qc2;
        curidx *= 3;
        curidx += qc3;
        curidx *= 3;
        curidx += qc4;

        curbits += p_bits[curidx];
        curbits += uquad_sign_bits[curidx];
        /* Four reconstruction values per codebook entry. */
        vec = &p_codes[curidx*4];

        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                 + vec[2]*vec[2] + vec[3]*vec[3];

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* diN = |in[i+N]| */
            "lwc1 %[di0], 0(%[in_pos]) \n\t"
            "lwc1 %[di1], 4(%[in_pos]) \n\t"
            "lwc1 %[di2], 8(%[in_pos]) \n\t"
            "lwc1 %[di3], 12(%[in_pos]) \n\t"
            "abs.s %[di0], %[di0] \n\t"
            "abs.s %[di1], %[di1] \n\t"
            "abs.s %[di2], %[di2] \n\t"
            "abs.s %[di3], %[di3] \n\t"
            /* $f0..$f3 hold the codebook values. */
            "lwc1 $f0, 0(%[vec]) \n\t"
            "lwc1 $f1, 4(%[vec]) \n\t"
            "lwc1 $f2, 8(%[vec]) \n\t"
            "lwc1 $f3, 12(%[vec]) \n\t"
            /* nmsub.s fd, fr, fs, ft yields fr - fs*ft: diN = |in| - vec * IQ. */
            "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
            "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
            "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
            "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"

            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
                + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
1742
/**
 * Rate-distortion cost of a band for a signed pair codebook.
 *
 * Coefficients are handled four at a time, as two pairs. Each one is
 * quantized from scaled[] with ROUND_STANDARD, limited to at most 4 and
 * negated when the sign bit of the matching in[] value is set. Each pair
 * selects one codebook entry that supplies the code length and two
 * reconstruction values; the distortion is the squared difference between
 * in[] and those values multiplied by IQ.
 *
 * @param in        band coefficients
 * @param scaled    values multiplied by the quantizer scale
 * @param size      number of coefficients, consumed in groups of four
 * @param scale_idx selects the entries of ff_aac_pow34sf_tab (quantizer
 *                  scale) and ff_aac_pow2sf_tab (IQ)
 * @param cb        codebook number; cb-1 indexes ff_aac_spectral_bits and
 *                  ff_aac_codebook_vectors
 * @param lambda    factor applied to the accumulated distortion
 * @param bits      optional, receives the accumulated code length
 * @param energy    optional, receives the summed squared codebook values
 *                  multiplied by IQ*IQ
 * @return distortion * lambda + code length
 *
 * s, pb and uplim are not used.
 */
static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
                                      int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
    /* Despite its name, p_codes points at the codebook vector table. */
    float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        /* Integer and float views of in[i..i+3], dereferenced only in the asm. */
        int *in_int = (int *)&in[i];
        float *in_pos = (float *)&in[i];
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4, t5, t6, t7;

        /* The float-to-int conversion truncates after the rounding offset is added. */
        qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* t4 = 4; tN = (4 < qcN); movn replaces qcN by 4 when it exceeds 4. */
            "ori %[t4], $zero, 4 \n\t"
            "slt %[t0], %[t4], %[qc1] \n\t"
            "slt %[t1], %[t4], %[qc2] \n\t"
            "slt %[t2], %[t4], %[qc3] \n\t"
            "slt %[t3], %[t4], %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t4], %[t1] \n\t"
            "movn %[qc3], %[t4], %[t2] \n\t"
            "movn %[qc4], %[t4], %[t3] \n\t"
            /* Load the raw bits of the four inputs and isolate bit 31 (sign). */
            "lw %[t0], 0(%[in_int]) \n\t"
            "lw %[t1], 4(%[in_int]) \n\t"
            "lw %[t2], 8(%[in_int]) \n\t"
            "lw %[t3], 12(%[in_int]) \n\t"
            "srl %[t0], %[t0], 31 \n\t"
            "srl %[t1], %[t1], 31 \n\t"
            "srl %[t2], %[t2], 31 \n\t"
            "srl %[t3], %[t3], 31 \n\t"
            /* t4..t7 = -qcN (t4 is reused here); movn selects the negated
             * value when the sign bit is set. */
            "subu %[t4], $zero, %[qc1] \n\t"
            "subu %[t5], $zero, %[qc2] \n\t"
            "subu %[t6], $zero, %[qc3] \n\t"
            "subu %[t7], $zero, %[qc4] \n\t"
            "movn %[qc1], %[t4], %[t0] \n\t"
            "movn %[qc2], %[t5], %[t1] \n\t"
            "movn %[qc3], %[t6], %[t2] \n\t"
            "movn %[qc4], %[t7], %[t3] \n\t"

            ".set pop \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
            : [in_int]"r"(in_int)
            : "memory"
        );

        /* Base-9 pair index; +40 (4*9+4) shifts each digit from -4..4 to 0..8. */
        curidx = 9 * qc1;
        curidx += qc2 + 40;

        curidx2 = 9 * qc3;
        curidx2 += qc4 + 40;

        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];

        /* Two reconstruction values per codebook entry. */
        vec = &p_codes[curidx*2];
        vec2 = &p_codes[curidx2*2];

        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                 + vec2[0]*vec2[0] + vec2[1]*vec2[1];

        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"

            /* Even registers hold in[], odd registers hold the codebook
             * values (first pair from vec, second pair from vec2). */
            "lwc1 $f0, 0(%[in_pos]) \n\t"
            "lwc1 $f1, 0(%[vec]) \n\t"
            "lwc1 $f2, 4(%[in_pos]) \n\t"
            "lwc1 $f3, 4(%[vec]) \n\t"
            "lwc1 $f4, 8(%[in_pos]) \n\t"
            "lwc1 $f5, 0(%[vec2]) \n\t"
            "lwc1 $f6, 12(%[in_pos]) \n\t"
            "lwc1 $f7, 4(%[vec2]) \n\t"
            /* nmsub.s fd, fr, fs, ft yields fr - fs*ft: diN = in - vec * IQ. */
            "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
            "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
            "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
            "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"

            ".set pop \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "$f4", "$f5", "$f6", "$f7",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
                + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
1866
/**
 * Rate-distortion cost of coding a band with a pair codebook whose quantized
 * magnitudes are limited to 7 (table index = 8 * first + second).
 *
 * Four coefficients (two codebook pairs) are handled per loop iteration, so
 * the loop touches in[i..i+3] and scaled[i..i+3] for every i < size stepped
 * by 4.
 *
 * @param s         unused by this routine
 * @param pb        unused by this routine
 * @param in        band coefficients; only their absolute values are used
 * @param scaled    values that are multiplied by Q34 and rounded to obtain
 *                  the quantized magnitudes (presumably |in|^(3/4); confirm
 *                  against the callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index used to look up Q34 and IQ
 * @param cb        codebook number; the tables of entry cb-1 are used
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     unused by this routine
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the summed squared codebook values
 *                  scaled by IQ*IQ
 * @return accumulated squared error * lambda + accumulated bit count
 */
static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
                                       int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        const float *in_pos = &in[i];
        int curidx, curidx2;
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Clamp each quantized value to at most 7: slt sets tN when 7 < qcN
         * and movn then overwrites qcN with 7.  Only the clamp is needed to
         * compute the cost, so no sign or non-zero-count bookkeeping is done
         * here.
         */
        __asm__ volatile (
            ".set push                                  \n\t"
            ".set noreorder                             \n\t"

            "ori        %[t4],      $zero,      7       \n\t"
            "slt        %[t0],      %[t4],      %[qc1]  \n\t"
            "slt        %[t1],      %[t4],      %[qc2]  \n\t"
            "slt        %[t2],      %[t4],      %[qc3]  \n\t"
            "slt        %[t3],      %[t4],      %[qc4]  \n\t"
            "movn       %[qc1],     %[t4],      %[t0]   \n\t"
            "movn       %[qc2],     %[t4],      %[t1]   \n\t"
            "movn       %[qc3],     %[t4],      %[t2]   \n\t"
            "movn       %[qc4],     %[t4],      %[t3]   \n\t"

            ".set pop                                   \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Table index of each pair: 8 * first + second */
        curidx  = 8 * qc1;
        curidx += qc2;

        curidx2  = 8 * qc3;
        curidx2 += qc4;

        /* Codeword length plus the sign-bit cost listed for that entry */
        curbits += p_bits[curidx];
        curbits += upair7_sign_bits[curidx];
        vec      = &p_codes[curidx*2];

        curbits += p_bits[curidx2];
        curbits += upair7_sign_bits[curidx2];
        vec2     = &p_codes[curidx2*2];

        /* Unscaled energy of the codebook vectors; IQ*IQ is applied once at the end */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];

        /*
         * Per-coefficient error di = |in| - vec * IQ.
         * nmsub.s d, r, s, t yields r - s * t.
         */
        __asm__ volatile (
            ".set push                                          \n\t"
            ".set noreorder                                     \n\t"

            "lwc1       %[di0],     0(%[in_pos])                \n\t"
            "lwc1       %[di1],     4(%[in_pos])                \n\t"
            "lwc1       %[di2],     8(%[in_pos])                \n\t"
            "lwc1       %[di3],     12(%[in_pos])               \n\t"
            "abs.s      %[di0],     %[di0]                      \n\t"
            "abs.s      %[di1],     %[di1]                      \n\t"
            "abs.s      %[di2],     %[di2]                      \n\t"
            "abs.s      %[di3],     %[di3]                      \n\t"
            "lwc1       $f0,        0(%[vec])                   \n\t"
            "lwc1       $f1,        4(%[vec])                   \n\t"
            "lwc1       $f2,        0(%[vec2])                  \n\t"
            "lwc1       $f3,        4(%[vec2])                  \n\t"
            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"

            ".set pop                                           \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
                + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
2005
/**
 * Rate-distortion cost of coding a band with a pair codebook whose quantized
 * magnitudes are limited to 12 (table index = 13 * first + second).
 *
 * Four coefficients (two codebook pairs) are handled per loop iteration, so
 * the loop touches in[i..i+3] and scaled[i..i+3] for every i < size stepped
 * by 4.
 *
 * @param s         unused by this routine
 * @param pb        unused by this routine
 * @param in        band coefficients; only their absolute values are used
 * @param scaled    values that are multiplied by Q34 and rounded to obtain
 *                  the quantized magnitudes (presumably |in|^(3/4); confirm
 *                  against the callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index used to look up Q34 and IQ
 * @param cb        codebook number; the tables of entry cb-1 are used
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     unused by this routine
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the summed squared codebook values
 *                  scaled by IQ*IQ
 * @return accumulated squared error * lambda + accumulated bit count
 */
static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
                                        int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        const float *in_pos = &in[i];
        int curidx, curidx2;
        float di0, di1, di2, di3;
        int t0, t1, t2, t3, t4;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * Clamp each quantized value to at most 12: slt sets tN when
         * 12 < qcN and movn then overwrites qcN with 12.  Only the clamp is
         * needed to compute the cost, so no sign or non-zero-count
         * bookkeeping is done here.
         */
        __asm__ volatile (
            ".set push                                  \n\t"
            ".set noreorder                             \n\t"

            "ori        %[t4],      $zero,      12      \n\t"
            "slt        %[t0],      %[t4],      %[qc1]  \n\t"
            "slt        %[t1],      %[t4],      %[qc2]  \n\t"
            "slt        %[t2],      %[t4],      %[qc3]  \n\t"
            "slt        %[t3],      %[t4],      %[qc4]  \n\t"
            "movn       %[qc1],     %[t4],      %[t0]   \n\t"
            "movn       %[qc2],     %[t4],      %[t1]   \n\t"
            "movn       %[qc3],     %[t4],      %[t2]   \n\t"
            "movn       %[qc4],     %[t4],      %[t3]   \n\t"

            ".set pop                                   \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
              [t4]"=&r"(t4)
        );

        /* Table index of each pair: 13 * first + second */
        curidx  = 13 * qc1;
        curidx += qc2;

        curidx2  = 13 * qc3;
        curidx2 += qc4;

        /* Codeword length plus the sign-bit cost listed for that entry */
        curbits += p_bits[curidx];
        curbits += p_bits[curidx2];
        curbits += upair12_sign_bits[curidx];
        curbits += upair12_sign_bits[curidx2];
        vec      = &p_codes[curidx*2];
        vec2     = &p_codes[curidx2*2];

        /* Unscaled energy of the codebook vectors; IQ*IQ is applied once at the end */
        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];

        /*
         * Per-coefficient error di = |in| - vec * IQ.
         * nmsub.s d, r, s, t yields r - s * t.
         */
        __asm__ volatile (
            ".set push                                          \n\t"
            ".set noreorder                                     \n\t"

            "lwc1       %[di0],     0(%[in_pos])                \n\t"
            "lwc1       %[di1],     4(%[in_pos])                \n\t"
            "lwc1       %[di2],     8(%[in_pos])                \n\t"
            "lwc1       %[di3],     12(%[in_pos])               \n\t"
            "abs.s      %[di0],     %[di0]                      \n\t"
            "abs.s      %[di1],     %[di1]                      \n\t"
            "abs.s      %[di2],     %[di2]                      \n\t"
            "abs.s      %[di3],     %[di3]                      \n\t"
            "lwc1       $f0,        0(%[vec])                   \n\t"
            "lwc1       $f1,        4(%[vec])                   \n\t"
            "lwc1       $f2,        0(%[vec2])                  \n\t"
            "lwc1       $f3,        4(%[vec2])                  \n\t"
            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t"
            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t"
            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t"
            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t"

            ".set pop                                           \n\t"

            : [di0]"=&f"(di0), [di1]"=&f"(di1),
              [di2]"=&f"(di2), [di3]"=&f"(di3)
            : [in_pos]"r"(in_pos), [vec]"r"(vec),
              [vec2]"r"(vec2), [IQ]"f"(IQ)
            : "$f0", "$f1", "$f2", "$f3",
              "memory"
        );

        cost += di0 * di0 + di1 * di1
                + di2 * di2 + di3 * di3;
    }

    if (bits)
        *bits = curbits;
    if (energy)
        *energy = qenergy * (IQ*IQ);
    return cost * lambda + curbits;
}
2144
/**
 * Rate-distortion cost of coding a band with the escape codebook
 * (table index = 17 * first + second, values above 15 are coded as 16 plus
 * an escape sequence).
 *
 * Four coefficients (two codebook pairs) are handled per loop iteration, so
 * the loop touches in[i..i+3] and scaled[i..i+3] for every i < size stepped
 * by 4.
 *
 * @param s         unused by this routine
 * @param pb        unused by this routine
 * @param in        band coefficients; only their absolute values are used
 * @param scaled    values that are multiplied by Q34 and rounded to obtain
 *                  the quantized magnitudes (presumably |in|^(3/4); confirm
 *                  against the callers)
 * @param size      number of coefficients in the band
 * @param scale_idx scalefactor index used to look up Q34 and IQ
 * @param cb        codebook number; the tables of entry cb-1 are used
 * @param lambda    weight applied to the accumulated squared error
 * @param uplim     unused by this routine
 * @param bits      if non-NULL, receives the accumulated bit count
 * @param energy    if non-NULL, receives the summed squared reconstructed
 *                  values (already scaled by IQ)
 * @return accumulated squared error * lambda + accumulated bit count
 */
static float get_band_cost_ESC_mips(struct AACEncContext *s,
                                    PutBitContext *pb, const float *in,
                                    const float *scaled, int size, int scale_idx,
                                    int cb, const float lambda, const float uplim,
                                    int *bits, float *energy)
{
    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
    /* Reconstruction value used for inputs at or above the escape ceiling */
    const float CLIPPED_ESCAPE = 165140.0f * IQ;
    int i;
    float cost = 0;
    float qenergy = 0.0f;
    int qc1, qc2, qc3, qc4;
    int curbits = 0;

    uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1];
    float   *p_codes = (float*  )ff_aac_codebook_vectors[cb-1];

    for (i = 0; i < size; i += 4) {
        const float *vec, *vec2;
        int curidx, curidx2;
        float t1, t2, t3, t4, V;
        float di1, di2, di3, di4;
        int cond0, cond1, cond2, cond3;
        int c1, c2, c3, c4;
        int t6, t7;

        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;

        /*
         * cN    : qcN limited to 13 bits via a saturating left shift by 18
         *         followed by a logical right shift by 18.
         * condN : 1 when qcN > 15, i.e. the value needs an escape.
         * qcN   : replaced by 16 (the escape marker) when condN is set.
         */
        __asm__ volatile (
            ".set push                                  \n\t"
            ".set noreorder                             \n\t"

            "ori        %[t6],      $zero,      15      \n\t"
            "ori        %[t7],      $zero,      16      \n\t"
            "shll_s.w   %[c1],      %[qc1],     18      \n\t"
            "shll_s.w   %[c2],      %[qc2],     18      \n\t"
            "shll_s.w   %[c3],      %[qc3],     18      \n\t"
            "shll_s.w   %[c4],      %[qc4],     18      \n\t"
            "srl        %[c1],      %[c1],      18      \n\t"
            "srl        %[c2],      %[c2],      18      \n\t"
            "srl        %[c3],      %[c3],      18      \n\t"
            "srl        %[c4],      %[c4],      18      \n\t"
            "slt        %[cond0],   %[t6],      %[qc1]  \n\t"
            "slt        %[cond1],   %[t6],      %[qc2]  \n\t"
            "slt        %[cond2],   %[t6],      %[qc3]  \n\t"
            "slt        %[cond3],   %[t6],      %[qc4]  \n\t"
            "movn       %[qc1],     %[t7],      %[cond0]\n\t"
            "movn       %[qc2],     %[t7],      %[cond1]\n\t"
            "movn       %[qc3],     %[t7],      %[cond2]\n\t"
            "movn       %[qc4],     %[t7],      %[cond3]\n\t"

            ".set pop                                   \n\t"

            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
              [c1]"=&r"(c1), [c2]"=&r"(c2),
              [c3]"=&r"(c3), [c4]"=&r"(c4),
              [t6]"=&r"(t6), [t7]"=&r"(t7)
        );

        /* Table index of each pair: 17 * first + second */
        curidx  = 17 * qc1;
        curidx += qc2;

        curidx2  = 17 * qc3;
        curidx2 += qc4;

        /* Codeword length plus the sign-bit cost listed for that entry */
        curbits += p_bits[curidx];
        curbits += esc_sign_bits[curidx];
        vec      = &p_codes[curidx*2];

        curbits += p_bits[curidx2];
        curbits += esc_sign_bits[curidx2];
        vec2     = &p_codes[curidx2*2];

        /* Escape sequence length; -condN is an all-ones mask only for escaped values */
        curbits += (av_log2(c1) * 2 - 3) & (-cond0);
        curbits += (av_log2(c2) * 2 - 3) & (-cond1);
        curbits += (av_log2(c3) * 2 - 3) & (-cond2);
        curbits += (av_log2(c4) * 2 - 3) & (-cond3);

        t1 = fabsf(in[i  ]);
        t2 = fabsf(in[i+1]);
        t3 = fabsf(in[i+2]);
        t4 = fabsf(in[i+3]);

        /*
         * For each coefficient: escaped values are reconstructed as
         * c * cbrt(c) * IQ (or CLIPPED_ESCAPE when the input is at least
         * that large), other values come from the codebook vector.  V holds
         * the reconstructed magnitude, already scaled by IQ.
         */
        if (cond0) {
            if (t1 >= CLIPPED_ESCAPE) {
                di1 = t1 - CLIPPED_ESCAPE;
                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
            } else {
                di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
                qenergy += V*V;
            }
        } else {
            di1 = t1 - (V = vec[0] * IQ);
            qenergy += V*V;
        }

        if (cond1) {
            if (t2 >= CLIPPED_ESCAPE) {
                di2 = t2 - CLIPPED_ESCAPE;
                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
            } else {
                di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
                qenergy += V*V;
            }
        } else {
            di2 = t2 - (V = vec[1] * IQ);
            qenergy += V*V;
        }

        if (cond2) {
            if (t3 >= CLIPPED_ESCAPE) {
                di3 = t3 - CLIPPED_ESCAPE;
                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
            } else {
                di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
                qenergy += V*V;
            }
        } else {
            di3 = t3 - (V = vec2[0] * IQ);
            qenergy += V*V;
        }

        if (cond3) {
            if (t4 >= CLIPPED_ESCAPE) {
                di4 = t4 - CLIPPED_ESCAPE;
                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
            } else {
                di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
                qenergy += V*V;
            }
        } else {
            di4 = t4 - (V = vec2[1]*IQ);
            qenergy += V*V;
        }

        cost += di1 * di1 + di2 * di2
                + di3 * di3 + di4 * di4;
    }

    if (bits)
        *bits = curbits;
    /*
     * Report the quantized energy like the other cost routines do.  No
     * IQ*IQ factor here: every term added to qenergy above already
     * includes IQ.
     */
    if (energy)
        *energy = qenergy;
    return cost * lambda + curbits;
}
2295
2296 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2297 PutBitContext *pb, const float *in,
2298 const float *scaled, int size, int scale_idx,
2299 int cb, const float lambda, const float uplim,
2300 int *bits, float *energy) = {
2301 get_band_cost_ZERO_mips,
2302 get_band_cost_SQUAD_mips,
2303 get_band_cost_SQUAD_mips,
2304 get_band_cost_UQUAD_mips,
2305 get_band_cost_UQUAD_mips,
2306 get_band_cost_SPAIR_mips,
2307 get_band_cost_SPAIR_mips,
2308 get_band_cost_UPAIR7_mips,
2309 get_band_cost_UPAIR7_mips,
2310 get_band_cost_UPAIR12_mips,
2311 get_band_cost_UPAIR12_mips,
2312 get_band_cost_ESC_mips,
2313 get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2314 get_band_cost_ZERO_mips,
2315 get_band_cost_ZERO_mips,
2316 get_band_cost_ZERO_mips,
2317 };
2318
/*
 * Call the cost routine registered for the given codebook in
 * get_band_cost_arr, forwarding every argument unchanged.  The codebook
 * argument is expanded twice (as table index and as call argument), so it
 * should not have side effects.
 */
#define get_band_cost(ctx, bitbuf, coefs, scaled_coefs, len, sf, book,      \
                      rd_lambda, limit, bits_out, energy_out)                \
    get_band_cost_arr[book](ctx, bitbuf, coefs, scaled_coefs, len, sf,       \
                            book, rd_lambda, limit, bits_out, energy_out)
2325
quantize_band_cost(struct AACEncContext * s,const float * in,const float * scaled,int size,int scale_idx,int cb,const float lambda,const float uplim,int * bits,float * energy,int rtz)2326 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2327 const float *scaled, int size, int scale_idx,
2328 int cb, const float lambda, const float uplim,
2329 int *bits, float *energy, int rtz)
2330 {
2331 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
2332 }
2333
2334 #include "libavcodec/aacenc_quantization_misc.h"
2335
2336 #include "libavcodec/aaccoder_twoloop.h"
2337
/**
 * Decide band by band whether a channel pair is coded as mid/side.
 *
 * Does nothing unless cpe->common_window is set.  For every band that is
 * non-zero in both channels and not flagged in cpe->is_mask, the cost of
 * coding left/right is compared against mid/side for up to four decreasing
 * side scalefactor candidates.  The result is stored in cpe->ms_mask; when
 * mid/side wins and neither channel is a noise band, sf_idx and band_type of
 * both channels are overwritten with the mid/side choices.
 */
static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
{
    SingleChannelElement *const ch0 = &cpe->ch[0];
    SingleChannelElement *const ch1 = &cpe->ch[1];
    /* Four 128-float scratch areas for the scaled L, R, M and S spectra */
    float *const pow_l = s->scoefs;
    float *const pow_r = s->scoefs + 128;
    float *const pow_m = s->scoefs + 128*2;
    float *const pow_s = s->scoefs + 128*3;
    const float lambda   = s->lambda;
    const float mslambda = FFMIN(1.0f, lambda / 120.f);
    uint8_t next0[128], next1[128];
    float mid[128], side[128];
    int last_mid_sf, last_side_sf;
    int win;

    if (!cpe->common_window)
        return;

    /* Maps of the next non-zero band for each channel */
    ff_init_nextband_map(ch0, next0);
    ff_init_nextband_map(ch1, next1);

    last_mid_sf  = ch0->sf_idx[0];
    last_side_sf = ch1->sf_idx[0];

    for (win = 0; win < ch0->ics.num_windows; win += ch0->ics.group_len[win]) {
        int offset = 0;
        int band;

        for (band = 0; band < ch0->ics.num_swb; band++) {
            const int   idx  = win*16 + band;
            const int   len  = ch0->ics.swb_sizes[band];
            const float bmax = bval2bmax(band * 17.0f / ch0->ics.num_swb) / 0.0045f;
            int sub, k, boost;

            if (!cpe->is_mask[idx])
                cpe->ms_mask[idx] = 0;

            if (!(ch0->zeroes[idx] || ch1->zeroes[idx] || cpe->is_mask[idx])) {
                float mid_peak = 0.0f, side_peak = 0.0f;

                /* Peak of the scaled mid and side spectra over the whole window group */
                for (sub = 0; sub < ch0->ics.group_len[win]; sub++) {
                    const int pos = offset + (win + sub)*128;

                    for (k = 0; k < len; k++) {
                        mid[k]  = (ch0->coeffs[pos + k]
                                 + ch1->coeffs[pos + k]) * 0.5;
                        side[k] = mid[k]
                                - ch1->coeffs[pos + k];
                    }
                    abs_pow34_v(pow_m, mid,  len);
                    abs_pow34_v(pow_s, side, len);
                    for (k = 0; k < len; k++) {
                        mid_peak  = FFMAX(mid_peak,  pow_m[k]);
                        side_peak = FFMAX(side_peak, pow_s[k]);
                    }
                }

                /* Try the side channel at the common scalefactor, then 3, 6 and 9 steps lower */
                for (boost = 0; boost < 4; boost++) {
                    float cost_lr = 0.0f, cost_ms = 0.0f;
                    int bits_lr = 0, bits_ms = 0;
                    const int minidx = FFMIN(ch0->sf_idx[idx], ch1->sf_idx[idx]);
                    const int mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    const int sididx = av_clip(minidx - boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
                    int midcb, sidcb;
                    int coded0, coded1;

                    if (ch0->band_type[idx] != NOISE_BT && ch1->band_type[idx] != NOISE_BT
                        && !(   ff_sfdelta_can_replace(ch0, next0, last_mid_sf,  mididx, idx)
                             && ff_sfdelta_can_replace(ch1, next1, last_side_sf, sididx, idx))) {
                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
                        continue;
                    }

                    /* Codebook 0 is never used for mid or side */
                    midcb = find_min_book(mid_peak, mididx);
                    if (midcb < 1)
                        midcb = 1;
                    sidcb = find_min_book(side_peak, sididx);
                    if (sidcb < 1)
                        sidcb = 1;

                    for (sub = 0; sub < ch0->ics.group_len[win]; sub++) {
                        const int pos = offset + (win + sub)*128;
                        FFPsyBand *psy0 = &s->psy.ch[s->cur_channel+0].psy_bands[(win+sub)*16+band];
                        FFPsyBand *psy1 = &s->psy.ch[s->cur_channel+1].psy_bands[(win+sub)*16+band];
                        float minthr = FFMIN(psy0->threshold, psy1->threshold);
                        int b1, b2, b3, b4;
                        int sum_lr, sum_ms;

                        for (k = 0; k < len; k++) {
                            mid[k]  = (ch0->coeffs[pos + k]
                                     + ch1->coeffs[pos + k]) * 0.5;
                            side[k] = mid[k]
                                    - ch1->coeffs[pos + k];
                        }

                        abs_pow34_v(pow_l, ch0->coeffs + pos, len);
                        abs_pow34_v(pow_r, ch1->coeffs + pos, len);
                        abs_pow34_v(pow_m, mid,  len);
                        abs_pow34_v(pow_s, side, len);

                        cost_lr += quantize_band_cost(s, &ch0->coeffs[pos],
                                                      pow_l,
                                                      len,
                                                      ch0->sf_idx[idx],
                                                      ch0->band_type[idx],
                                                      lambda / psy0->threshold, INFINITY, &b1, NULL, 0);
                        cost_lr += quantize_band_cost(s, &ch1->coeffs[pos],
                                                      pow_r,
                                                      ch1->ics.swb_sizes[band],
                                                      ch1->sf_idx[idx],
                                                      ch1->band_type[idx],
                                                      lambda / psy1->threshold, INFINITY, &b2, NULL, 0);
                        cost_ms += quantize_band_cost(s, mid,
                                                      pow_m,
                                                      len,
                                                      mididx,
                                                      midcb,
                                                      lambda / minthr, INFINITY, &b3, NULL, 0);
                        cost_ms += quantize_band_cost(s, side,
                                                      pow_s,
                                                      ch1->ics.swb_sizes[band],
                                                      sididx,
                                                      sidcb,
                                                      mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);

                        /* Track bits separately and take them back out of the costs */
                        sum_lr   = b1 + b2;
                        sum_ms   = b3 + b4;
                        bits_lr += sum_lr;
                        bits_ms += sum_ms;
                        cost_lr -= sum_lr;
                        cost_ms -= sum_ms;
                    }

                    cpe->ms_mask[idx] = cost_ms <= cost_lr && bits_ms < bits_lr;
                    if (cpe->ms_mask[idx]) {
                        coded0 = ch0->band_type[idx] != NOISE_BT;
                        coded1 = ch1->band_type[idx] != NOISE_BT;
                        if (coded0 && coded1) {
                            ch0->sf_idx[idx]    = mididx;
                            ch1->sf_idx[idx]    = sididx;
                            ch0->band_type[idx] = midcb;
                            ch1->band_type[idx] = sidcb;
                        } else if (coded0 != coded1) {
                            /* ms_mask unneeded, and it confuses some decoders */
                            cpe->ms_mask[idx] = 0;
                        }
                        break;
                    }
                    if (bits_ms > bits_lr) {
                        /* More boost won't fix this */
                        break;
                    }
                }
            }

            /* Remember the most recent scalefactors for the next range check */
            if (!ch0->zeroes[idx] && ch0->band_type[idx] < RESERVED_BT)
                last_mid_sf = ch0->sf_idx[idx];
            if (!ch1->zeroes[idx] && !cpe->is_mask[idx] && ch1->band_type[idx] < RESERVED_BT)
                last_side_sf = ch1->sf_idx[idx];
            offset += ch0->ics.swb_sizes[band];
        }
    }
}
2477 #endif /*HAVE_MIPSFPU */
2478
2479 #include "libavcodec/aaccoder_trellis.h"
2480
2481 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2482 #endif /* HAVE_INLINE_ASM */
2483
/**
 * Install the MIPS-specific coder callbacks into c->coder.
 *
 * Compiles to an empty function unless inline asm is available and the
 * target is neither MIPS32R6 nor MIPS64R6.  The band quantizer and the
 * trellis codebook search are replaced only when c->options.coder is 2; the
 * FPU-dependent callbacks additionally require HAVE_MIPSFPU.
 */
void ff_aac_coder_init_mips(AACEncContext *c) {
#if HAVE_INLINE_ASM && !HAVE_MIPS32R6 && !HAVE_MIPS64R6
    AACCoefficientsEncoder *const enc = c->coder;

    if (c->options.coder == 2) {
        enc->quantize_and_encode_band = quantize_and_encode_band_mips;
        enc->encode_window_bands_info = codebook_trellis_rate;
#if HAVE_MIPSFPU
        enc->search_for_quantizers    = search_for_quantizers_twoloop;
#endif /* HAVE_MIPSFPU */
    }
#if HAVE_MIPSFPU
    enc->search_for_ms = search_for_ms_mips;
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM && !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
}
2503