1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <stdint.h>
20 #include "libavutil/avutil.h"
21 #include "mathops.h"
22
/*
 * Helper macros for one instantiation of this template.
 * Any previous definitions are dropped first; presumably this file is
 * included more than once with different SAMPLE_SIZE values (confirm against
 * the including file). FSUF is only #undef'd here; it is not defined in this
 * part of the file.
 */
#undef FUNC
#undef sum_type
#undef MUL
#undef CLIP
#undef FSUF

/*
 * Function-name builder: pastes a trailing underscore onto n and combines the
 * result with SAMPLE_SIZE through AV_JOIN (defined elsewhere; presumably plain
 * token concatenation, giving names of the form n_<SAMPLE_SIZE>).
 */
#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)

/*
 * sum_type: accumulator type for the prediction sums.
 * MUL:      product of one coefficient and one sample.
 * CLIP:     applied to the shifted prediction before it is subtracted.
 */
#if SAMPLE_SIZE == 32
/* 32-bit variant: 64-bit accumulator, MUL64 products, av_clipl_int32 on the result. */
# define sum_type int64_t
# define MUL(a, b) MUL64(a, b)
# define CLIP(x) av_clipl_int32(x)
#else
/* Every other size: 32-bit accumulator, plain multiplication, CLIP is a no-op. */
# define sum_type int32_t
# define MUL(a, b) ((a) * (b))
# define CLIP(x) (x)
#endif
40
/*
 * One filter tap, applied to two adjacent output samples at once.
 *
 * Expects coefs, smp, i, s, p0 and p1 to be in scope at the expansion site.
 * On entry s must hold smp[i-(x)]; the tap coefs[(x)-1] is accumulated into
 * p0 with that sample, then s is advanced to smp[i-(x)+1] and the same tap is
 * accumulated into p1. Chaining LPC1(x), LPC1(x-1), ..., LPC1(1) therefore
 * reuses each loaded sample for both sums.
 *
 * The expansion is a bare { } block and is used without a trailing semicolon
 * directly after case labels, so its shape must not be changed to a
 * do { } while (0) form.
 */
#define LPC1(x) { \
    int c = coefs[(x)-1]; \
    p0 += MUL(c, s); \
    s = smp[i-(x)+1]; \
    p1 += MUL(c, s); \
}
47
FUNC(lpc_encode_unrolled)48 static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
49 const int32_t *smp, int len, int order,
50 const int32_t *coefs, int shift, int big)
51 {
52 int i;
53 for (i = order; i < len; i += 2) {
54 int s = smp[i-order];
55 sum_type p0 = 0, p1 = 0;
56 if (big) {
57 switch (order) {
58 case 32: LPC1(32)
59 case 31: LPC1(31)
60 case 30: LPC1(30)
61 case 29: LPC1(29)
62 case 28: LPC1(28)
63 case 27: LPC1(27)
64 case 26: LPC1(26)
65 case 25: LPC1(25)
66 case 24: LPC1(24)
67 case 23: LPC1(23)
68 case 22: LPC1(22)
69 case 21: LPC1(21)
70 case 20: LPC1(20)
71 case 19: LPC1(19)
72 case 18: LPC1(18)
73 case 17: LPC1(17)
74 case 16: LPC1(16)
75 case 15: LPC1(15)
76 case 14: LPC1(14)
77 case 13: LPC1(13)
78 case 12: LPC1(12)
79 case 11: LPC1(11)
80 case 10: LPC1(10)
81 case 9: LPC1( 9)
82 LPC1( 8)
83 LPC1( 7)
84 LPC1( 6)
85 LPC1( 5)
86 LPC1( 4)
87 LPC1( 3)
88 LPC1( 2)
89 LPC1( 1)
90 }
91 } else {
92 switch (order) {
93 case 8: LPC1( 8)
94 case 7: LPC1( 7)
95 case 6: LPC1( 6)
96 case 5: LPC1( 5)
97 case 4: LPC1( 4)
98 case 3: LPC1( 3)
99 case 2: LPC1( 2)
100 case 1: LPC1( 1)
101 }
102 }
103 res[i ] = smp[i ] - CLIP(p0 >> shift);
104 res[i+1] = smp[i+1] - CLIP(p1 >> shift);
105 }
106 }
107
FUNC(flac_lpc_encode_c)108 static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
109 int order, const int32_t *coefs, int shift)
110 {
111 int i;
112 for (i = 0; i < order; i++)
113 res[i] = smp[i];
114 #if CONFIG_SMALL
115 for (i = order; i < len; i += 2) {
116 int j;
117 int s = smp[i];
118 sum_type p0 = 0, p1 = 0;
119 for (j = 0; j < order; j++) {
120 int c = coefs[j];
121 p1 += MUL(c, s);
122 s = smp[i-j-1];
123 p0 += MUL(c, s);
124 }
125 res[i ] = smp[i ] - CLIP(p0 >> shift);
126 res[i+1] = smp[i+1] - CLIP(p1 >> shift);
127 }
128 #else
129 switch (order) {
130 case 1: FUNC(lpc_encode_unrolled)(res, smp, len, 1, coefs, shift, 0); break;
131 case 2: FUNC(lpc_encode_unrolled)(res, smp, len, 2, coefs, shift, 0); break;
132 case 3: FUNC(lpc_encode_unrolled)(res, smp, len, 3, coefs, shift, 0); break;
133 case 4: FUNC(lpc_encode_unrolled)(res, smp, len, 4, coefs, shift, 0); break;
134 case 5: FUNC(lpc_encode_unrolled)(res, smp, len, 5, coefs, shift, 0); break;
135 case 6: FUNC(lpc_encode_unrolled)(res, smp, len, 6, coefs, shift, 0); break;
136 case 7: FUNC(lpc_encode_unrolled)(res, smp, len, 7, coefs, shift, 0); break;
137 case 8: FUNC(lpc_encode_unrolled)(res, smp, len, 8, coefs, shift, 0); break;
138 default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
139 }
140 #endif
141 }
142
143 /* Comment for clarity/de-obfuscation.
144 *
145 * for (int i = order; i < len; i++) {
146 * int32_t p = 0;
147 * for (int j = 0; j < order; j++) {
148 * int c = coefs[j];
149 * int s = smp[(i-1)-j];
150 * p += c*s;
151 * }
152 * res[i] = smp[i] - (p >> shift);
153 * }
154 *
155 * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
156 * not being equal to 32 (at the present time that means for 16-bit audio). The
157 * code above does 2 samples per iteration. Commit bfdd5bc (made all the way
158 * back in 2007) says that way is faster.
159 */
160