1 /*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 /**
26 * @file
27 * G.722 ADPCM audio encoder
28 */
29
30 #include "libavutil/avassert.h"
31 #include "libavutil/channel_layout.h"
32 #include "avcodec.h"
33 #include "codec_internal.h"
34 #include "encode.h"
35 #include "internal.h"
36 #include "g722.h"
37 #include "libavutil/common.h"
38
39 #define FREEZE_INTERVAL 128
40
41 /* This is an arbitrary value. Allowing insanely large values leads to strange
42 problems, so we limit it to a reasonable value */
43 #define MAX_FRAME_SIZE 32768
44
45 /* We clip the value of avctx->trellis to prevent data type overflows and
46 undefined behavior. Using larger values is insanely slow anyway. */
47 #define MIN_TRELLIS 0
48 #define MAX_TRELLIS 16
49
g722_encode_close(AVCodecContext * avctx)50 static av_cold int g722_encode_close(AVCodecContext *avctx)
51 {
52 G722Context *c = avctx->priv_data;
53 int i;
54 for (i = 0; i < 2; i++) {
55 av_freep(&c->paths[i]);
56 av_freep(&c->node_buf[i]);
57 av_freep(&c->nodep_buf[i]);
58 }
59 return 0;
60 }
61
g722_encode_init(AVCodecContext * avctx)62 static av_cold int g722_encode_init(AVCodecContext * avctx)
63 {
64 G722Context *c = avctx->priv_data;
65
66 c->band[0].scale_factor = 8;
67 c->band[1].scale_factor = 2;
68 c->prev_samples_pos = 22;
69
70 if (avctx->frame_size) {
71 /* validate frame size */
72 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
73 int new_frame_size;
74
75 if (avctx->frame_size == 1)
76 new_frame_size = 2;
77 else if (avctx->frame_size > MAX_FRAME_SIZE)
78 new_frame_size = MAX_FRAME_SIZE;
79 else
80 new_frame_size = avctx->frame_size - 1;
81
82 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
83 "allowed. Using %d instead of %d\n", new_frame_size,
84 avctx->frame_size);
85 avctx->frame_size = new_frame_size;
86 }
87 } else {
88 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
89 a common packet size for VoIP applications */
90 avctx->frame_size = 320;
91 }
92 avctx->initial_padding = 22;
93
94 if (avctx->trellis) {
95 /* validate trellis */
96 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
97 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
98 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
99 "allowed. Using %d instead of %d\n", new_trellis,
100 avctx->trellis);
101 avctx->trellis = new_trellis;
102 }
103 if (avctx->trellis) {
104 int frontier = 1 << avctx->trellis;
105 int max_paths = frontier * FREEZE_INTERVAL;
106
107 for (int i = 0; i < 2; i++) {
108 c->paths[i] = av_calloc(max_paths, sizeof(**c->paths));
109 c->node_buf[i] = av_calloc(frontier, 2 * sizeof(**c->node_buf));
110 c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
111 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
112 return AVERROR(ENOMEM);
113 }
114 }
115 }
116
117 ff_g722dsp_init(&c->dsp);
118
119 return 0;
120 }
121
/* Low sub-band quantizer decision thresholds (scaled by the adaptive
 * scale factor at lookup time). NOTE: the array is declared with 33
 * entries but only 29 are initialized; encode_low() only indexes
 * [0, 28], so the zero-initialized tail is never read. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
128
/**
 * Split two consecutive input samples into low and high sub-band values.
 *
 * Appends the two samples to the history buffer, applies the QMF over the
 * most recent 24 history samples, and derives the sub-band values from the
 * filter's two outputs as (sum >> 14) and (difference >> 14). When the
 * history buffer fills up, the trailing 22 samples are moved back to the
 * start so the next call still has enough look-back.
 *
 * @param c        encoder context (history buffer and DSP state)
 * @param samples  pointer to two consecutive 16-bit input samples
 * @param xlow     output: low sub-band sample
 * @param xhigh    output: high sub-band sample
 */
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout[2];
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
    /* '+'/'-' bind tighter than '>>': these are (sum) >> 14, (diff) >> 14 */
    *xlow  = xout[0] + xout[1] >> 14;
    *xhigh = xout[0] - xout[1] >> 14;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}
145
encode_high(const struct G722Band * state,int xhigh)146 static inline int encode_high(const struct G722Band *state, int xhigh)
147 {
148 int diff = av_clip_int16(xhigh - state->s_predictor);
149 int pred = 141 * state->scale_factor >> 8;
150 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
151 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
152 }
153
encode_low(const struct G722Band * state,int xlow)154 static inline int encode_low(const struct G722Band* state, int xlow)
155 {
156 int diff = av_clip_int16(xlow - state->s_predictor);
157 /* = diff >= 0 ? diff : -(diff + 1) */
158 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
159 int i = 0;
160 limit = limit + 1 << 10;
161 if (limit > low_quant[8] * state->scale_factor)
162 i = 9;
163 while (i < 29 && limit > low_quant[i] * state->scale_factor)
164 i++;
165 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
166 }
167
/**
 * Encode nb_samples input samples (an even count; one output byte per two
 * samples) using a trellis search of width (1 << trellis) per band.
 *
 * For each band a heap of up to 'frontier' candidate states is kept per
 * step; candidate quantizer indices are expanded from each surviving state,
 * scored by accumulated squared decoding error (ssd), and the best paths
 * are kept. Every FREEZE_INTERVAL steps the single best path is committed
 * to dst and the path buffers are reset ("frozen") so the path arrays,
 * sized frontier * FREEZE_INTERVAL, cannot overflow.
 */
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    /* Per band: current and next node-pointer arrays share nodep_buf,
     * and the search starts from the band state carried in c->band[]. */
    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        /* node_buf is double-buffered; (i & 1) selects which half the
         * freshly allocated nodes for this step come from. */
        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        /* Low band: expand each surviving node. */
        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip_intp2((cur_node->state.scale_factor *
                                  ff_g722_low_inv_quant6[k] >> 10)
                                + cur_node->state.s_predictor, 14);
                dec_diff = xlow - decoded;

/* Score the candidate and insert it into band 'index's min-heap of next
 * nodes (replacing a worse leaf when the heap is full), then record the
 * chosen VALUE and back-pointer in the path array. Declared as a macro so
 * both bands share the logic while UPDATE applies the band-specific
 * predictor adaptation. */
#define STORE_NODE(index, UPDATE, VALUE)\
            ssd = cur_node->ssd + dec_diff*dec_diff;\
            /* Check for wraparound. Using 64 bit ssd counters would \
             * be simpler, but is slower on x86 32 bit. */\
            if (ssd < cur_node->ssd)\
                continue;\
            if (heap_pos[index] < frontier) {\
                pos = heap_pos[index]++;\
                av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                node = nodes_next[index][pos] = next[index]++;\
                node->path = pathn[index]++;\
            } else {\
                /* Try to replace one of the leaf nodes with the new \
                 * one, but not always testing the same leaf position */\
                pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                if (ssd >= nodes_next[index][pos]->ssd)\
                    continue;\
                heap_pos[index]++;\
                node = nodes_next[index][pos];\
            }\
            node->ssd = ssd;\
            node->state = cur_node->state;\
            UPDATE;\
            c->paths[index][node->path].value = VALUE;\
            c->paths[index][node->path].prev = cur_node->path;\
            /* Sift the newly inserted node up in the heap to restore \
             * the heap property */\
            while (pos > 0) {\
                int parent = (pos - 1) >> 1;\
                if (nodes_next[index][parent]->ssd <= ssd)\
                    break;\
                FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                            nodes_next[index][pos]);\
                pos = parent;\
            }
            STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        /* High band: expand each surviving node. */
        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            /* Rebase the ssd accumulators periodically so the 32-bit
             * counters don't wrap; relative ordering is preserved. */
            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        /* Freeze point: commit the best path so far (walked backwards via
         * the prev links) to dst and reset the path buffers. */
        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    /* Flush the remaining (unfrozen) tail of the best path and store the
     * winning band states back into the context for the next frame. */
    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}
321
/**
 * Encode one pair of input samples into a single G.722 byte:
 * the 2-bit high-band code in the top bits, the 6-bit low-band code below.
 * Updates both band predictors as a side effect, so calls must be made
 * in sample order.
 */
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
                                         const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}
334
/**
 * Plain (non-trellis) encoding: one output byte per pair of input samples.
 * nb_samples is expected to be even (the caller rounds it down).
 */
static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    const int16_t *end = samples + nb_samples;

    while (samples < end) {
        encode_byte(c, dst++, samples);
        samples += 2;
    }
}
343
/**
 * Encode one frame of mono 16-bit samples into a G.722 packet.
 *
 * Produces one byte per two samples; an odd trailing sample is handled by
 * duplicating it into a pair. The packet pts is shifted back by the
 * encoder's initial padding.
 *
 * @return 0 on success, or a negative error from ff_get_encode_buffer().
 */
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    const int pkt_size = (frame->nb_samples + 1) / 2;
    int even_samples, err;

    if ((err = ff_get_encode_buffer(avctx, avpkt, pkt_size, 0)) < 0)
        return err;

    /* Round down to an even sample count for the pairwise encoders. */
    even_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, even_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, even_samples, samples);

    /* handle last frame with odd frame_size */
    if (even_samples < frame->nb_samples) {
        int16_t pair[2] = { samples[even_samples], samples[even_samples] };
        encode_byte(c, &avpkt->data[even_samples >> 1], pair);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
    *got_packet_ptr = 1;
    return 0;
}
373
/* Codec descriptor for the G.722 ADPCM encoder: mono, signed 16-bit input,
 * supports direct rendering and short last frames. */
const FFCodec ff_adpcm_g722_encoder = {
    .p.name         = "g722",
    .p.long_name    = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_ADPCM_G722,
    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_encode_init,
    .close          = g722_encode_close,
    FF_CODEC_ENCODE_CB(g722_encode_frame),
    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
#if FF_API_OLD_CHANNEL_LAYOUT
    /* Legacy channel-layout field, kept while the deprecated API exists. */
    .p.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
#endif
    .p.ch_layouts   = (const AVChannelLayout[]){
        AV_CHANNEL_LAYOUT_MONO, { 0 }
    },
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
};
393