/*
 * Copyright (c) CMU 1993 Computer Science, Speech Group
 *                        Chengxiang Lu and Alex Hauptmann
 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
 * Copyright (c) 2009 Kenan Gillet
 * Copyright (c) 2010 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * G.722 ADPCM audio encoder
 */

#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "encode.h"
#include "internal.h"
#include "g722.h"
#include "libavutil/common.h"

#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16

static av_cold int g722_encode_close(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int i;
    for (i = 0; i < 2; i++) {
        av_freep(&c->paths[i]);
        av_freep(&c->node_buf[i]);
        av_freep(&c->nodep_buf[i]);
    }
    return 0;
}

static av_cold int g722_encode_init(AVCodecContext * avctx)
{
    G722Context *c = avctx->priv_data;

    c->band[0].scale_factor = 8;
    c->band[1].scale_factor = 2;
    c->prev_samples_pos = 22;

    if (avctx->frame_size) {
        /* validate frame size */
        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
            int new_frame_size;

            if (avctx->frame_size == 1)
                new_frame_size = 2;
            else if (avctx->frame_size > MAX_FRAME_SIZE)
                new_frame_size = MAX_FRAME_SIZE;
            else
                new_frame_size = avctx->frame_size - 1;

            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
                   "allowed. Using %d instead of %d\n", new_frame_size,
                   avctx->frame_size);
            avctx->frame_size = new_frame_size;
        }
    } else {
        /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
           a common packet size for VoIP applications */
        avctx->frame_size = 320;
    }
    avctx->initial_padding = 22;

    if (avctx->trellis) {
        /* validate trellis */
        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
                   "allowed. Using %d instead of %d\n", new_trellis,
                   avctx->trellis);
            avctx->trellis = new_trellis;
        }
        if (avctx->trellis) {
            int frontier = 1 << avctx->trellis;
            int max_paths = frontier * FREEZE_INTERVAL;

            for (int i = 0; i < 2; i++) {
                c->paths[i]     = av_calloc(max_paths, sizeof(**c->paths));
                c->node_buf[i]  = av_calloc(frontier, 2 * sizeof(**c->node_buf));
                c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
                if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
                    return AVERROR(ENOMEM);
            }
        }
    }

    ff_g722dsp_init(&c->dsp);

    return 0;
}

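/* Decision thresholds for the 6-bit low-band quantizer; encode_low()
 * compares the scaled prediction error against
 * low_quant[i] * scale_factor to pick the code word. */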
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

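/* Push one pair of input samples through the QMF analysis filter,
 * producing one low-band and one high-band subband sample. */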
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout[2];
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
    *xlow  = xout[0] + xout[1] >> 14;
    *xhigh = xout[0] - xout[1] >> 14;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

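/* Quantize the high-band prediction error to a 2-bit code word. */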
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;
           /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

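/* Quantize the low-band prediction error to a 6-bit code word, using the
 * low_quant decision levels scaled by the adaptive scale factor. */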
static inline int encode_low(const struct G722Band* state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
           /* = diff >= 0 ? diff : -(diff + 1) */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;
    limit = limit + 1 << 10;
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

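/* Trellis search over the quantizer indices of both bands. Up to `frontier`
 * candidate states per band are kept in a heap ordered by accumulated squared
 * error; every FREEZE_INTERVAL sample pairs the best surviving path is
 * committed to the output and the path buffers are reset. */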
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip_intp2((cur_node->state.scale_factor *
                                  ff_g722_low_inv_quant6[k] >> 10)
                                + cur_node->state.s_predictor, 14);
                dec_diff = xlow - decoded;

#define STORE_NODE(index, UPDATE, VALUE)\
                ssd = cur_node->ssd + dec_diff*dec_diff;\
                /* Check for wraparound. Using 64 bit ssd counters would \
                 * be simpler, but is slower on x86 32 bit. */\
                if (ssd < cur_node->ssd)\
                    continue;\
                if (heap_pos[index] < frontier) {\
                    pos = heap_pos[index]++;\
                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                    node = nodes_next[index][pos] = next[index]++;\
                    node->path = pathn[index]++;\
                } else {\
                    /* Try to replace one of the leaf nodes with the new \
                     * one, but not always testing the same leaf position */\
                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                    if (ssd >= nodes_next[index][pos]->ssd)\
                        continue;\
                    heap_pos[index]++;\
                    node = nodes_next[index][pos];\
                }\
                node->ssd = ssd;\
                node->state = cur_node->state;\
                UPDATE;\
                c->paths[index][node->path].value = VALUE;\
                c->paths[index][node->path].prev = cur_node->path;\
                /* Sift the newly inserted node up in the heap to restore \
                 * the heap property */\
                while (pos > 0) {\
                    int parent = (pos - 1) >> 1;\
                    if (nodes_next[index][parent]->ssd <= ssd)\
                        break;\
                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                                nodes_next[index][pos]);\
                    pos = parent;\
                }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}

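/* Encode one pair of input samples into a single output byte:
 * the 2-bit high-band code in the top bits, the 6-bit low-band code below. */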
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
                                         const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow  = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}

static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    int i;
    for (i = 0; i < nb_samples; i += 2)
        encode_byte(c, dst++, &samples[i]);
}

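/* Each output byte carries one pair of input samples, so a frame of
 * nb_samples samples yields (nb_samples + 1) / 2 bytes; a trailing odd
 * sample is duplicated and encoded as one final byte. */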
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    int nb_samples, out_size, ret;

    out_size = (frame->nb_samples + 1) / 2;
    if ((ret = ff_get_encode_buffer(avctx, avpkt, out_size, 0)) < 0)
        return ret;

    nb_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);

    /* handle last frame with odd frame_size */
    if (nb_samples < frame->nb_samples) {
        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
    *got_packet_ptr = 1;
    return 0;
}

const FFCodec ff_adpcm_g722_encoder = {
    .p.name          = "g722",
    .p.long_name     = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .p.type          = AVMEDIA_TYPE_AUDIO,
    .p.id            = AV_CODEC_ID_ADPCM_G722,
    .p.capabilities  = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SMALL_LAST_FRAME,
    .priv_data_size  = sizeof(G722Context),
    .init            = g722_encode_init,
    .close           = g722_encode_close,
    FF_CODEC_ENCODE_CB(g722_encode_frame),
    .p.sample_fmts   = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
#if FF_API_OLD_CHANNEL_LAYOUT
    .p.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
#endif
    .p.ch_layouts   = (const AVChannelLayout[]){
        AV_CHANNEL_LAYOUT_MONO, { 0 }
    },
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
};