1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /**
20 * @file
21 * Audio join filter
22 *
23 * Join multiple audio inputs as different channels in
24 * a single output
25 */
26
27 #include "libavutil/avassert.h"
28 #include "libavutil/avstring.h"
29 #include "libavutil/channel_layout.h"
30 #include "libavutil/common.h"
31 #include "libavutil/opt.h"
32
33 #include "audio.h"
34 #include "avfilter.h"
35 #include "formats.h"
36 #include "filters.h"
37 #include "internal.h"
38
39 typedef struct ChannelMap {
40 int input; ///< input stream index
41 int in_channel_idx; ///< index of in_channel in the input stream data
42 enum AVChannel in_channel;
43 enum AVChannel out_channel;
44 } ChannelMap;
45
46 typedef struct JoinContext {
47 const AVClass *class;
48
49 int inputs;
50 char *map;
51 char *channel_layout_str;
52 AVChannelLayout ch_layout;
53
54 int64_t eof_pts;
55
56 ChannelMap *channels;
57
58 /**
59 * Temporary storage for input frames, until we get one on each input.
60 */
61 AVFrame **input_frames;
62
63 /**
64 * Temporary storage for buffer references, for assembling the output frame.
65 */
66 AVBufferRef **buffers;
67 } JoinContext;
68
69 #define OFFSET(x) offsetof(JoinContext, x)
70 #define A AV_OPT_FLAG_AUDIO_PARAM
71 #define F AV_OPT_FLAG_FILTERING_PARAM
72 static const AVOption join_options[] = {
73 { "inputs", "Number of input streams.", OFFSET(inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, INT_MAX, A|F },
74 { "channel_layout", "Channel layout of the "
75 "output stream.", OFFSET(channel_layout_str), AV_OPT_TYPE_STRING, {.str = "stereo"}, 0, 0, A|F },
76 { "map", "A comma-separated list of channels maps in the format "
77 "'input_stream.input_channel-output_channel.",
78 OFFSET(map), AV_OPT_TYPE_STRING, .flags = A|F },
79 { NULL }
80 };
81
82 #define MAP_SEPARATOR '|'
83
84 AVFILTER_DEFINE_CLASS(join);
85
/**
 * Parse the "map" option string into s->channels.
 *
 * The string is a MAP_SEPARATOR-separated list of entries of the form
 * "<input stream index>.<input channel>-<output channel>", where the input
 * channel is either a numeric index or a channel name. The option string is
 * modified in place (separators are overwritten with NUL bytes).
 *
 * @return 0 on success, AVERROR(EINVAL) on a malformed or conflicting entry
 */
static int parse_maps(AVFilterContext *ctx)
{
    JoinContext *s = ctx->priv;
    char *spec = s->map;

    while (spec && *spec) {
        ChannelMap *entry;
        char *out_name, *rest, *end;
        int stream, out_idx;

        /* terminate the current entry and remember where the next begins */
        rest = strchr(spec, MAP_SEPARATOR);
        if (rest) {
            *rest = 0;
            rest++;
        }

        /* everything after the '-' names the output channel */
        out_name = strchr(spec, '-');
        if (!out_name) {
            av_log(ctx, AV_LOG_ERROR,
                   "Missing separator '-' in channel map '%s'\n", spec);
            return AVERROR(EINVAL);
        }
        *out_name = 0;
        out_name++;

        /* resolve the output channel within the output layout */
        out_idx = av_channel_layout_index_from_string(&s->ch_layout, out_name);
        if (out_idx < 0) {
            av_log(ctx, AV_LOG_ERROR, "Invalid output channel: %s.\n", out_name);
            return AVERROR(EINVAL);
        }

        entry = &s->channels[out_idx];

        /* each output channel may be mapped only once */
        if (entry->input >= 0) {
            av_log(ctx, AV_LOG_ERROR,
                   "Multiple maps for output channel '%s'.\n", out_name);
            return AVERROR(EINVAL);
        }

        /* leading number: index of the input stream */
        stream = strtol(spec, &spec, 0);
        if (stream < 0 || stream >= s->inputs) {
            av_log(ctx, AV_LOG_ERROR, "Invalid input stream index: %d.\n",
                   stream);
            return AVERROR(EINVAL);
        }

        /* step over the character separating stream index and channel */
        if (*spec)
            spec++;

        entry->input          = stream;
        entry->in_channel     = AV_CHAN_NONE;
        entry->in_channel_idx = strtol(spec, &end, 0);
        if (end == spec) {
            /* no digits consumed: treat the specifier as a channel name */
            entry->in_channel = av_channel_from_string(spec);
            if (entry->in_channel < 0) {
                av_log(ctx, AV_LOG_ERROR, "Invalid input channel: %s.\n", spec);
                return AVERROR(EINVAL);
            }
        } else if (entry->in_channel_idx < 0) {
            av_log(ctx, AV_LOG_ERROR, "Invalid input channel index: %d\n", entry->in_channel_idx);
            return AVERROR(EINVAL);
        }

        spec = rest;
    }

    return 0;
}
153
join_init(AVFilterContext * ctx)154 static av_cold int join_init(AVFilterContext *ctx)
155 {
156 JoinContext *s = ctx->priv;
157 int ret, i;
158
159 ret = av_channel_layout_from_string(&s->ch_layout, s->channel_layout_str);
160 if (ret < 0) {
161 #if FF_API_OLD_CHANNEL_LAYOUT
162 uint64_t mask;
163 FF_DISABLE_DEPRECATION_WARNINGS
164 mask = av_get_channel_layout(s->channel_layout_str);
165 if (!mask) {
166 #endif
167 av_log(ctx, AV_LOG_ERROR, "Error parsing channel layout '%s'.\n",
168 s->channel_layout_str);
169 return AVERROR(EINVAL);
170 #if FF_API_OLD_CHANNEL_LAYOUT
171 }
172 FF_ENABLE_DEPRECATION_WARNINGS
173 av_log(ctx, AV_LOG_WARNING, "Channel layout '%s' uses a deprecated syntax.\n",
174 s->channel_layout_str);
175 av_channel_layout_from_mask(&s->ch_layout, mask);
176 #endif
177 }
178
179 s->channels = av_calloc(s->ch_layout.nb_channels, sizeof(*s->channels));
180 s->buffers = av_calloc(s->ch_layout.nb_channels, sizeof(*s->buffers));
181 s->input_frames = av_calloc(s->inputs, sizeof(*s->input_frames));
182 if (!s->channels || !s->buffers|| !s->input_frames)
183 return AVERROR(ENOMEM);
184
185 for (i = 0; i < s->ch_layout.nb_channels; i++) {
186 s->channels[i].out_channel = av_channel_layout_channel_from_index(&s->ch_layout, i);
187 s->channels[i].input = -1;
188 s->channels[i].in_channel_idx = -1;
189 s->channels[i].in_channel = AV_CHAN_NONE;
190 }
191
192 if ((ret = parse_maps(ctx)) < 0)
193 return ret;
194
195 for (i = 0; i < s->inputs; i++) {
196 AVFilterPad pad = { 0 };
197
198 pad.type = AVMEDIA_TYPE_AUDIO;
199 pad.name = av_asprintf("input%d", i);
200 if (!pad.name)
201 return AVERROR(ENOMEM);
202
203 if ((ret = ff_append_inpad_free_name(ctx, &pad)) < 0)
204 return ret;
205 }
206
207 return 0;
208 }
209
join_uninit(AVFilterContext * ctx)210 static av_cold void join_uninit(AVFilterContext *ctx)
211 {
212 JoinContext *s = ctx->priv;
213 int i;
214
215 for (i = 0; i < s->inputs && s->input_frames; i++) {
216 av_frame_free(&s->input_frames[i]);
217 }
218
219 av_freep(&s->channels);
220 av_freep(&s->buffers);
221 av_freep(&s->input_frames);
222 }
223
/**
 * Format negotiation callback.
 *
 * The output is restricted to the configured channel layout, every input
 * may use any channel layout; sample formats are limited to planar ones and
 * all sample rates are allowed.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int join_query_formats(AVFilterContext *ctx)
{
    JoinContext *s = ctx->priv;
    AVFilterChannelLayouts *layouts = NULL;
    int err;

    /* output side: exactly the requested layout */
    err = ff_add_channel_layout(&layouts, &s->ch_layout);
    if (err < 0)
        return err;
    err = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->incfg.channel_layouts);
    if (err < 0)
        return err;

    /* input side: a fresh "all layouts" list for each input */
    for (int in = 0; in < ctx->nb_inputs; in++) {
        layouts = ff_all_channel_layouts();
        err = ff_channel_layouts_ref(layouts, &ctx->inputs[in]->outcfg.channel_layouts);
        if (err < 0)
            return err;
    }

    err = ff_set_common_formats(ctx, ff_planar_sample_fmts());
    if (err < 0)
        return err;

    err = ff_set_common_all_samplerates(ctx);
    if (err < 0)
        return err;

    return 0;
}
246
/**
 * Simple array-backed list of channels.
 */
typedef struct ChannelList {
    /** array holding the channels currently in the list */
    enum AVChannel *ch;
    /** number of valid entries in ch */
    int nb_ch;
} ChannelList;
251
/*
 * Remove the entry at position idx from chl and return it.
 * The entries following it are shifted down by one.
 */
static enum AVChannel channel_list_pop(ChannelList *chl, int idx)
{
    enum AVChannel *slot = chl->ch + idx;
    enum AVChannel removed = *slot;

    /* close the gap left by the removed entry */
    memmove(slot, slot + 1, (chl->nb_ch - idx - 1) * sizeof(*slot));
    chl->nb_ch--;

    return removed;
}
260
261 /*
262 * If ch is present in chl, remove it from the list and return it.
263 * Otherwise return AV_CHAN_NONE.
264 */
channel_list_pop_ch(ChannelList * chl,enum AVChannel ch)265 static enum AVChannel channel_list_pop_ch(ChannelList *chl, enum AVChannel ch)
266 {
267 for (int i = 0; i < chl->nb_ch; i++)
268 if (chl->ch[i] == ch)
269 return channel_list_pop(chl, i);
270 return AV_CHAN_NONE;
271 }
272
/*
 * Try to map the output channel described by ch to an unused input channel
 * with the same identifier. The first input (in index order) that still has
 * such a channel wins; the channel is removed from that input's unused list.
 * ch is left unchanged when no input provides a matching channel.
 */
static void guess_map_matching(AVFilterContext *ctx, ChannelMap *ch,
                               ChannelList *inputs)
{
    for (int in = 0; in < ctx->nb_inputs; in++) {
        if (channel_list_pop_ch(&inputs[in], ch->out_channel) == AV_CHAN_NONE)
            continue;

        ch->input      = in;
        ch->in_channel = ch->out_channel;
        break;
    }
}
286
/*
 * Map the output channel described by ch to the first unused channel of the
 * first input (in index order) that still has one, removing that channel
 * from the input's unused list. ch is left unchanged when every list is
 * empty.
 */
static void guess_map_any(AVFilterContext *ctx, ChannelMap *ch,
                          ChannelList *inputs)
{
    for (int in = 0; in < ctx->nb_inputs; in++) {
        ChannelList *avail = &inputs[in];

        if (!avail->nb_ch)
            continue;

        ch->input      = in;
        ch->in_channel = channel_list_pop(avail, 0);
        return;
    }
}
300
/**
 * Output link configuration: finalize the channel mapping.
 *
 * User-specified maps are validated against the negotiated input layouts,
 * then every output channel that is still unmapped gets an input channel,
 * first by looking for an unused input channel with the same identifier and
 * then by taking any unused input channel. The resulting mapping is logged
 * at verbose level.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int join_config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    JoinContext *s = ctx->priv;
    /* for each input, the channels that no output channel uses yet */
    ChannelList *unused;
    char in_name[64], out_name[64];
    int err = 0;

    unused = av_calloc(ctx->nb_inputs, sizeof(*unused));
    if (!unused)
        return AVERROR(ENOMEM);

    /* fill the unused list of every input with all of its channels */
    for (int in = 0; in < ctx->nb_inputs; in++) {
        AVChannelLayout *layout = &ctx->inputs[in]->ch_layout;
        ChannelList *list = &unused[in];

        list->nb_ch = layout->nb_channels;
        list->ch    = av_malloc_array(list->nb_ch, sizeof(*list->ch));
        if (!list->ch) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        for (int c = 0; c < list->nb_ch; c++) {
            list->ch[c] = av_channel_layout_channel_from_index(layout, c);
            if (list->ch[c] < 0) {
                /* the input carries no channel ordering information,
                 * so nothing is auto-mapped from it */
                list->nb_ch = 0;
                break;
            }
        }
    }

    /* validate the maps given by the user */
    for (int out = 0; out < s->ch_layout.nb_channels; out++) {
        ChannelMap *map = &s->channels[out];
        AVChannelLayout *layout;

        if (map->input < 0)
            continue;

        layout = &ctx->inputs[map->input]->ch_layout;

        /* channels given by name are translated to an index */
        if (map->in_channel != AV_CHAN_NONE) {
            map->in_channel_idx = av_channel_layout_index_from_channel(layout, map->in_channel);
            if (map->in_channel_idx < 0) {
                av_channel_name(in_name, sizeof(in_name), map->in_channel);
                av_log(ctx, AV_LOG_ERROR,
                       "Requested channel %s is not present in input stream #%d.\n",
                       in_name, map->input);
                err = AVERROR(EINVAL);
                goto fail;
            }
        }

        /* channels given by index must lie inside the input layout */
        if (map->in_channel_idx >= layout->nb_channels) {
            av_log(ctx, AV_LOG_ERROR,
                   "Requested channel with index %d is not present in input stream #%d.\n",
                   map->in_channel_idx, map->input);
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* the channel is now in use */
        channel_list_pop_ch(&unused[map->input],
                            av_channel_layout_channel_from_index(layout, map->in_channel_idx));
    }

    /* automatic mapping, pass 1: unused input channels with the same
     * identifier as the output channel */
    for (int out = 0; out < s->ch_layout.nb_channels; out++) {
        ChannelMap *map = &s->channels[out];

        if (map->input < 0)
            guess_map_matching(ctx, map, unused);
    }

    /* automatic mapping, pass 2: whatever unused input channel is left */
    for (int out = 0; out < s->ch_layout.nb_channels; out++) {
        ChannelMap *map = &s->channels[out];

        if (map->input < 0) {
            guess_map_any(ctx, map, unused);

            if (map->input < 0) {
                av_channel_name(out_name, sizeof(out_name), map->out_channel);
                av_log(ctx, AV_LOG_ERROR,
                       "Could not find input channel for output channel '%s'.\n",
                       out_name);
                err = AVERROR(EINVAL);
                goto fail;
            }
        }

        if (map->in_channel != AV_CHAN_NONE) {
            map->in_channel_idx = av_channel_layout_index_from_channel(
                &ctx->inputs[map->input]->ch_layout, map->in_channel);
        }

        av_assert0(map->in_channel_idx >= 0);
    }

    /* report the final mapping */
    av_log(ctx, AV_LOG_VERBOSE, "mappings: ");
    for (int out = 0; out < s->ch_layout.nb_channels; out++) {
        ChannelMap *map = &s->channels[out];
        AVChannelLayout *layout = &ctx->inputs[map->input]->ch_layout;
        enum AVChannel src = av_channel_layout_channel_from_index(
            layout, map->in_channel_idx);

        av_channel_name(in_name,  sizeof(in_name),  src);
        av_channel_name(out_name, sizeof(out_name), map->out_channel);
        av_log(ctx, AV_LOG_VERBOSE, "%d.%s(%d) => %s(%d) ", map->input,
               in_name, map->in_channel_idx,
               out_name, out);
    }
    av_log(ctx, AV_LOG_VERBOSE, "\n");

    /* an input whose unused list is still complete contributes nothing */
    for (int in = 0; in < ctx->nb_inputs; in++) {
        if (unused[in].nb_ch == ctx->inputs[in]->ch_layout.nb_channels)
            av_log(ctx, AV_LOG_WARNING,
                   "No channels are used from input stream %d.\n", in);
    }

fail:
    /* also reached on success; err is 0 in that case */
    for (int in = 0; in < ctx->nb_inputs; in++)
        av_freep(&unused[in].ch);
    av_freep(&unused);
    return err;
}
437
/**
 * Build one output frame from the stored input frames and send it
 * downstream.
 *
 * No sample data is copied: each output plane pointer is set to the mapped
 * input plane, and the output frame takes a new reference on every distinct
 * buffer backing those planes. The number of output samples is the smallest
 * nb_samples among the stored input frames. After the frame has been passed
 * to ff_filter_frame() all stored input frames are released.
 *
 * When at least one input has no stored frame, nothing is output; instead
 * the inputs are checked and the output is marked with AVERROR_EOF (at
 * s->eof_pts) if an input without a stored frame reports a status and has
 * no queued samples.
 *
 * @return 0 or the result of ff_filter_frame() on success, a negative
 *         AVERROR code on failure
 */
static int try_push_frame(AVFilterContext *ctx)
{
    AVFilterLink *outlink = ctx->outputs[0];
    JoinContext *s = ctx->priv;
    AVFrame *frame;
    int linesize   = INT_MAX;
    int nb_samples = INT_MAX;
    int nb_buffers = 0;
    int i, j, ret;

    /* a frame is needed on every input; use the shortest one as length */
    for (i = 0; i < ctx->nb_inputs; i++) {
        if (!s->input_frames[i]) {
            nb_samples = 0;
            break;
        } else {
            nb_samples = FFMIN(nb_samples, s->input_frames[i]->nb_samples);
        }
    }
    if (!nb_samples)
        goto eof;

    /* setup the output frame */
    frame = av_frame_alloc();
    if (!frame)
        return AVERROR(ENOMEM);
    if (s->ch_layout.nb_channels > FF_ARRAY_ELEMS(frame->data)) {
        /* more planes than frame->data can hold */
        frame->extended_data = av_calloc(s->ch_layout.nb_channels,
                                         sizeof(*frame->extended_data));
        if (!frame->extended_data) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    /* copy the data pointers */
    for (i = 0; i < s->ch_layout.nb_channels; i++) {
        ChannelMap *ch = &s->channels[i];
        AVFrame *cur   = s->input_frames[ch->input];
        AVBufferRef *buf;

        frame->extended_data[i] = cur->extended_data[ch->in_channel_idx];
        linesize = FFMIN(linesize, cur->linesize[0]);

        /* add the buffer where this plane is stored to the list if it's
         * not already there */
        buf = av_frame_get_plane_buffer(cur, ch->in_channel_idx);
        if (!buf) {
            ret = AVERROR(EINVAL);
            goto fail;
        }
        for (j = 0; j < nb_buffers; j++)
            if (s->buffers[j]->buffer == buf->buffer)
                break;
        /* j reaches nb_buffers only when no stored entry matched. Comparing
         * against the channel index instead would stop adding new buffers
         * as soon as two channels shared one, leaving later planes without
         * a reference. */
        if (j == nb_buffers)
            s->buffers[nb_buffers++] = buf;
    }

    /* create references to the buffers we copied to output */
    if (nb_buffers > FF_ARRAY_ELEMS(frame->buf)) {
        frame->nb_extended_buf = nb_buffers - FF_ARRAY_ELEMS(frame->buf);
        frame->extended_buf = av_calloc(frame->nb_extended_buf,
                                        sizeof(*frame->extended_buf));
        if (!frame->extended_buf) {
            frame->nb_extended_buf = 0;
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }
    for (i = 0; i < FFMIN(FF_ARRAY_ELEMS(frame->buf), nb_buffers); i++) {
        frame->buf[i] = av_buffer_ref(s->buffers[i]);
        if (!frame->buf[i]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }
    for (i = 0; i < frame->nb_extended_buf; i++) {
        frame->extended_buf[i] = av_buffer_ref(s->buffers[i +
                                               FF_ARRAY_ELEMS(frame->buf)]);
        if (!frame->extended_buf[i]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    frame->nb_samples     = nb_samples;
#if FF_API_OLD_CHANNEL_LAYOUT
FF_DISABLE_DEPRECATION_WARNINGS
    frame->channel_layout = outlink->channel_layout;
    frame->channels       = outlink->ch_layout.nb_channels;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    /* go through fail so the partially built frame is released */
    if ((ret = av_channel_layout_copy(&frame->ch_layout, &outlink->ch_layout)) < 0)
        goto fail;
    frame->sample_rate    = outlink->sample_rate;
    frame->format         = outlink->format;
    frame->pts            = s->input_frames[0]->pts;
    frame->linesize[0]    = linesize;
    if (frame->data != frame->extended_data) {
        /* mirror the first plane pointers into frame->data */
        memcpy(frame->data, frame->extended_data, sizeof(*frame->data) *
               FFMIN(FF_ARRAY_ELEMS(frame->data), s->ch_layout.nb_channels));
    }

    /* remember where this frame ends, used as timestamp for EOF */
    s->eof_pts = frame->pts + av_rescale_q(frame->nb_samples,
                                           av_make_q(1, outlink->sample_rate),
                                           outlink->time_base);
    ret = ff_filter_frame(outlink, frame);

    for (i = 0; i < ctx->nb_inputs; i++)
        av_frame_free(&s->input_frames[i]);

    return ret;

fail:
    av_frame_free(&frame);
    return ret;
eof:
    for (i = 0; i < ctx->nb_inputs; i++) {
        if (ff_outlink_get_status(ctx->inputs[i]) &&
            ff_inlink_queued_samples(ctx->inputs[i]) <= 0 &&
            !s->input_frames[i]) {
            ff_outlink_set_status(outlink, AVERROR_EOF, s->eof_pts);
        }
    }

    return 0;
}
564
/**
 * Filter activation callback.
 *
 * A whole frame is consumed from the first input; every other input is then
 * asked for exactly as many samples as that frame holds. Once each input
 * slot is filled, try_push_frame() assembles and sends the output frame.
 * A status acknowledged on an input is forwarded to the output.
 *
 * @return 0 or the result of try_push_frame() on success, a negative
 *         AVERROR code on failure
 */
static int activate(AVFilterContext *ctx)
{
    JoinContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int nb_samples;
    int status, ret, i;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);

    /* the first input determines the size of the output frame */
    if (!s->input_frames[0]) {
        AVFilterLink *first = ctx->inputs[0];

        ret = ff_inlink_consume_frame(first, &s->input_frames[0]);
        if (ret < 0)
            return ret;

        if (ret == 0 && ff_inlink_acknowledge_status(first, &status, &pts)) {
            /* the end of the last pushed frame is used as timestamp */
            ff_outlink_set_status(outlink, status, s->eof_pts);
            return 0;
        }

        if (!s->input_frames[0] && ff_outlink_frame_wanted(outlink)) {
            ff_inlink_request_frame(first);
            return 0;
        }
    }

    nb_samples = s->input_frames[0] ? s->input_frames[0]->nb_samples : 0;

    /* fetch a block of identical size from each remaining input */
    for (i = 1; i < ctx->nb_inputs && nb_samples > 0; i++) {
        AVFilterLink *inlink = ctx->inputs[i];
        AVFrame **slot = &s->input_frames[i];

        if (*slot)
            continue;

        ret = ff_inlink_consume_samples(inlink, nb_samples, nb_samples, slot);
        if (ret < 0)
            return ret;

        if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
            ff_outlink_set_status(outlink, status, pts);
            return 0;
        }

        if (!*slot) {
            /* not enough samples yet, ask for more and come back later */
            ff_inlink_request_frame(inlink);
            return 0;
        }
    }

    return try_push_frame(ctx);
}
611
612 static const AVFilterPad avfilter_af_join_outputs[] = {
613 {
614 .name = "default",
615 .type = AVMEDIA_TYPE_AUDIO,
616 .config_props = join_config_output,
617 },
618 };
619
620 const AVFilter ff_af_join = {
621 .name = "join",
622 .description = NULL_IF_CONFIG_SMALL("Join multiple audio streams into "
623 "multi-channel output."),
624 .priv_size = sizeof(JoinContext),
625 .priv_class = &join_class,
626 .init = join_init,
627 .uninit = join_uninit,
628 .activate = activate,
629 .inputs = NULL,
630 FILTER_OUTPUTS(avfilter_af_join_outputs),
631 FILTER_QUERY_FUNC(join_query_formats),
632 .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
633 };
634