• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * AV3A Demuxer
3  *
4  * Copyright (c) 2024 Shuai Liu <cqliushuai@outlook.com>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 #include "avformat.h"
23 #include "avio_internal.h"
24 #include "internal.h"
25 #include "rawdec.h"
26 #include "libavutil/opt.h"
27 #include "libavutil/avassert.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/channel_layout.h"
30 #include "libavcodec/get_bits.h"
31 #include "libavcodec/av3a.h"
32 #include <string.h>
33 
/*
 * Stream description that av3a_read_header() copies byte-for-byte into the
 * codec parameters' extradata. Field order and widths therefore define the
 * extradata layout and must not be changed.
 */
typedef struct Av3aFormatContext {
    /* values taken from the parsed AATF frame header */
    uint8_t  audio_codec_id;
    uint8_t  sampling_frequency_index;
    uint8_t  nn_type;
    uint8_t  content_type;
    uint8_t  channel_number_index;
    uint8_t  number_objects;
    uint8_t  hoa_order;
    uint8_t  resolution_index;
    /* header total bitrate divided by 1000 */
    uint16_t total_bitrate_kbps;
} Av3aFormatContext;
45 
/**
 * Parse an AATF frame header into @p hdf.
 *
 * The bit reader is initialised over AV3A_MAX_NBYTES_HEADER + 7 bytes, so
 * @p buf has to provide at least that many readable bytes.
 *
 * @param buf  start of the frame header
 * @param hdf  receives the decoded fields; it is cleared first, so fields that
 *             the signalled coding profile does not carry read back as zero
 * @return 0 on success, AVERROR(EINVAL) when an argument is NULL, the error
 *         reported by init_get_bits8(), or AVERROR_INVALIDDATA when a field
 *         holds a value this demuxer does not accept
 */
static int av3a_read_aatf_frame_header(const uint8_t *buf, AATFHeaderInfo *hdf)
{
    int ret = 0;
    uint16_t sync_word;
    GetBitContext gb;

    if ((!buf) || (!hdf)) {
        return AVERROR(EINVAL);
    }

    /*
     * Which fields get written below depends on the coding profile, while the
     * caller reads several of them unconditionally. Clearing the structure
     * keeps every field defined (this also resets nb_channels / nb_objects).
     */
    memset(hdf, 0, sizeof(*hdf));

    /* aatf header size + padding size */
    if ((ret = init_get_bits8(&gb, buf, (AV3A_MAX_NBYTES_HEADER + 7))) < 0) {
        return ret;
    }

    sync_word = get_bits(&gb, 12);
    if (sync_word != AV3A_AUDIO_SYNC_WORD) {
        return AVERROR_INVALIDDATA;
    }

    /* codec id: only the lossy codec is accepted */
    hdf->audio_codec_id = get_bits(&gb, 4);
    if (hdf->audio_codec_id != AV3A_LOSSY_CODEC_ID) {
        return AVERROR_INVALIDDATA;
    }

    /* anc data: streams with the flag set are rejected */
    hdf->anc_data = get_bits(&gb, 1);
    if (hdf->anc_data) {
        return AVERROR_INVALIDDATA;
    }

    /* neural network type */
    hdf->nn_type = get_bits(&gb, 3);
    if ((hdf->nn_type > AV3A_LC_NN_TYPE) || (hdf->nn_type < AV3A_BASELINE_NN_TYPE)) {
        return AVERROR_INVALIDDATA;
    }

    /* coding profile */
    hdf->coding_profile = get_bits(&gb, 3);

    /* sampling rate */
    hdf->sampling_frequency_index = get_bits(&gb, 4);
    if (hdf->sampling_frequency_index >= AV3A_FS_TABLE_SIZE) {
        return AVERROR_INVALIDDATA;
    }
    hdf->sampling_rate = ff_av3a_sampling_rate_table[hdf->sampling_frequency_index];

    /* 8 bits that this parser does not use */
    skip_bits(&gb, 8);

    if (hdf->coding_profile == AV3A_BASE_PROFILE) {
        hdf->content_type         = AV3A_CHANNEL_BASED_TYPE;
        hdf->channel_number_index = get_bits(&gb, 7);
        if ((hdf->channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
            (hdf->channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
            (hdf->channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
            (hdf->channel_number_index < CHANNEL_CONFIG_MONO)) {
            return AVERROR_INVALIDDATA;
        }
        hdf->nb_channels = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
    } else if (hdf->coding_profile == AV3A_OBJECT_METADATA_PROFILE) {
        hdf->soundbed_type = get_bits(&gb, 2);
        if (hdf->soundbed_type == 0) {
            /* objects only: the bitrate is signalled per object */
            hdf->content_type              = AV3A_OBJECT_BASED_TYPE;
            hdf->object_channel_number     = get_bits(&gb, 7);
            hdf->bitrate_index_per_channel = get_bits(&gb, 4);
            if (hdf->bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            hdf->nb_objects    = hdf->object_channel_number + 1;
            hdf->total_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[hdf->bitrate_index_per_channel] * hdf->nb_objects;
        } else if (hdf->soundbed_type == 1) {
            /* channel bed plus objects: bed bitrate + per-object bitrate */
            hdf->content_type = AV3A_CHANNEL_OBJECT_TYPE;
            hdf->channel_number_index = get_bits(&gb, 7);
            if ((hdf->channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
                (hdf->channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
                (hdf->channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
                (hdf->channel_number_index < CHANNEL_CONFIG_STEREO)) {
                return AVERROR_INVALIDDATA;
            }
            hdf->nb_channels   = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
            hdf->bitrate_index = get_bits(&gb, 4);
            if (hdf->bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }

            hdf->object_channel_number     = get_bits(&gb, 7);
            hdf->nb_objects                = hdf->object_channel_number + 1;
            hdf->bitrate_index_per_channel = get_bits(&gb, 4);
            if (hdf->bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }

            hdf->total_bitrate = ff_av3a_bitrate_map_table[hdf->channel_number_index].bitrate_table[hdf->bitrate_index] +
                ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[hdf->bitrate_index_per_channel] * hdf->nb_objects;
        } else {
            return AVERROR_INVALIDDATA;
        }
    } else if (hdf->coding_profile == AV3A_AMBISONIC_PROFILE) {
        hdf->content_type = AV3A_AMBISONIC_TYPE;
        hdf->order        = get_bits(&gb, 4);
        hdf->hoa_order    = hdf->order + 1;

        /* only first to third order map to a channel configuration */
        switch (hdf->hoa_order) {
        case AV3A_AMBISONIC_FIRST_ORDER:
            hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER1;
            break;
        case AV3A_AMBISONIC_SECOND_ORDER:
            hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER2;
            break;
        case AV3A_AMBISONIC_THIRD_ORDER:
            hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER3;
            break;
        default:
            return AVERROR_INVALIDDATA;
        }
        hdf->nb_channels = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
    } else {
        return AVERROR_INVALIDDATA;
    }

    hdf->total_channels = hdf->nb_channels + hdf->nb_objects;

    /* resolution */
    hdf->resolution_index = get_bits(&gb, 2);
    if (hdf->resolution_index >= AV3A_RESOLUTION_TABLE_SIZE) {
        return AVERROR_INVALIDDATA;
    }
    hdf->resolution    = ff_av3a_sample_format_map_table[hdf->resolution_index].resolution;
    hdf->sample_format = ff_av3a_sample_format_map_table[hdf->resolution_index].sample_format;

    /* the object metadata profile already carried its bitrate indices above */
    if (hdf->coding_profile != AV3A_OBJECT_METADATA_PROFILE) {
        hdf->bitrate_index = get_bits(&gb, 4);
        if (hdf->bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
            return AVERROR_INVALIDDATA;
        }
        hdf->total_bitrate = ff_av3a_bitrate_map_table[hdf->channel_number_index].bitrate_table[hdf->bitrate_index];
    }

    skip_bits(&gb, 8);

    return 0;
}
198 
/**
 * Work out the size in bytes of the frame that starts at the current read
 * position of s->pb.
 *
 * AV3A_MAX_NBYTES_HEADER bytes are read, the fields that determine the total
 * bitrate are decoded, and the size is derived from bitrate, sampling rate
 * and AV3A_AUDIO_FRAME_SIZE. On success the stream is seeked back by the
 * number of bytes consumed; on failure the position is left where the read
 * ended.
 *
 * @param s  demuxer context
 * @return frame size in bytes, AVERROR(EINVAL) for a missing context or I/O
 *         handle, AVERROR_EOF on a short read, AVERROR_INVALIDDATA for an
 *         unsupported header value, or the error returned by the I/O layer
 */
static int av3a_get_packet_size(AVFormatContext *s)
{
    int ret = 0;
    int read_bytes = 0;
    uint16_t sync_word = 0;
    int packet_bytes = 0;
    int packet_bits  = 0;
    /*
     * Only AV3A_MAX_NBYTES_HEADER bytes are filled from the stream, yet the
     * bit reader below is set up over AV3A_MAX_NBYTES_HEADER + 7 bytes; keep
     * the remainder zeroed rather than handing it stale stack contents.
     */
    uint8_t header[(AV3A_MAX_NBYTES_HEADER + 7) + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
    GetBitContext gb;
    int32_t sampling_rate;
    int16_t coding_profile, sampling_frequency_index, channel_number_index = 0;
    int16_t bitrate_index, bitrate_index_per_channel;
    int16_t objects;
    int64_t total_bitrate = 0;

    if (!s) {
        return AVERROR(EINVAL);
    }

    if (!s->pb) {
        return AVERROR(EINVAL);
    }

    read_bytes = avio_read(s->pb, header, AV3A_MAX_NBYTES_HEADER);
    if (read_bytes != AV3A_MAX_NBYTES_HEADER) {
        return (read_bytes < 0) ? read_bytes : AVERROR_EOF;
    }

    if ((ret = init_get_bits8(&gb, header, (AV3A_MAX_NBYTES_HEADER + 7))) < 0) {
        return ret;
    }

    sync_word = get_bits(&gb, 12);
    if (sync_word != AV3A_AUDIO_SYNC_WORD) {
        return AVERROR_INVALIDDATA;
    }

    /* codec id (4), anc data (1) and nn type (3) do not affect the size */
    skip_bits(&gb, 8);

    coding_profile           = get_bits(&gb, 3);
    sampling_frequency_index = get_bits(&gb, 4);
    if (sampling_frequency_index >= AV3A_FS_TABLE_SIZE) {
        return AVERROR_INVALIDDATA;
    }
    sampling_rate = ff_av3a_sampling_rate_table[sampling_frequency_index];

    skip_bits(&gb, 8);

    if (coding_profile == AV3A_BASE_PROFILE) {
        channel_number_index = get_bits(&gb, 7);
        if ((channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
            (channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
            (channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
            (channel_number_index < CHANNEL_CONFIG_MONO)) {
            return AVERROR_INVALIDDATA;
        }
    } else if (coding_profile == AV3A_OBJECT_METADATA_PROFILE) {
        int64_t soundbed_bitrate, objects_bitrate;
        int16_t soundbed_type = get_bits(&gb, 2);
        if (soundbed_type == 0) {
            /* objects only */
            objects  = get_bits(&gb, 7);
            objects += 1;

            bitrate_index_per_channel = get_bits(&gb, 4);
            if (bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            total_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[bitrate_index_per_channel] * objects;
        } else if (soundbed_type == 1) {
            /* channel bed plus objects */
            channel_number_index = get_bits(&gb, 7);
            if ((channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
                (channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
                (channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
                (channel_number_index < CHANNEL_CONFIG_STEREO)) {
                return AVERROR_INVALIDDATA;
            }

            bitrate_index = get_bits(&gb, 4);
            if (bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            soundbed_bitrate = ff_av3a_bitrate_map_table[channel_number_index].bitrate_table[bitrate_index];

            objects  = get_bits(&gb, 7);
            objects += 1;
            bitrate_index_per_channel = get_bits(&gb, 4);
            if (bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }

            objects_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[bitrate_index_per_channel];
            total_bitrate   = soundbed_bitrate + (objects_bitrate * objects);
        } else {
            return AVERROR_INVALIDDATA;
        }
    } else if (coding_profile == AV3A_AMBISONIC_PROFILE) {
        /* the stream stores the ambisonic order minus one */
        switch (get_bits(&gb, 4) + 1) {
        case AV3A_AMBISONIC_FIRST_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER1;
            break;
        case AV3A_AMBISONIC_SECOND_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER2;
            break;
        case AV3A_AMBISONIC_THIRD_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER3;
            break;
        default:
            return AVERROR_INVALIDDATA;
        }
    } else {
        return AVERROR_INVALIDDATA;
    }

    /* resolution index: not needed for the size */
    skip_bits(&gb, 2);
    if (coding_profile != AV3A_OBJECT_METADATA_PROFILE) {
        bitrate_index = get_bits(&gb, 4);
        if (bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
            return AVERROR_INVALIDDATA;
        }
        total_bitrate = ff_av3a_bitrate_map_table[channel_number_index].bitrate_table[bitrate_index];
    }

    skip_bits(&gb, 8);

    /*
     * bytes per frame = bitrate / sampling_rate * frame_size / 8, rounded up.
     * At 44100 Hz the bit count is rounded down first, then converted to bytes.
     */
    if (sampling_rate == 44100) {
        packet_bits  = (int)floor(((float)(total_bitrate) / sampling_rate) * AV3A_AUDIO_FRAME_SIZE);
        packet_bytes = (int)ceil((float)packet_bits / 8);
    } else {
        packet_bytes = (int)ceil((((float) (total_bitrate) / sampling_rate) * AV3A_AUDIO_FRAME_SIZE) / 8);
    }

    /* rewind so the caller reads the frame including its header */
    if ((ret = avio_seek(s->pb, -read_bytes, SEEK_CUR)) < 0) {
        return ret;
    }

    return packet_bytes;
}
345 
av3a_probe(const AVProbeData * p)346 static int av3a_probe(const AVProbeData *p)
347 {
348     uint16_t frame_sync_word;
349     uint16_t lval = ((uint16_t)(p->buf[0]));
350     uint16_t rval = ((uint16_t)(p->buf[1]));
351     frame_sync_word = ((lval << 8) | rval) >> 4;
352 
353     if (frame_sync_word == AV3A_AUDIO_SYNC_WORD && av_match_ext(p->filename, "av3a")) {
354         return AVPROBE_SCORE_MAX;
355     }
356 
357     return 0;
358 }
359 
/**
 * Create the single audio stream and fill its codec parameters from the
 * first frame header in the file.
 *
 * The header bytes are read, parsed with av3a_read_aatf_frame_header(), and
 * the stream is then seeked back so the first packet starts at the header.
 * The extradata is a byte copy of an Av3aFormatContext built from the parsed
 * header.
 *
 * @param s  demuxer context
 * @return 0 on success, AVERROR(EINVAL) for a NULL context, AVERROR(ENOMEM)
 *         if the stream cannot be created, AVERROR_EOF on a short read, or
 *         the error from header parsing, extradata allocation or seeking
 */
static int av3a_read_header(AVFormatContext *s)
{
    int ret = 0;
    /*
     * The parser sets its bit reader up over AV3A_MAX_NBYTES_HEADER + 7
     * bytes while only AV3A_MAX_NBYTES_HEADER are read here, so the rest of
     * the buffer is kept zeroed.
     */
    uint8_t header[(AV3A_MAX_NBYTES_HEADER + 7) + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
    AVStream *stream = NULL;
    Av3aFormatContext av3afmtctx;
    /*
     * Zeroed because not every coding profile writes every field, and some
     * of them (hoa_order, channel_number_index) are copied out below
     * regardless of profile.
     */
    AATFHeaderInfo hdf = { 0 };

    if (!s) {
        return AVERROR(EINVAL);
    }

    if (!(stream = avformat_new_stream(s, NULL))) {
        return AVERROR(ENOMEM);
    }

    stream->start_time             = 0;
    ffstream(stream)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
    stream->codecpar->codec_type   = AVMEDIA_TYPE_AUDIO;
    stream->codecpar->codec_id     = s->iformat->raw_codec_id;
    stream->codecpar->codec_tag    = MKTAG('a', 'v', '3', 'a');

    if ((ret = avio_read(s->pb, header, AV3A_MAX_NBYTES_HEADER)) != AV3A_MAX_NBYTES_HEADER) {
        return (ret < 0) ? ret : AVERROR_EOF;
    }

    if ((ret = av3a_read_aatf_frame_header(header, &hdf)) < 0) {
        return ret;
    }

    /* stream parameters */
    stream->codecpar->format                = hdf.sample_format;
    stream->codecpar->bits_per_raw_sample   = hdf.resolution;
    stream->codecpar->bit_rate              = hdf.total_bitrate;
    stream->codecpar->sample_rate           = (int) (hdf.sampling_rate);
    stream->codecpar->frame_size            = AV3A_AUDIO_FRAME_SIZE;
    stream->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
    stream->codecpar->ch_layout.nb_channels = hdf.total_channels;

    /* extradata */
    av3afmtctx.audio_codec_id           = hdf.audio_codec_id;
    av3afmtctx.sampling_frequency_index = hdf.sampling_frequency_index;
    av3afmtctx.nn_type                  = hdf.nn_type;
    av3afmtctx.content_type             = hdf.content_type;
    av3afmtctx.channel_number_index     = hdf.channel_number_index;
    av3afmtctx.number_objects           = hdf.nb_objects;
    av3afmtctx.hoa_order                = hdf.hoa_order;
    av3afmtctx.resolution_index         = hdf.resolution_index;
    av3afmtctx.total_bitrate_kbps       = (uint16_t) (hdf.total_bitrate / 1000);

    if ((ret = ff_alloc_extradata(stream->codecpar, sizeof(Av3aFormatContext))) < 0) {
        return ret;
    }
    /* the structure is stored as-is, in host memory layout */
    memcpy(stream->codecpar->extradata, &av3afmtctx, sizeof(Av3aFormatContext));

    /* rewind so the first packet begins with this header */
    if ((ret = avio_seek(s->pb, -AV3A_MAX_NBYTES_HEADER, SEEK_CUR)) < 0) {
        return ret;
    }

    return 0;
}
421 
/**
 * Read one frame into @p pkt.
 *
 * The frame size comes from av3a_get_packet_size(), which inspects the frame
 * header and leaves the stream positioned at the start of the frame. The
 * packet duration is the stream's frame_size.
 *
 * @param s    demuxer context
 * @param pkt  packet to fill
 * @return 0 on success, AVERROR(EINVAL) for a NULL context or a missing
 *         stream, AVERROR_EOF at end of input or on a short read, or the
 *         error from size detection, packet allocation or the I/O layer
 */
static int av3a_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    int64_t pos;
    int packet_size = 0;
    int read_bytes = 0;
    int ret = 0;

    if (!s) {
        return AVERROR(EINVAL);
    }

    if (avio_feof(s->pb)) {
        return AVERROR_EOF;
    }
    pos = avio_tell(s->pb);

    if (!(packet_size = av3a_get_packet_size(s))) {
        return AVERROR_EOF;
    }

    if (packet_size < 0) {
        return packet_size;
    }

    /*
     * Validate the stream before allocating anything, and make sure
     * streams[0] exists before it is dereferenced.
     */
    if (!s->nb_streams || !s->streams[0] || !s->streams[0]->codecpar) {
        return AVERROR(EINVAL);
    }

    if ((ret = av_new_packet(pkt, packet_size)) < 0) {
        return ret;
    }

    pkt->stream_index = 0;
    pkt->pos          = pos;
    pkt->duration     = s->streams[0]->codecpar->frame_size;

    read_bytes = avio_read(s->pb, pkt->data, packet_size);
    if (read_bytes != packet_size) {
        return (read_bytes < 0) ? read_bytes : AVERROR_EOF;
    }

    return 0;
}
469 
470 const AVInputFormat ff_av3a_demuxer = {
471     .name           = "av3a",
472     .long_name      = NULL_IF_CONFIG_SMALL("Audio Vivid"),
473     .raw_codec_id   = AV_CODEC_ID_AVS3DA,
474     .priv_data_size = sizeof(FFRawDemuxerContext),
475     .read_probe     = av3a_probe,
476     .read_header    = av3a_read_header,
477     .read_packet    = av3a_read_packet,
478     .flags          = AVFMT_GENERIC_INDEX,
479     .extensions     = "av3a",
480     .mime_type      = "audio/av3a",
481 };
482