1 /*
2 * AV3A Demuxer
3 *
4 * Copyright (c) 2024 Shuai Liu <cqliushuai@outlook.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22 #include "avformat.h"
23 #include "avio_internal.h"
24 #include "internal.h"
25 #include "rawdec.h"
26 #include "libavutil/opt.h"
27 #include "libavutil/avassert.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/channel_layout.h"
30 #include "libavcodec/get_bits.h"
31 #include "libavcodec/av3a.h"
32 #include <string.h>
33
/*
 * Stream configuration blob exported as codec extradata.
 *
 * av3a_read_header() fills one of these from the parsed AATF frame header
 * and copies it byte-for-byte into AVCodecParameters.extradata, so the field
 * order and widths below define the extradata layout.
 */
typedef struct {
    uint8_t audio_codec_id;             /* copied from AATFHeaderInfo.audio_codec_id */
    uint8_t sampling_frequency_index;   /* index into ff_av3a_sampling_rate_table */
    uint8_t nn_type;                    /* neural network type field of the header */
    uint8_t content_type;               /* channel / object / channel+object / ambisonic */
    uint8_t channel_number_index;       /* index into ff_av3a_channels_map_table */
    uint8_t number_objects;             /* copied from AATFHeaderInfo.nb_objects */
    uint8_t hoa_order;                  /* ambisonic order (header field + 1) */
    uint8_t resolution_index;           /* index into ff_av3a_sample_format_map_table */
    uint16_t total_bitrate_kbps;        /* AATFHeaderInfo.total_bitrate / 1000, host byte order */
} Av3aFormatContext;
45
av3a_read_aatf_frame_header(const uint8_t * buf,AATFHeaderInfo * hdf)46 static int av3a_read_aatf_frame_header(const uint8_t *buf, AATFHeaderInfo *hdf)
47 {
48 int ret = 0;
49 uint16_t sync_word;
50 GetBitContext gb;
51
52 if ((!buf) || (!hdf)) {
53 return AVERROR(ENOMEM);
54 }
55
56 hdf->nb_channels = 0;
57 hdf->nb_objects = 0;
58
59 /* (aatf header size + padding size) + AV_INPUT_BUFFER_PADDING_SIZE */
60 if ((ret = init_get_bits8(&gb, buf, (AV3A_MAX_NBYTES_HEADER + 7))) < 0) {
61 return ret;
62 }
63
64 sync_word = get_bits(&gb, 12);
65 if (sync_word != AV3A_AUDIO_SYNC_WORD) {
66 return AVERROR_INVALIDDATA;
67 }
68
69 /* codec id */
70 hdf->audio_codec_id = get_bits(&gb, 4);
71 if (hdf->audio_codec_id != AV3A_LOSSY_CODEC_ID) {
72 return AVERROR_INVALIDDATA;
73 }
74
75 /* anc data */
76 hdf->anc_data = get_bits(&gb, 1);
77 if (hdf->anc_data) {
78 return AVERROR_INVALIDDATA;
79 }
80
81 /* neural network type */
82 hdf->nn_type = get_bits(&gb, 3);
83 if ((hdf->nn_type > AV3A_LC_NN_TYPE) || (hdf->nn_type < AV3A_BASELINE_NN_TYPE)) {
84 return AVERROR_INVALIDDATA;
85 }
86
87 /* coding profile */
88 hdf->coding_profile = get_bits(&gb, 3);
89
90 /* sampling rate */
91 hdf->sampling_frequency_index = get_bits(&gb, 4);
92 if ((hdf->sampling_frequency_index >= AV3A_FS_TABLE_SIZE) || (hdf->sampling_frequency_index < 0)) {
93 return AVERROR_INVALIDDATA;
94 }
95 hdf->sampling_rate = ff_av3a_sampling_rate_table[hdf->sampling_frequency_index];
96
97 skip_bits(&gb, 8);
98
99 if (hdf->coding_profile == AV3A_BASE_PROFILE) {
100 hdf->content_type = AV3A_CHANNEL_BASED_TYPE;
101 hdf->channel_number_index = get_bits(&gb, 7);
102 if ((hdf->channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
103 (hdf->channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
104 (hdf->channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
105 (hdf->channel_number_index < CHANNEL_CONFIG_MONO)) {
106 return AVERROR_INVALIDDATA;
107 }
108 hdf->nb_channels = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
109 } else if (hdf->coding_profile == AV3A_OBJECT_METADATA_PROFILE) {
110 hdf->soundbed_type = get_bits(&gb, 2);
111 if (hdf->soundbed_type == 0) {
112 hdf->content_type = AV3A_OBJECT_BASED_TYPE;
113 hdf->object_channel_number = get_bits(&gb, 7);
114 if (hdf->object_channel_number < 0) {
115 return AVERROR_INVALIDDATA;
116 }
117 hdf->bitrate_index_per_channel = get_bits(&gb, 4);
118 if ((hdf->bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) || (hdf->bitrate_index_per_channel < 0)) {
119 return AVERROR_INVALIDDATA;
120 }
121 hdf->nb_objects = hdf->object_channel_number + 1;
122 hdf->total_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[hdf->bitrate_index_per_channel] * hdf->nb_objects;
123 } else if (hdf->soundbed_type == 1) {
124 hdf->content_type = AV3A_CHANNEL_OBJECT_TYPE;
125 hdf->channel_number_index = get_bits(&gb, 7);
126 if ((hdf->channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
127 (hdf->channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
128 (hdf->channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
129 (hdf->channel_number_index < CHANNEL_CONFIG_STEREO)) {
130 return AVERROR_INVALIDDATA;
131 }
132 hdf->nb_channels = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
133 hdf->bitrate_index = get_bits(&gb, 4);
134 if ((hdf->bitrate_index >= AV3A_BITRATE_TABLE_SIZE) || (hdf->bitrate_index < 0)) {
135 return AVERROR_INVALIDDATA;
136 }
137
138 hdf->object_channel_number = get_bits(&gb, 7);
139 if (hdf->object_channel_number < 0) {
140 return AVERROR_INVALIDDATA;
141 }
142 hdf->nb_objects = hdf->object_channel_number + 1;
143 hdf->bitrate_index_per_channel = get_bits(&gb, 4);
144 if ((hdf->bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) || (hdf->bitrate_index_per_channel < 0)) {
145 return AVERROR_INVALIDDATA;
146 }
147
148 hdf->total_bitrate = ff_av3a_bitrate_map_table[hdf->channel_number_index].bitrate_table[hdf->bitrate_index] +
149 ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[hdf->bitrate_index_per_channel] * hdf->nb_objects;
150 } else {
151 return AVERROR_INVALIDDATA;
152 }
153 } else if (hdf->coding_profile == AV3A_AMBISONIC_PROFILE) {
154 hdf->content_type = AV3A_AMBISONIC_TYPE;
155 hdf->order = get_bits(&gb, 4);
156 hdf->hoa_order = hdf->order + 1;
157
158 switch (hdf->hoa_order) {
159 case AV3A_AMBISONIC_FIRST_ORDER:
160 hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER1;
161 break;
162 case AV3A_AMBISONIC_SECOND_ORDER:
163 hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER2;
164 break;
165 case AV3A_AMBISONIC_THIRD_ORDER:
166 hdf->channel_number_index = CHANNEL_CONFIG_HOA_ORDER3;
167 break;
168 default:
169 return AVERROR_INVALIDDATA;
170 }
171 hdf->nb_channels = ff_av3a_channels_map_table[hdf->channel_number_index].channels;
172 } else {
173 return AVERROR_INVALIDDATA;
174 }
175
176 hdf->total_channels = hdf->nb_channels + hdf->nb_objects;
177
178 /* resolution */
179 hdf->resolution_index = get_bits(&gb, 2);
180 if ((hdf->resolution_index >= AV3A_RESOLUTION_TABLE_SIZE) || (hdf->resolution_index < 0)) {
181 return AVERROR_INVALIDDATA;
182 }
183 hdf->resolution = ff_av3a_sample_format_map_table[hdf->resolution_index].resolution;
184 hdf->sample_format = ff_av3a_sample_format_map_table[hdf->resolution_index].sample_format;
185
186 if (hdf->coding_profile != AV3A_OBJECT_METADATA_PROFILE) {
187 hdf->bitrate_index = get_bits(&gb, 4);
188 if ((hdf->bitrate_index >= AV3A_BITRATE_TABLE_SIZE) || (hdf->bitrate_index < 0)) {
189 return AVERROR_INVALIDDATA;
190 }
191 hdf->total_bitrate = ff_av3a_bitrate_map_table[hdf->channel_number_index].bitrate_table[hdf->bitrate_index];
192 }
193
194 skip_bits(&gb, 8);
195
196 return 0;
197 }
198
/**
 * Peek at the frame header at the current read position and derive the size
 * in bytes of the frame that starts there.
 *
 * AV3A_MAX_NBYTES_HEADER bytes are read from s->pb, parsed, and the read
 * position is moved back by the same amount on success, so the caller can
 * read the full frame (header included) afterwards.
 *
 * @param s demuxer context; s->pb is the input
 * @return frame size in bytes (may be 0 when the selected bitrate is 0),
 *         AVERROR_EOF on a short read, AVERROR_INVALIDDATA on an
 *         unsupported or malformed header, AVERROR(EINVAL) on a NULL
 *         context or I/O handle, or another negative error from the I/O layer
 */
static int av3a_get_packet_size(AVFormatContext *s)
{
    /* Zero-filled: only AV3A_MAX_NBYTES_HEADER bytes are read below, while
     * the bit reader is told the buffer holds 7 more bytes and may also
     * touch the padding behind it. */
    uint8_t header[(AV3A_MAX_NBYTES_HEADER + 7) + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
    GetBitContext gb;
    int ret;
    int read_bytes;
    int packet_bytes;
    int32_t sampling_rate;
    int16_t coding_profile, sampling_frequency_index;
    int16_t channel_number_index = 0;
    int16_t bitrate_index, bitrate_index_per_channel;
    int16_t objects, hoa_order;
    int64_t total_bitrate = 0;

    if (!s || !s->pb) {
        return AVERROR(EINVAL);
    }

    read_bytes = avio_read(s->pb, header, AV3A_MAX_NBYTES_HEADER);
    if (read_bytes != AV3A_MAX_NBYTES_HEADER) {
        return (read_bytes < 0) ? read_bytes : AVERROR_EOF;
    }

    if ((ret = init_get_bits8(&gb, header, (AV3A_MAX_NBYTES_HEADER + 7))) < 0) {
        return ret;
    }

    /* get_bits() results of at most 7 bits are stored in int16_t locals and
     * can never be negative, so only upper bounds are checked below. */

    /* sync word, 12 bits */
    if (get_bits(&gb, 12) != AV3A_AUDIO_SYNC_WORD) {
        return AVERROR_INVALIDDATA;
    }

    /* 8 bits not needed for the size computation */
    skip_bits(&gb, 8);

    coding_profile           = get_bits(&gb, 3);
    sampling_frequency_index = get_bits(&gb, 4);
    if (sampling_frequency_index >= AV3A_FS_TABLE_SIZE) {
        return AVERROR_INVALIDDATA;
    }
    sampling_rate = ff_av3a_sampling_rate_table[sampling_frequency_index];
    if (sampling_rate <= 0) {
        /* would otherwise divide by zero below */
        return AVERROR_INVALIDDATA;
    }

    skip_bits(&gb, 8);

    if (coding_profile == AV3A_BASE_PROFILE) {
        channel_number_index = get_bits(&gb, 7);
        if ((channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
            (channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
            (channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
            (channel_number_index < CHANNEL_CONFIG_MONO)) {
            return AVERROR_INVALIDDATA;
        }
    } else if (coding_profile == AV3A_OBJECT_METADATA_PROFILE) {
        int16_t soundbed_type = get_bits(&gb, 2);

        if (soundbed_type == 0) {
            /* objects only: the field stores the object count minus one */
            objects = get_bits(&gb, 7) + 1;

            bitrate_index_per_channel = get_bits(&gb, 4);
            if (bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            total_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[bitrate_index_per_channel] * objects;
        } else if (soundbed_type == 1) {
            /* channel bed plus objects */
            int64_t soundbed_bitrate, objects_bitrate;

            channel_number_index = get_bits(&gb, 7);
            if ((channel_number_index > CHANNEL_CONFIG_MC_7_1_4) ||
                (channel_number_index == CHANNEL_CONFIG_MC_10_2) ||
                (channel_number_index == CHANNEL_CONFIG_MC_22_2) ||
                (channel_number_index < CHANNEL_CONFIG_STEREO)) {
                return AVERROR_INVALIDDATA;
            }

            bitrate_index = get_bits(&gb, 4);
            if (bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            soundbed_bitrate = ff_av3a_bitrate_map_table[channel_number_index].bitrate_table[bitrate_index];

            objects = get_bits(&gb, 7) + 1;

            bitrate_index_per_channel = get_bits(&gb, 4);
            if (bitrate_index_per_channel >= AV3A_BITRATE_TABLE_SIZE) {
                return AVERROR_INVALIDDATA;
            }
            objects_bitrate = ff_av3a_bitrate_map_table[CHANNEL_CONFIG_MONO].bitrate_table[bitrate_index_per_channel];

            total_bitrate = soundbed_bitrate + (objects_bitrate * objects);
        } else {
            return AVERROR_INVALIDDATA;
        }
    } else if (coding_profile == AV3A_AMBISONIC_PROFILE) {
        hoa_order = get_bits(&gb, 4) + 1;

        switch (hoa_order) {
        case AV3A_AMBISONIC_FIRST_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER1;
            break;
        case AV3A_AMBISONIC_SECOND_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER2;
            break;
        case AV3A_AMBISONIC_THIRD_ORDER:
            channel_number_index = CHANNEL_CONFIG_HOA_ORDER3;
            break;
        default:
            return AVERROR_INVALIDDATA;
        }
    } else {
        return AVERROR_INVALIDDATA;
    }

    /* resolution index, 2 bits: irrelevant for the frame size */
    skip_bits(&gb, 2);

    /* The object metadata profile computed its bitrate above; the other
     * profiles carry one 4-bit bitrate index here. */
    if (coding_profile != AV3A_OBJECT_METADATA_PROFILE) {
        bitrate_index = get_bits(&gb, 4);
        if (bitrate_index >= AV3A_BITRATE_TABLE_SIZE) {
            return AVERROR_INVALIDDATA;
        }
        total_bitrate = ff_av3a_bitrate_map_table[channel_number_index].bitrate_table[bitrate_index];
    }

    skip_bits(&gb, 8);

    /* bytes per frame = bitrate / sample rate * samples per frame / 8.
     * NOTE(review): kept in single precision exactly as before; presumably
     * this has to match the framing produced by the encoder - confirm
     * before switching to integer arithmetic. */
    if (sampling_rate == 44100) {
        /* at 44.1 kHz the bit count is floored before rounding up to bytes */
        int packet_bits = (int)floor(((float)(total_bitrate) / sampling_rate) * AV3A_AUDIO_FRAME_SIZE);
        packet_bytes    = (int)ceil((float)packet_bits / 8);
    } else {
        packet_bytes = (int)ceil((((float)(total_bitrate) / sampling_rate) * AV3A_AUDIO_FRAME_SIZE) / 8);
    }

    /* rewind so the caller reads the frame including its header */
    if ((ret = avio_seek(s->pb, -read_bytes, SEEK_CUR)) < 0) {
        return ret;
    }

    return packet_bytes;
}
345
av3a_probe(const AVProbeData * p)346 static int av3a_probe(const AVProbeData *p)
347 {
348 uint16_t frame_sync_word;
349 uint16_t lval = ((uint16_t)(p->buf[0]));
350 uint16_t rval = ((uint16_t)(p->buf[1]));
351 frame_sync_word = ((lval << 8) | rval) >> 4;
352
353 if (frame_sync_word == AV3A_AUDIO_SYNC_WORD && av_match_ext(p->filename, "av3a")) {
354 return AVPROBE_SCORE_MAX;
355 }
356
357 return 0;
358 }
359
/**
 * read_header callback: create the single audio stream and describe it from
 * the first frame header.
 *
 * The first AV3A_MAX_NBYTES_HEADER bytes are read, parsed with
 * av3a_read_aatf_frame_header() and the read position is moved back by the
 * same amount afterwards, so the first packet still contains that header.
 * A packed Av3aFormatContext is exported as extradata.
 *
 * @param s demuxer context
 * @return 0 on success, AVERROR(EINVAL) on a NULL context, AVERROR(ENOMEM)
 *         if the stream cannot be allocated, AVERROR_EOF on a short read,
 *         or the error returned by header parsing, extradata allocation or
 *         seeking
 */
static int av3a_read_header(AVFormatContext *s)
{
    int ret = 0;
    /* Zero-filled: only AV3A_MAX_NBYTES_HEADER bytes are read below, while
     * the header parser sets its bit reader up for 7 more bytes. */
    uint8_t header[(AV3A_MAX_NBYTES_HEADER + 7) + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
    AVStream *stream = NULL;
    /* Zero-filled so that every byte copied into extradata is defined. */
    Av3aFormatContext av3afmtctx = { 0 };
    /* Zero-filled so that fields the parser leaves alone for a given
     * profile are not read back as indeterminate values below. */
    AATFHeaderInfo hdf = { 0 };

    if (!s) {
        return AVERROR(EINVAL);
    }

    if (!(stream = avformat_new_stream(s, NULL))) {
        return AVERROR(ENOMEM);
    }

    stream->start_time = 0;
    ffstream(stream)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
    stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    stream->codecpar->codec_id = s->iformat->raw_codec_id;
    stream->codecpar->codec_tag = MKTAG('a', 'v', '3', 'a');

    if ((ret = avio_read(s->pb, header, AV3A_MAX_NBYTES_HEADER)) != AV3A_MAX_NBYTES_HEADER) {
        return (ret < 0) ? ret : AVERROR_EOF;
    }

    if ((ret = av3a_read_aatf_frame_header(header, &hdf)) < 0) {
        return ret;
    }

    /* stream parameters */
    stream->codecpar->format = hdf.sample_format;
    stream->codecpar->bits_per_raw_sample = hdf.resolution;
    stream->codecpar->bit_rate = hdf.total_bitrate;
    stream->codecpar->sample_rate = (int) (hdf.sampling_rate);
    stream->codecpar->frame_size = AV3A_AUDIO_FRAME_SIZE;
    stream->codecpar->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
    stream->codecpar->ch_layout.nb_channels = hdf.total_channels;

    /* av3a_read_packet() reports packet durations in samples (frame_size),
     * so timestamps have to be expressed in 1/sample_rate units. */
    if (hdf.sampling_rate > 0) {
        avpriv_set_pts_info(stream, 64, 1, hdf.sampling_rate);
    }

    /* extradata */
    av3afmtctx.audio_codec_id = hdf.audio_codec_id;
    av3afmtctx.sampling_frequency_index = hdf.sampling_frequency_index;
    av3afmtctx.nn_type = hdf.nn_type;
    av3afmtctx.content_type = hdf.content_type;
    av3afmtctx.channel_number_index = hdf.channel_number_index;
    av3afmtctx.number_objects = hdf.nb_objects;
    av3afmtctx.hoa_order = hdf.hoa_order;
    av3afmtctx.resolution_index = hdf.resolution_index;
    av3afmtctx.total_bitrate_kbps = (uint16_t) (hdf.total_bitrate / 1000);

    if ((ret = ff_alloc_extradata(stream->codecpar, sizeof(Av3aFormatContext))) < 0) {
        return ret;
    }
    memcpy(stream->codecpar->extradata, &av3afmtctx, sizeof(Av3aFormatContext));

    /* rewind so that the first packet starts with the header just parsed */
    if ((ret = avio_seek(s->pb, -AV3A_MAX_NBYTES_HEADER, SEEK_CUR)) < 0) {
        return ret;
    }

    return 0;
}
421
/**
 * read_packet callback: return the next frame as one packet.
 *
 * The frame size is derived from the frame header by
 * av3a_get_packet_size(); the packet then receives exactly that many bytes,
 * starting at the header.
 *
 * @param s   demuxer context
 * @param pkt packet to fill
 * @return 0 on success, AVERROR_EOF at end of input, on a zero-sized frame
 *         or on a truncated frame, AVERROR(EINVAL) if the context has no
 *         usable stream, or a negative error from size detection,
 *         allocation or I/O
 */
static int av3a_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    int64_t pos;
    int packet_size;
    int read_bytes;
    int ret;

    if (!s || !pkt) {
        return AVERROR(EINVAL);
    }

    /* Validate the stream before anything is allocated, so that no error
     * path below has to release a packet for this reason. */
    if (s->nb_streams < 1 || !s->streams[0] || !s->streams[0]->codecpar) {
        return AVERROR(EINVAL);
    }

    if (avio_feof(s->pb)) {
        return AVERROR_EOF;
    }
    pos = avio_tell(s->pb);

    packet_size = av3a_get_packet_size(s);
    if (packet_size < 0) {
        return packet_size;
    }
    if (!packet_size) {
        return AVERROR_EOF;
    }

    if ((ret = av_new_packet(pkt, packet_size)) < 0) {
        return ret;
    }

    pkt->stream_index = 0;
    pkt->pos = pos;
    pkt->duration = s->streams[0]->codecpar->frame_size;

    read_bytes = avio_read(s->pb, pkt->data, packet_size);
    if (read_bytes != packet_size) {
        /* truncated frame: do not hand an allocated packet back on error */
        av_packet_unref(pkt);
        return (read_bytes < 0) ? read_bytes : AVERROR_EOF;
    }

    return 0;
}
469
/*
 * Demuxer registration for raw AV3A ("Audio Vivid") elementary streams.
 * Wires the probe, header and packet callbacks defined in this file.
 */
const AVInputFormat ff_av3a_demuxer = {
    .name = "av3a",
    .long_name = NULL_IF_CONFIG_SMALL("Audio Vivid"),
    /* copied into the stream's codec_id by av3a_read_header() */
    .raw_codec_id = AV_CODEC_ID_AVS3DA,
    /* NOTE(review): none of the callbacks in this file access priv_data;
     * confirm that FFRawDemuxerContext is actually required here */
    .priv_data_size = sizeof(FFRawDemuxerContext),
    .read_probe = av3a_probe,
    .read_header = av3a_read_header,
    .read_packet = av3a_read_packet,
    .flags = AVFMT_GENERIC_INDEX,
    .extensions = "av3a",
    .mime_type = "audio/av3a",
};
482