• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Audible AA demuxer
3  * Copyright (c) 2015 Vesselin Bontchev
4  *
5  * Header parsing is borrowed from https://github.com/jteeuwen/audible project.
6  * Copyright (c) 2001-2014, Jim Teeuwen
7  *
8  * Redistribution and use in source and binary forms, with or without modification,
9  * are permitted provided that the following conditions are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright notice, this
12  *    list of conditions and the following disclaimer.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "avformat.h"
27 #include "demux.h"
28 #include "internal.h"
29 #include "libavutil/avstring.h"
30 #include "libavutil/dict.h"
31 #include "libavutil/intreadwrite.h"
32 #include "libavutil/tea.h"
33 #include "libavutil/opt.h"
34 
/* Magic value (decimal 1469084982) found at byte offset 4 of an Audible .aa file. */
#define AA_MAGIC 0x57907536
/* Largest table-of-contents entry count accepted from the header. */
#define MAX_TOC_ENTRIES 16
/* Largest number of key/value dictionary entries accepted from the header. */
#define MAX_DICTIONARY_ENTRIES 128
/* Packets are decrypted in units of this many bytes. */
#define TEA_BLOCK_SIZE 8
/* Bytes in front of every chapter: a 32-bit chapter size plus 4 skipped bytes. */
#define CHAPTER_HEADER_SIZE 8
/* Factor applied to byte positions when they are turned into timestamps. */
#define TIMEPREC 1000
/* Size in bytes of one encoded MP3 frame, used to estimate frame alignment after a seek. */
#define MP3_FRAME_SIZE 104
42 
43 typedef struct AADemuxContext {
44     AVClass *class;
45     uint8_t *aa_fixed_key;
46     int aa_fixed_key_len;
47     int codec_second_size;
48     int current_codec_second_size;
49     int chapter_idx;
50     struct AVTEA *tea_ctx;
51     uint8_t file_key[16];
52     int64_t current_chapter_size;
53     int64_t content_start;
54     int64_t content_end;
55     int seek_offset;
56 } AADemuxContext;
57 
/**
 * Look up the packet size used for an Audible codec.
 *
 * @param codec_name NUL-terminated codec identifier taken from the "codec"
 *                   dictionary entry of the file header; it is only read
 * @return 3982 for "mp332", 2000 for "acelp16", 1045 for "acelp85",
 *         or -1 when the name matches none of them (comparison is
 *         case-sensitive)
 */
static int get_second_size(const char *codec_name)
{
    static const struct {
        const char *name;
        int         second_size;
    } known_codecs[] = {
        { "mp332",   3982 },
        { "acelp16", 2000 },
        { "acelp85", 1045 },
    };

    for (size_t i = 0; i < sizeof(known_codecs) / sizeof(known_codecs[0]); i++) {
        if (!strcmp(codec_name, known_codecs[i].name))
            return known_codecs[i].second_size;
    }

    return -1;
}
72 
/**
 * Parse the .aa header, derive the file key and set up the audio stream.
 *
 * The header is consumed in this order: a table of contents (between 2 and
 * MAX_TOC_ENTRIES entries), a 24-byte termination block, and a dictionary of
 * at most MAX_DICTIONARY_ENTRIES key/value strings. The "codec", "HeaderSeed"
 * and "HeaderKey" entries configure the demuxer; every other entry is stored
 * as container metadata. The largest TOC block after the first one is treated
 * as the audio content, and one chapter is registered for every chapter
 * header found inside it.
 *
 * @param s demuxer context; s->priv_data is the AADemuxContext
 * @return 0 on success;
 *         AVERROR_INVALIDDATA if the TOC or dictionary count is out of range
 *         or HeaderKey does not consist of four integers;
 *         AVERROR(EINVAL) if the fixed key is not 16 bytes long or the codec
 *         is unknown;
 *         AVERROR(ENOMEM) if an allocation fails
 */
static int aa_read_header(AVFormatContext *s)
{
    int largest_idx = -1;
    uint32_t toc_size, npairs, header_seed = 0, start;
    char codec_name[64] = {0};
    uint8_t buf[24];
    int64_t largest_size = -1, current_size = -1, chapter_pos;
    struct toc_entry {
        uint32_t offset;
        uint32_t size;
    } TOC[MAX_TOC_ENTRIES];
    uint8_t header_key[16] = {0};
    AADemuxContext *c = s->priv_data;
    char file_key[2 * sizeof(c->file_key) + 1];
    AVIOContext *pb = s->pb;
    AVStream *st;
    FFStream *sti;
    int ret;

    /* parse .aa header */
    avio_skip(pb, 4); // file size
    avio_skip(pb, 4); // magic string
    toc_size = avio_rb32(pb); // TOC size
    avio_skip(pb, 4); // unidentified integer
    // entry 0 is never used as audio, so at least two entries are required
    if (toc_size > MAX_TOC_ENTRIES || toc_size < 2)
        return AVERROR_INVALIDDATA;
    for (uint32_t i = 0; i < toc_size; i++) { // read TOC
        avio_skip(pb, 4); // TOC entry index
        TOC[i].offset = avio_rb32(pb); // block offset
        TOC[i].size = avio_rb32(pb); // block size
    }
    avio_skip(pb, 24); // header termination block (ignored)
    npairs = avio_rb32(pb); // read dictionary entries
    if (npairs > MAX_DICTIONARY_ENTRIES)
        return AVERROR_INVALIDDATA;
    for (uint32_t i = 0; i < npairs; i++) {
        char key[128], val[128];
        uint32_t nkey, nval;

        avio_skip(pb, 1); // unidentified integer
        nkey = avio_rb32(pb); // key string length
        nval = avio_rb32(pb); // value string length
        avio_get_str(pb, nkey, key, sizeof(key));
        avio_get_str(pb, nval, val, sizeof(val));
        if (!strcmp(key, "codec")) {
            av_log(s, AV_LOG_DEBUG, "Codec is <%s>\n", val);
            av_strlcpy(codec_name, val, sizeof(codec_name));
        } else if (!strcmp(key, "HeaderSeed")) {
            av_log(s, AV_LOG_DEBUG, "HeaderSeed is <%s>\n", val);
            header_seed = atoi(val);
        } else if (!strcmp(key, "HeaderKey")) { // this looks like "1234567890 1234567890 1234567890 1234567890"
            uint32_t header_key_part[4];
            av_log(s, AV_LOG_DEBUG, "HeaderKey is <%s>\n", val);

            ret = sscanf(val, "%"SCNu32"%"SCNu32"%"SCNu32"%"SCNu32,
                   &header_key_part[0], &header_key_part[1], &header_key_part[2], &header_key_part[3]);
            if (ret != 4)
                return AVERROR_INVALIDDATA;

            for (int idx = 0; idx < 4; idx++)
                AV_WB32(&header_key[idx * 4], header_key_part[idx]); // convert each part to BE!
            // key is reused as scratch space for the hex dump; its name was already matched
            ff_data_to_hex(key, header_key, sizeof(header_key), 1);
            av_log(s, AV_LOG_DEBUG, "Processed HeaderKey is %s\n", key);
        } else {
            av_dict_set(&s->metadata, key, val, 0);
        }
    }

    /* verify fixed key */
    if (c->aa_fixed_key_len != 16) {
        av_log(s, AV_LOG_ERROR, "aa_fixed_key value needs to be 16 bytes!\n");
        return AVERROR(EINVAL);
    }

    /* verify codec */
    if ((c->codec_second_size = get_second_size(codec_name)) == -1) {
        av_log(s, AV_LOG_ERROR, "unknown codec <%s>!\n", codec_name);
        return AVERROR(EINVAL);
    }

    /* decryption key derivation */
    c->tea_ctx = av_tea_alloc();
    if (!c->tea_ctx)
        return AVERROR(ENOMEM);
    av_tea_init(c->tea_ctx, c->aa_fixed_key, 16);
    // fill buf with six consecutive big-endian counters starting at header_seed
    for (int i = 0; i < 6; i++)
        AV_WB32(buf + 4 * i, header_seed + i);
    // run the three 8-byte blocks through TEA with the fixed key
    av_tea_crypt(c->tea_ctx, buf, buf, 3, NULL, 0);
    // file key = bytes 2..17 of the result XORed with the header key
    AV_WN64(c->file_key,     AV_RN64(buf + 2)  ^ AV_RN64(header_key));
    AV_WN64(c->file_key + 8, AV_RN64(buf + 10) ^ AV_RN64(header_key + 8));
    ff_data_to_hex(file_key, c->file_key, sizeof(c->file_key), 1);
    av_log(s, AV_LOG_DEBUG, "File key is %s\n", file_key);
    // from here on the cipher context is keyed with the per-file key
    av_tea_init(c->tea_ctx, c->file_key, 16);

    /* decoder setup */
    st = avformat_new_stream(s, NULL);
    if (!st)
        return AVERROR(ENOMEM);
    sti = ffstream(st);
    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    if (!strcmp(codec_name, "mp332")) {
        st->codecpar->codec_id = AV_CODEC_ID_MP3;
        st->codecpar->sample_rate = 22050;
        sti->need_parsing = AVSTREAM_PARSE_FULL_RAW;
        avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
    } else if (!strcmp(codec_name, "acelp85")) {
        st->codecpar->codec_id = AV_CODEC_ID_SIPR;
        st->codecpar->block_align = 19;
        st->codecpar->ch_layout.nb_channels = 1;
        st->codecpar->sample_rate = 8500;
        st->codecpar->bit_rate = 8500;
        sti->need_parsing = AVSTREAM_PARSE_FULL_RAW;
        avpriv_set_pts_info(st, 64, 8, 8500 * TIMEPREC);
    } else if (!strcmp(codec_name, "acelp16")) {
        st->codecpar->codec_id = AV_CODEC_ID_SIPR;
        st->codecpar->block_align = 20;
        st->codecpar->ch_layout.nb_channels = 1;
        st->codecpar->sample_rate = 16000;
        st->codecpar->bit_rate = 16000;
        sti->need_parsing = AVSTREAM_PARSE_FULL_RAW;
        avpriv_set_pts_info(st, 64, 8, 16000 * TIMEPREC);
    }

    /* determine, and jump to audio start offset */
    // toc_size >= 2 guarantees the loop runs, so largest_idx is always set
    for (uint32_t i = 1; i < toc_size; i++) { // skip the first entry!
        current_size = TOC[i].size;
        if (current_size > largest_size) {
            largest_idx = i;
            largest_size = current_size;
        }
    }
    start = TOC[largest_idx].offset;
    avio_seek(pb, start, SEEK_SET);

    // extract chapter positions. since all formats have constant bit rate, use it
    // as time base in bytes/s, for easy stream position <-> timestamp conversion
    st->start_time = 0;
    c->content_start = start;
    c->content_end = start + largest_size;

    while ((chapter_pos = avio_tell(pb)) >= 0 && chapter_pos < c->content_end) {
        unsigned chapter_idx = s->nb_chapters;
        uint32_t chapter_size = avio_rb32(pb);
        if (chapter_size == 0 || avio_feof(pb))
            break;
        // turn the file offset into a payload offset: subtract the block start
        // and the headers of all earlier chapters. Computed in 64 bits so that
        // large offsets cannot wrap around.
        chapter_pos -= (int64_t)start + CHAPTER_HEADER_SIZE * (int64_t)chapter_idx;
        // skip the rest of the chapter header and the chapter data; the sum is
        // widened first because chapter_size comes straight from the file and
        // "4 + chapter_size" would wrap in 32-bit arithmetic for huge values
        avio_skip(pb, 4 + (int64_t)chapter_size);
        if (!avpriv_new_chapter(s, chapter_idx, st->time_base,
                                chapter_pos * TIMEPREC,
                                (chapter_pos + chapter_size) * TIMEPREC, NULL))
            return AVERROR(ENOMEM);
    }

    // duration covers the audio payload only, without the chapter headers
    st->duration = (largest_size - CHAPTER_HEADER_SIZE * s->nb_chapters) * TIMEPREC;

    // rewind to the first chapter header and reset the packet reader state
    avpriv_update_cur_dts(s, st, 0);
    avio_seek(pb, start, SEEK_SET);
    c->current_chapter_size = 0;
    c->seek_offset = 0;

    return 0;
}
236 
/**
 * Read the next block of audio data and decrypt it in place.
 *
 * A chapter header is consumed first whenever no bytes remain in the current
 * chapter. Packets are codec_second_size bytes long, except for the last one
 * of a chapter, which holds whatever is left of that chapter.
 *
 * @param s   demuxer context; s->priv_data is the AADemuxContext
 * @param pkt packet that receives the decrypted data
 * @return 0 on success; AVERROR_EOF when the end of the audio content is
 *         reached, a zero chapter size is read, or fewer bytes than requested
 *         were available; otherwise the negative error code reported by
 *         av_get_packet()
 */
static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    int ret;
    AADemuxContext *c = s->priv_data;
    uint64_t pos = avio_tell(s->pb);

    // are we at the end of the audio content?
    if (pos >= c->content_end) {
        return AVERROR_EOF;
    }

    // are we at the start of a chapter?
    if (c->current_chapter_size == 0) {
        c->current_chapter_size = avio_rb32(s->pb);
        if (c->current_chapter_size == 0) {
            return AVERROR_EOF;
        }
        av_log(s, AV_LOG_DEBUG, "Chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);
        c->chapter_idx = c->chapter_idx + 1;
        avio_skip(s->pb, 4); // data start offset
        c->current_codec_second_size = c->codec_second_size;
    }

    // is this the last block in this chapter?
    if (c->current_chapter_size / c->current_codec_second_size == 0) {
        c->current_codec_second_size = c->current_chapter_size % c->current_codec_second_size;
    }

    ret = av_get_packet(s->pb, pkt, c->current_codec_second_size);
    // report genuine read errors as such instead of disguising them as EOF
    if (ret < 0)
        return ret;
    // a short read means the file ends inside a block
    if (ret != c->current_codec_second_size)
        return AVERROR_EOF;

    // decrypt c->current_codec_second_size bytes in blocks of TEA_BLOCK_SIZE
    // trailing bytes are left unencrypted!
    av_tea_crypt(c->tea_ctx, pkt->data, pkt->data,
                 c->current_codec_second_size / TEA_BLOCK_SIZE, NULL, 1);

    // update state
    c->current_chapter_size = c->current_chapter_size - c->current_codec_second_size;
    if (c->current_chapter_size <= 0)
        c->current_chapter_size = 0;

    // after a seek, drop the estimated partial frame at the start of the block
    if (c->seek_offset > c->current_codec_second_size)
        c->seek_offset = 0; // ignore wrong estimate
    pkt->data += c->seek_offset;
    pkt->size -= c->seek_offset;
    c->seek_offset = 0;

    return 0;
}
287 
/**
 * Seek to the block boundary closest to a timestamp.
 *
 * The chapter containing @p timestamp is located first (timestamps past the
 * last chapter are clamped to its end). Inside that chapter the position is
 * snapped to a multiple of codec_second_size, rounding down when
 * AVSEEK_FLAG_BACKWARD is set and up otherwise.
 *
 * @param s            demuxer context
 * @param stream_index not used; stream 0 is always the target
 * @param timestamp    target timestamp; negative values are treated as 0
 * @param flags        only AVSEEK_FLAG_BACKWARD is examined
 * @return 1 on success, -1 if the file has no chapters
 */
static int aa_read_seek(AVFormatContext *s,
                        int stream_index, int64_t timestamp, int flags)
{
    AADemuxContext *c = s->priv_data;
    AVChapter *ch;
    int64_t block_pos, data_start, bytes_in_chapter;
    int idx;

    timestamp = timestamp < 0 ? 0 : timestamp;

    // locate the first chapter that ends after the requested timestamp
    for (idx = 0; idx < s->nb_chapters; idx++) {
        if (timestamp < s->chapters[idx]->end)
            break;
    }

    // ran past the last chapter: fall back to the end of the final one
    if (idx >= s->nb_chapters) {
        idx = s->nb_chapters - 1;
        if (idx < 0)
            return -1; // there is no chapter.
        timestamp = s->chapters[idx]->end;
    }

    ch = s->chapters[idx];

    // chapter length and target position in bytes, the latter snapped to a
    // whole number of blocks and limited to the chapter length
    bytes_in_chapter = ch->end / TIMEPREC - ch->start / TIMEPREC;
    block_pos = c->codec_second_size *
                av_rescale_rnd((timestamp - ch->start) / TIMEPREC,
                               1, c->codec_second_size,
                               (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP);
    if (block_pos > bytes_in_chapter)
        block_pos = bytes_in_chapter;

    // file offset of the chapter payload: content start, payload bytes of the
    // earlier chapters, and one header per chapter up to and including this one
    data_start = c->content_start + (ch->start / TIMEPREC) + CHAPTER_HEADER_SIZE * (1 + idx);

    // move the read position and reinitialise the packet reader state
    avio_seek(s->pb, data_start + block_pos, SEEK_SET);
    c->chapter_idx               = 1 + idx;
    c->current_chapter_size      = bytes_in_chapter - block_pos;
    c->current_codec_second_size = c->codec_second_size;

    // MP3 frames are not aligned to blocks: estimate where the first whole
    // frame of the block starts (assuming unpadded frames)
    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
        c->seek_offset = (MP3_FRAME_SIZE - block_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
    }

    avpriv_update_cur_dts(s, s->streams[0], ch->start + (block_pos + c->seek_offset) * TIMEPREC);

    return 1;
}
337 
aa_probe(const AVProbeData * p)338 static int aa_probe(const AVProbeData *p)
339 {
340     uint8_t *buf = p->buf;
341 
342     // first 4 bytes are file size, next 4 bytes are the magic
343     if (AV_RB32(buf+4) != AA_MAGIC)
344         return 0;
345 
346     return AVPROBE_SCORE_MAX / 2;
347 }
348 
/**
 * Release the demuxer's private resources.
 *
 * @param s demuxer context; s->priv_data is the AADemuxContext
 * @return always 0
 */
static int aa_read_close(AVFormatContext *s)
{
    // frees the TEA context and resets the pointer to NULL
    av_freep(&((AADemuxContext *)s->priv_data)->tea_ctx);
    return 0;
}
357 
358 #define OFFSET(x) offsetof(AADemuxContext, x)
359 static const AVOption aa_options[] = {
360     { "aa_fixed_key", // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
361         "Fixed key used for handling Audible AA files", OFFSET(aa_fixed_key),
362         AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd2a51d673"},
363         .flags = AV_OPT_FLAG_DECODING_PARAM },
364     { NULL },
365 };
366 
367 static const AVClass aa_class = {
368     .class_name = "aa",
369     .item_name  = av_default_item_name,
370     .option     = aa_options,
371     .version    = LIBAVUTIL_VERSION_INT,
372 };
373 
374 const AVInputFormat ff_aa_demuxer = {
375     .name           = "aa",
376     .long_name      = NULL_IF_CONFIG_SMALL("Audible AA format files"),
377     .priv_class     = &aa_class,
378     .priv_data_size = sizeof(AADemuxContext),
379     .extensions     = "aa",
380     .read_probe     = aa_probe,
381     .read_header    = aa_read_header,
382     .read_packet    = aa_read_packet,
383     .read_seek      = aa_read_seek,
384     .read_close     = aa_read_close,
385     .flags          = AVFMT_NO_BYTE_SEEK | AVFMT_NOGENSEARCH,
386     .flags_internal = FF_FMT_INIT_CLEANUP,
387 };
388