/*
 * Copyright (c) 2012-2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21 #include "avformat.h"
22 #include "subtitles.h"
23 #include "avio_internal.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/avstring.h"
26
/**
 * Attach a text reader to an AVIO context and sniff a leading byte order mark.
 *
 * Two bytes are prefetched into r->buf and compared against the UTF-16 LE/BE
 * marks. If neither matches, a third byte is fetched to test for the UTF-8
 * mark. A recognized mark is skipped by advancing r->buf_pos; otherwise the
 * prefetched bytes stay pending in r->buf and are returned by later reads.
 *
 * @param s  logging context handed to av_log(); when NULL the UTF-16 notice
 *           is not printed
 * @param r  reader to initialize
 * @param pb input context the reader pulls bytes from
 */
void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
{
    r->pb      = pb;
    r->type    = FF_UTF_8;
    r->buf_pos = 0;
    r->buf_len = 0;

    /* Two bytes are enough to recognize either UTF-16 mark. */
    r->buf[r->buf_len++] = avio_r8(r->pb);
    r->buf[r->buf_len++] = avio_r8(r->pb);

    if (!strncmp("\xFF\xFE", r->buf, 2)) {
        r->type     = FF_UTF16LE;
        r->buf_pos += 2;
    } else if (!strncmp("\xFE\xFF", r->buf, 2)) {
        r->type     = FF_UTF16BE;
        r->buf_pos += 2;
    } else {
        /* The UTF-8 mark is three bytes long: fetch one more before testing. */
        r->buf[r->buf_len++] = avio_r8(r->pb);
        if (!strncmp("\xEF\xBB\xBF", r->buf, 3))
            r->buf_pos += 3;
    }

    if (!s)
        return;
    if (r->type == FF_UTF16LE || r->type == FF_UTF16BE)
        av_log(s, AV_LOG_INFO,
               "UTF16 is automatically converted to UTF8, do not specify a character encoding\n");
}
52
/**
 * Initialize a text reader on top of an in-memory buffer.
 *
 * The reader's embedded I/O context (r->buf_pb) is zeroed, set up over
 * buf/size with no callbacks (all NULL), and then handed to
 * ff_text_init_avio() without a logging context.
 *
 * @param r    reader to initialize
 * @param buf  memory to read from
 * @param size number of bytes available at buf
 */
void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
{
    AVIOContext *const mem_pb = &r->buf_pb;

    memset(mem_pb, 0, sizeof(*mem_pb));
    ffio_init_context(mem_pb, buf, size, 0, NULL, NULL, NULL, NULL);
    ff_text_init_avio(NULL, r, mem_pb);
}
59
/**
 * Report the reader's current position.
 *
 * The value is the position of the underlying I/O context as given by
 * avio_tell(), moved back by the number of bytes that are still pending in
 * r->buf (buf_len - buf_pos).
 *
 * @param r text reader
 * @return the adjusted position
 */
int64_t ff_text_pos(FFTextReader *r)
{
    int64_t pos = avio_tell(r->pb);

    pos -= r->buf_len;
    pos += r->buf_pos;
    return pos;
}
64
/**
 * Read one byte of text from the reader.
 *
 * Bytes pending in r->buf are returned first. For readers not flagged as
 * UTF-16 the next byte comes straight from avio_r8(). For UTF-16 readers one
 * code point is decoded with GET_UTF16, re-encoded into r->buf with PUT_UTF8,
 * and the first byte of that sequence is returned; the remaining bytes are
 * served by subsequent calls.
 *
 * @param r text reader
 * @return the next byte, or 0 when GET_UTF16 takes its error path or the
 *         decoded code point is 0
 */
int ff_text_r8(FFTextReader *r)
{
    uint32_t cp;
    uint8_t  utf8_byte;

    /* Drain whatever is already buffered before touching the I/O context. */
    if (r->buf_pos < r->buf_len)
        return r->buf[r->buf_pos++];

    /* No conversion needed: pass the raw byte through. */
    if (r->type != FF_UTF16LE && r->type != FF_UTF16BE)
        return avio_r8(r->pb);

    if (r->type == FF_UTF16LE) {
        GET_UTF16(cp, avio_rl16(r->pb), return 0;)
    } else {
        GET_UTF16(cp, avio_rb16(r->pb), return 0;)
    }
    if (!cp)
        return 0;

    /* Refill the buffer with the UTF-8 form of the decoded code point. */
    r->buf_len = 0;
    r->buf_pos = 0;
    PUT_UTF8(cp, utf8_byte, r->buf[r->buf_len++] = utf8_byte;)
    return r->buf[r->buf_pos++]; // buf_len is at least 1
}
85
/**
 * Fill a buffer with bytes obtained from ff_text_r8().
 *
 * Exactly size bytes are written; each one is the value returned by
 * ff_text_r8() converted to char. No end-of-input check is made here and no
 * terminator is appended.
 *
 * @param r    text reader
 * @param buf  destination, at least size bytes large
 * @param size number of bytes to produce
 */
void ff_text_read(FFTextReader *r, char *buf, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++)
        buf[i] = ff_text_r8(r);
}
91
/**
 * Tell whether the reader has run out of input.
 *
 * @param r text reader
 * @return 1 when r->buf holds no pending bytes and avio_feof() reports end of
 *         input on the underlying context, 0 otherwise
 */
int ff_text_eof(FFTextReader *r)
{
    /* Pending buffered bytes mean there is still something to read;
     * avio_feof() is deliberately not consulted in that case. */
    if (r->buf_pos < r->buf_len)
        return 0;
    return avio_feof(r->pb) != 0;
}
96
/**
 * Return the next byte without consuming it.
 *
 * If r->buf has pending bytes the first of them is returned as is. Otherwise
 * one byte is obtained through ff_text_r8() and, unless avio_feof() reports
 * end of input, pushed back so that the next read yields it again.
 *
 * @param r text reader
 * @return the byte the next ff_text_r8() call will return (when it could be
 *         pushed back), as produced by ff_text_r8()
 */
int ff_text_peek_r8(FFTextReader *r)
{
    int c;

    if (r->buf_pos < r->buf_len)
        return r->buf[r->buf_pos];

    c = ff_text_r8(r);
    if (!avio_feof(r->pb)) {
        /* When ff_text_r8() converted a UTF-16 code point into a multi-byte
         * UTF-8 sequence, the continuation bytes are still sitting in r->buf
         * behind the byte just returned. Shrinking the buffer to one byte
         * would drop them, so only rebuild the buffer when it is empty and
         * otherwise just rewind to the start of the sequence. */
        if (r->buf_pos >= r->buf_len) {
            r->buf_len = 1;
            r->buf[0]  = c;
        }
        r->buf_pos = 0;
    }
    return c;
}
110
ff_subtitles_queue_insert(FFDemuxSubtitlesQueue * q,const uint8_t * event,size_t len,int merge)111 AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
112 const uint8_t *event, size_t len, int merge)
113 {
114 AVPacket **subs, *sub;
115
116 if (merge && q->nb_subs > 0) {
117 /* merge with previous event */
118
119 int old_len;
120 sub = q->subs[q->nb_subs - 1];
121 old_len = sub->size;
122 if (av_grow_packet(sub, len) < 0)
123 return NULL;
124 memcpy(sub->data + old_len, event, len);
125 } else {
126 /* new event */
127
128 if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1)
129 return NULL;
130 subs = av_fast_realloc(q->subs, &q->allocated_size,
131 (q->nb_subs + 1) * sizeof(*q->subs));
132 if (!subs)
133 return NULL;
134 q->subs = subs;
135 sub = av_packet_alloc();
136 if (!sub)
137 return NULL;
138 if (av_new_packet(sub, len) < 0) {
139 av_packet_free(&sub);
140 return NULL;
141 }
142 subs[q->nb_subs++] = sub;
143 sub->flags |= AV_PKT_FLAG_KEY;
144 sub->pts = sub->dts = 0;
145 memcpy(sub->data, event, len);
146 }
147 return sub;
148 }
149
cmp_pkt_sub_ts_pos(const void * a,const void * b)150 static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
151 {
152 const AVPacket *s1 = *(const AVPacket **)a;
153 const AVPacket *s2 = *(const AVPacket **)b;
154 if (s1->pts == s2->pts)
155 return FFDIFFSIGN(s1->pos, s2->pos);
156 return FFDIFFSIGN(s1->pts , s2->pts);
157 }
158
cmp_pkt_sub_pos_ts(const void * a,const void * b)159 static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
160 {
161 const AVPacket *s1 = *(const AVPacket **)a;
162 const AVPacket *s2 = *(const AVPacket **)b;
163 if (s1->pos == s2->pos) {
164 if (s1->pts == s2->pts)
165 return 0;
166 return s1->pts > s2->pts ? 1 : -1;
167 }
168 return s1->pos > s2->pos ? 1 : -1;
169 }
170
/**
 * Remove events that duplicate the event kept just before them.
 *
 * An event is a duplicate when pts, duration and stream_index are equal to
 * those of the last kept event and strcmp() reports equal payloads. Duplicates
 * are freed, survivors are compacted towards the start of the array, and
 * q->nb_subs is reduced accordingly. A warning with the number of dropped
 * events is logged when at least one was removed.
 *
 * @param log_ctx context handed to av_log()
 * @param q       queue to deduplicate
 */
static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q)
{
    int src, dst = 1, dropped = 0;

    /* dst is the slot following the last kept event; src scans ahead. */
    for (src = 1; src < q->nb_subs; src++) {
        const AVPacket *kept = q->subs[dst - 1];
        AVPacket *cand       = q->subs[src];
        const int is_dup     = cand->pts          == kept->pts          &&
                               cand->duration     == kept->duration     &&
                               cand->stream_index == kept->stream_index &&
                               !strcmp(cand->data, kept->data);

        if (is_dup) {
            av_packet_free(&q->subs[src]);
            dropped++;
            continue;
        }

        if (dropped) {
            /* close the gap left by previously removed entries */
            q->subs[dst] = cand;
            q->subs[src] = NULL;
        }
        dst++;
    }

    if (!dropped)
        return;

    q->nb_subs -= dropped;
    av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", dropped);
}
197
/**
 * Prepare a filled queue for reading.
 *
 * The events are sorted (by pts then pos when q->sort is SUB_SORT_TS_POS, by
 * pos then pts otherwise). Every event with a negative duration that has a
 * successor gets its duration set to the pts distance to that successor,
 * provided the distance fits in a non-negative int64_t. Unless
 * q->keep_duplicates is set, duplicated events are dropped afterwards.
 * Nothing is done for an empty queue.
 *
 * @param log_ctx context used for logging
 * @param q       queue to finalize
 */
void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
{
    int idx;

    if (q->nb_subs == 0)
        return;

    if (q->sort == SUB_SORT_TS_POS)
        qsort(q->subs, q->nb_subs, sizeof(*q->subs), cmp_pkt_sub_ts_pos);
    else
        qsort(q->subs, q->nb_subs, sizeof(*q->subs), cmp_pkt_sub_pos_ts);

    /* The last event has no successor, so it is never touched here. */
    for (idx = 0; idx + 1 < q->nb_subs; idx++) {
        AVPacket *cur        = q->subs[idx];
        const AVPacket *next = q->subs[idx + 1];

        if (cur->duration >= 0)
            continue;
        /* Unsigned subtraction: a successor with a smaller pts, or a distance
         * too large for int64_t, wraps above INT64_MAX and is skipped. */
        if (next->pts - (uint64_t)cur->pts <= INT64_MAX)
            cur->duration = next->pts - cur->pts;
    }

    if (!q->keep_duplicates)
        drop_dups(log_ctx, q);
}
215
/**
 * Hand out the next queued event.
 *
 * pkt is set up with av_packet_ref() as a reference to the event at
 * q->current_sub_idx, its dts is overwritten with its pts, and the read index
 * is advanced.
 *
 * @param q   queue to read from
 * @param pkt packet receiving the reference
 * @return 0 on success, AVERROR_EOF once the read index has reached
 *         q->nb_subs, or the negative value returned by av_packet_ref()
 */
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
{
    int err;

    if (q->current_sub_idx == q->nb_subs)
        return AVERROR_EOF;

    err = av_packet_ref(pkt, q->subs[q->current_sub_idx]);
    if (err < 0)
        return err;

    q->current_sub_idx++;
    pkt->dts = pkt->pts;
    return 0;
}
232
search_sub_ts(const FFDemuxSubtitlesQueue * q,int64_t ts)233 static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts)
234 {
235 int s1 = 0, s2 = q->nb_subs - 1;
236
237 if (s2 < s1)
238 return AVERROR(ERANGE);
239
240 for (;;) {
241 int mid;
242
243 if (s1 == s2)
244 return s1;
245 if (s1 == s2 - 1)
246 return q->subs[s1]->pts <= q->subs[s2]->pts ? s1 : s2;
247 mid = (s1 + s2) / 2;
248 if (q->subs[mid]->pts <= ts)
249 s1 = mid;
250 else
251 s2 = mid;
252 }
253 }
254
ff_subtitles_queue_seek(FFDemuxSubtitlesQueue * q,AVFormatContext * s,int stream_index,int64_t min_ts,int64_t ts,int64_t max_ts,int flags)255 int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
256 int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
257 {
258 if (flags & AVSEEK_FLAG_BYTE) {
259 return AVERROR(ENOSYS);
260 } else if (flags & AVSEEK_FLAG_FRAME) {
261 if (ts < 0 || ts >= q->nb_subs)
262 return AVERROR(ERANGE);
263 q->current_sub_idx = ts;
264 } else {
265 int i, idx = search_sub_ts(q, ts);
266 int64_t ts_selected;
267
268 if (idx < 0)
269 return idx;
270 for (i = idx; i < q->nb_subs && q->subs[i]->pts < min_ts; i++)
271 if (stream_index == -1 || q->subs[i]->stream_index == stream_index)
272 idx = i;
273 for (i = idx; i > 0 && q->subs[i]->pts > max_ts; i--)
274 if (stream_index == -1 || q->subs[i]->stream_index == stream_index)
275 idx = i;
276
277 ts_selected = q->subs[idx]->pts;
278 if (ts_selected < min_ts || ts_selected > max_ts)
279 return AVERROR(ERANGE);
280
281 /* look back in the latest subtitles for overlapping subtitles */
282 for (i = idx - 1; i >= 0; i--) {
283 int64_t pts = q->subs[i]->pts;
284 if (q->subs[i]->duration <= 0 ||
285 (stream_index != -1 && q->subs[i]->stream_index != stream_index))
286 continue;
287 if (pts >= min_ts && pts > ts_selected - q->subs[i]->duration)
288 idx = i;
289 else
290 break;
291 }
292
293 /* If the queue is used to store multiple subtitles streams (like with
294 * VobSub) and the stream index is not specified, we need to make sure
295 * to focus on the smallest file position offset for a same timestamp;
296 * queue is ordered by pts and then filepos, so we can take the first
297 * entry for a given timestamp. */
298 if (stream_index == -1)
299 while (idx > 0 && q->subs[idx - 1]->pts == q->subs[idx]->pts)
300 idx--;
301
302 q->current_sub_idx = idx;
303 }
304 return 0;
305 }
306
/**
 * Release everything held by the queue.
 *
 * Every queued packet is freed, then the pointer array itself, and the
 * counters (nb_subs, allocated_size, current_sub_idx) are reset to 0.
 *
 * @param q queue to empty
 */
void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
{
    int idx = 0;

    while (idx < q->nb_subs) {
        av_packet_free(&q->subs[idx]);
        idx++;
    }
    av_freep(&q->subs);

    q->current_sub_idx = 0;
    q->allocated_size  = 0;
    q->nb_subs         = 0;
}
316
/**
 * Append the next SMIL chunk (a tag or the text between two tags) to buf.
 *
 * The chunk starts with the cached character in *c, or with a freshly read
 * one when *c is 0. If it starts with '<' it runs up to and including the
 * next '>'; otherwise it runs up to, but not including, the next '<', which
 * is left in *c as the cached character for the following call.
 *
 * @param tr  text reader to pull characters from
 * @param buf destination the chunk is appended to
 * @param c   in/out one-character cache shared between successive calls
 * @return the number of characters read for this chunk, 0 when no character
 *         is available, or AVERROR_INVALIDDATA when the chunk is too long for
 *         the count to be represented
 */
int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c)
{
    int i = 0;
    char end_chr;

    if (!*c) // cached char?
        *c = ff_text_r8(tr);
    if (!*c)
        return 0;

    end_chr = *c == '<' ? '>' : '<';
    do {
        av_bprint_chars(buf, *c, 1);
        *c = ff_text_r8(tr);
        /* The count is a plain int: stop before incrementing it would
         * overflow (undefined behavior) on an oversized chunk. */
        if (i == INT_MAX)
            return AVERROR_INVALIDDATA;
        i++;
    } while (*c != end_chr && *c);
    if (end_chr == '>') {
        /* the closing '>' belongs to the tag: store it and drop the cache */
        av_bprint_chars(buf, '>', 1);
        *c = 0;
    }
    return i;
}
339
/**
 * Locate the value of an attribute inside a SMIL tag string.
 *
 * The string is walked token by token, where tokens are separated by
 * whitespace found outside of double quotes. The first token that starts
 * with attr (compared case-insensitively) immediately followed by '=' wins.
 *
 * @param s    tag text to search; the first token is always skipped
 * @param attr attribute name to look for
 * @return pointer to the first character of the value (past '=' and past an
 *         opening double quote if there is one), or NULL if not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const size_t attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* skip the current token, up to whitespace outside of quotes */
        for (; *s; s++) {
            if (!quoted && av_isspace(*s))
                break;
            if (*s == '"') // XXX: support escaping?
                quoted = !quoted;
        }

        /* skip the separator */
        while (av_isspace(*s))
            s++;

        /* The name must be compared first: s[attr_len] is only known to be
         * inside the string once the first attr_len characters matched. */
        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=')
            return s + attr_len + 1 + (s[attr_len + 1] == '"');
    }
    return NULL;
}
359
/* Return 1 if c is a carriage return or a line feed, 0 otherwise. */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
364
/**
 * Read one chunk of text into buf.
 *
 * buf is cleared first. Line breaks in front of the first text character are
 * skipped. Line breaks inside the chunk are held back in a small buffer and
 * only written out once more text follows, so trailing line breaks never end
 * up in buf. Reading stops at a 0 byte, at the second counted line break in a
 * row, or when four line break characters are pending.
 *
 * @param tr text reader to pull characters from
 * @param buf destination for the chunk
 */
void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf)
{
    char eol_buf[5], last_was_cr = 0;
    /* got_text is a flag rather than a character count: only "has any text
     * been seen" matters, and a counter would overflow on oversized input. */
    int got_text = 0, i = 0, nb_eol = 0;

    av_bprint_clear(buf);

    for (;;) {
        char c = ff_text_r8(tr);

        if (!c)
            break;

        if (is_eol(c)) {
            /* ignore all initial line breaks */
            if (!got_text)
                continue;

            /* line break buffering: we don't want to add the trailing \r\n */
            nb_eol += c == '\n' || last_was_cr;
            if (nb_eol == 2)
                break;
            eol_buf[i++] = c;
            /* keep one byte free for the terminator written when flushing */
            if (i == sizeof(eol_buf) - 1)
                break;
            last_was_cr = c == '\r';
            continue;
        }

        /* only one line break followed by data: we flush the line breaks
         * buffer */
        if (i) {
            eol_buf[i] = 0;
            av_bprintf(buf, "%s", eol_buf);
            i = nb_eol = 0;
        }

        av_bprint_chars(buf, c, 1);
        got_text = 1;
    }
}
406
/**
 * Read one chunk of text straight from an AVIO context.
 *
 * A temporary text reader with an empty buffer and type 0 is set up by hand
 * around pb (no byte order mark sniffing takes place) and passed to
 * ff_subtitles_read_text_chunk().
 *
 * @param pb  input context
 * @param buf destination for the chunk
 */
void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
{
    FFTextReader reader;

    reader.pb      = pb;
    reader.type    = 0;
    reader.buf_len = 0;
    reader.buf_pos = 0;

    ff_subtitles_read_text_chunk(&reader, buf);
}
415
/**
 * Read one line of text into a caller-provided buffer.
 *
 * Characters are stored until a '\r' or '\n' is read or until size - 1
 * characters have been stored; in the latter case the rest of the line is
 * left for the next call. After the line, any run of '\r' followed by at most
 * one '\n' is consumed. The line break itself is never stored. Whenever size
 * is non-zero, buf holds a terminated string on return, including when the
 * line is empty or reading fails.
 *
 * @param tr   text reader to pull characters from
 * @param buf  destination buffer
 * @param size capacity of buf in bytes, terminator included
 * @return the number of characters stored (terminator excluded), 0 when size
 *         is 0, or AVERROR_INVALIDDATA when a 0 byte is read while the reader
 *         is not at end of input
 */
ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
{
    size_t cur = 0;
    if (!size)
        return 0;
    /* Terminate up front: the loop only writes the terminator after storing
     * a character, so an empty line, immediate EOF or size == 1 would
     * otherwise leave stale or uninitialized data in buf. */
    buf[0] = '\0';
    while (cur + 1 < size) {
        unsigned char c = ff_text_r8(tr);
        if (!c)
            return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
        if (c == '\r' || c == '\n')
            break;
        buf[cur++] = c;
        buf[cur] = '\0';
    }
    /* swallow what is left of the line ending: any '\r's, then one '\n' */
    while (ff_text_peek_r8(tr) == '\r')
        ff_text_r8(tr);
    if (ff_text_peek_r8(tr) == '\n')
        ff_text_r8(tr);
    return cur;
}
436