1 /*
2 * Copyright (c) 2012 Clément Bœsch
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #ifndef AVFORMAT_SUBTITLES_H
22 #define AVFORMAT_SUBTITLES_H
23
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "avformat.h"
#include "libavutil/bprint.h"
#include "avio_internal.h"
29
/**
 * Ordering applied to queued subtitle events (see the sort field of
 * FFDemuxSubtitlesQueue).
 */
enum sub_sort {
    SUB_SORT_TS_POS = 0,    ///< sort by timestamps, then position
    SUB_SORT_POS_TS,        ///< sort by position, then timestamps
};
34
/**
 * Source text encodings distinguished by the text reader.
 * NOTE(review): presumably the value stored in FFTextReader.type after BOM
 * detection in ff_text_init_avio() -- confirm in subtitles.c.
 */
enum ff_utf_type {
    FF_UTF_8,       // or other 8 bit encodings
    FF_UTF16LE,
    FF_UTF16BE,
};
40
41 typedef struct {
42 int type;
43 AVIOContext *pb;
44 unsigned char buf[8];
45 int buf_pos, buf_len;
46 FFIOContext buf_pb;
47 } FFTextReader;
48
49 /**
50 * Initialize the FFTextReader from the given AVIOContext. This function will
51 * read some bytes from pb, and test for UTF-8 or UTF-16 BOMs. Further accesses
52 * to FFTextReader will read more data from pb.
53 * If s is not NULL, the user will be warned if a UTF-16 conversion takes place.
54 *
55 * The purpose of FFTextReader is to transparently convert read data to UTF-8
56 * if the stream had a UTF-16 BOM.
57 *
58 * @param s Pointer to provide av_log context
59 * @param r object which will be initialized
60 * @param pb stream to read from (referenced as long as FFTextReader is in use)
61 */
62 void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb);
63
64 /**
65 * Similar to ff_text_init_avio(), but sets it up to read from a bounded buffer.
66 *
67 * @param r object which will be initialized
68 * @param buf buffer to read from (referenced as long as FFTextReader is in use)
69 * @param size size of buf
70 */
71 void ff_text_init_buf(FFTextReader *r, void *buf, size_t size);
72
73 /**
74 * Return the byte position of the next byte returned by ff_text_r8(). For
75 * UTF-16 source streams, this will return the original position, but it will
76 * be incorrect if a codepoint was only partially read with ff_text_r8().
77 */
78 int64_t ff_text_pos(FFTextReader *r);
79
80 /**
81 * Return the next byte. The return value is always 0 - 255. Returns 0 on EOF.
82 * If the source stream is UTF-16, this reads from the stream converted to
83 * UTF-8. On invalid UTF-16, 0 is returned.
84 */
85 int ff_text_r8(FFTextReader *r);
86
87 /**
88 * Return non-zero if EOF was reached.
89 */
90 int ff_text_eof(FFTextReader *r);
91
92 /**
93 * Like ff_text_r8(), but don't remove the byte from the buffer.
94 */
95 int ff_text_peek_r8(FFTextReader *r);
96
97 /**
98 * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
99 * written.
100 */
101 void ff_text_read(FFTextReader *r, char *buf, size_t size);
102
103 typedef struct {
104 AVPacket **subs; ///< array of subtitles packets
105 int nb_subs; ///< number of subtitles packets
106 int allocated_size; ///< allocated size for subs
107 int current_sub_idx; ///< current position for the read packet callback
108 enum sub_sort sort; ///< sort method to use when finalizing subtitles
109 int keep_duplicates; ///< set to 1 to keep duplicated subtitle events
110 } FFDemuxSubtitlesQueue;
111
112 /**
113 * Insert a new subtitle event.
114 *
115 * @param event the subtitle line, may not be zero terminated
116 * @param len the length of the event (in strlen() sense, so without '\0')
117 * @param merge set to 1 if the current event should be concatenated with the
118 * previous one instead of adding a new entry, 0 otherwise
119 */
120 AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
121 const uint8_t *event, size_t len, int merge);
122
/**
 * Set missing durations, sort subtitles according to q->sort (by PTS and then
 * byte position for SUB_SORT_TS_POS), and drop duplicated events unless
 * q->keep_duplicates is set.
 */
127 void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q);
128
129 /**
130 * Generic read_packet() callback for subtitles demuxers using this queue
131 * system.
132 */
133 int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt);
134
/**
 * Update current_sub_idx to emulate a seek. Except for the first parameter,
 * the prototype matches AVInputFormat->read_seek2.
 */
139 int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
140 int64_t min_ts, int64_t ts, int64_t max_ts, int flags);
141
142 /**
143 * Remove and destroy all the subtitles packets.
144 */
145 void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q);
146
147 int ff_subtitles_read_packet(AVFormatContext *s, AVPacket *pkt);
148
149 int ff_subtitles_read_seek(AVFormatContext *s, int stream_index,
150 int64_t min_ts, int64_t ts, int64_t max_ts, int flags);
151
152 int ff_subtitles_read_close(AVFormatContext *s);
153
154 /**
155 * SMIL helper to load next chunk ("<...>" or untagged content) in buf.
156 *
157 * @param c cached character, to avoid a backward seek
158 */
159 int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c);
160
/**
 * SMIL helper returning a pointer to the value of an attribute in the given
 * tag.
 *
 * @param s    SMIL tag ("<...>")
 * @param attr the attribute to look for
 */
167 const char *ff_smil_get_attr_ptr(const char *s, const char *attr);
168
169 /**
170 * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
171 */
172 void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);
173
/**
 * @brief Read a subtitles chunk from FFTextReader.
 *
 * A chunk is defined by a multiline "event", ending with a second line break.
 * The trailing line breaks are trimmed. CRLF are supported.
 * Example: "foo\r\nbar\r\n\r\nnext" will write "foo\r\nbar" into buf, and tr
 * will be positioned on the 'n' of the "next" string.
 *
 * @param tr  I/O context
 * @param buf an initialized buf where the chunk is written
 *
 * @note buf is cleared before writing into it.
 */
187 void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf);
188
/**
 * Get the number of characters to increment to jump to the next line, or to
 * the end of the string.
 * The function handles the following line break schemes:
 * LF, CRLF (MS), or standalone CR (old MacOS).
 * A run of consecutive CR characters right after the line content is consumed
 * as part of the same line break, so "\r\r\n" counts as a single break.
 *
 * @param ptr NUL-terminated string positioned at the start of a line
 * @return number of bytes from ptr up to the start of the next line, or up to
 *         the terminating NUL when no line break is found
 */
static av_always_inline int ff_subtitles_next_line(const char *ptr)
{
    /* Length of the line content, i.e. everything before the first CR/LF. */
    int n = (int)strcspn(ptr, "\r\n");

    ptr += n;
    /* Swallow every CR of the break (covers CR, CRLF and CR CR LF). */
    while (*ptr == '\r') {
        ptr++;
        n++;
    }
    /* At most one LF belongs to this break; a second one is an empty line. */
    if (*ptr == '\n')
        n++;
    return n;
}
207
/**
 * Read a line of text. Discards line ending characters.
 * The function handles the following line break schemes:
 * LF, CRLF (MS), or standalone CR (old MacOS).
 *
 * Returns the number of bytes written to buf. Always writes a terminating 0,
 * as snprintf does.
 *
 * @note returns a negative error code if a \0 byte is found
 */
218 ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size);
219
220 #endif /* AVFORMAT_SUBTITLES_H */
221