1 /* GStreamer AVI GAB2 subtitle parser
2 * Copyright (C) <2007> Thijs Vermeir <thijsvermeir@gmail.com>
3 * Copyright (C) <2007> Tim-Philipp Müller <tim centricular net>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20
21 /**
22 * SECTION:element-avisubtitle
23 *
24 * <refsect2>
25 * <para>
26 * Parses the subtitle stream from an avi file.
27 * </para>
28 * <title>Example launch line</title>
29 * <para>
30 * <programlisting>
31 * gst-launch-1.0 filesrc location=subtitle.avi ! avidemux name=demux ! queue ! avisubtitle ! subparse ! textoverlay name=overlay ! videoconvert ! autovideosink demux. ! queue ! decodebin ! overlay.
32 * </programlisting>
33 * This plays an avi file with a video and subtitle stream.
34 * </para>
35 * </refsect2>
36 */
37
38 /* example of a subtitle chunk in an avi file
39 * 00000000: 47 41 42 32 00 02 00 10 00 00 00 45 00 6e 00 67 GAB2.......E.n.g
40 * 00000010: 00 6c 00 69 00 73 00 68 00 00 00 04 00 8e 00 00 .l.i.s.h........
41 * 00000020: 00 ef bb bf 31 0d 0a 30 30 3a 30 30 3a 30 30 2c ....1..00:00:00,
42 * 00000030: 31 30 30 20 2d 2d 3e 20 30 30 3a 30 30 3a 30 32 100 --> 00:00:02
43 * 00000040: 2c 30 30 30 0d 0a 3c 62 3e 41 6e 20 55 54 46 38 ,000..<b>An UTF8
44 * 00000050: 20 53 75 62 74 69 74 6c 65 20 77 69 74 68 20 42 Subtitle with B
45 * 00000060: 4f 4d 3c 2f 62 3e 0d 0a 0d 0a 32 0d 0a 30 30 3a OM</b>....2..00:
46 * 00000070: 30 30 3a 30 32 2c 31 30 30 20 2d 2d 3e 20 30 30 00:02,100 --> 00
47 * 00000080: 3a 30 30 3a 30 34 2c 30 30 30 0d 0a 53 6f 6d 65 :00:04,000..Some
48 * 00000090: 74 68 69 6e 67 20 6e 6f 6e 41 53 43 49 49 20 2d thing nonASCII -
49 * 000000a0: 20 c2 b5 c3 b6 c3 a4 c3 bc c3 9f 0d 0a 0d 0a ..............
50 */
51
52 #ifdef HAVE_CONFIG_H
53 #include "config.h"
54 #endif
55
56 #include <string.h>
57
58 #include "gstavisubtitle.h"
59
/* Debug category used by every GST_* logging call in this file; it is
 * initialised in gst_avi_subtitle_class_init(). */
GST_DEBUG_CATEGORY_STATIC (avisubtitle_debug);
#define GST_CAT_DEFAULT avisubtitle_debug
62
63 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
64 GST_PAD_SINK,
65 GST_PAD_ALWAYS,
66 GST_STATIC_CAPS ("application/x-subtitle-avi")
67 );
68
69 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
70 GST_PAD_SRC,
71 GST_PAD_ALWAYS,
72 GST_STATIC_CAPS ("application/x-subtitle")
73 );
74
75 static void gst_avi_subtitle_title_tag (GstAviSubtitle * sub, gchar * title);
76 static GstFlowReturn gst_avi_subtitle_chain (GstPad * pad, GstObject * parent,
77 GstBuffer * buffer);
78 static GstStateChangeReturn gst_avi_subtitle_change_state (GstElement * element,
79 GstStateChange transition);
80 static gboolean gst_avi_subtitle_send_event (GstElement * element,
81 GstEvent * event);
82
83 #define gst_avi_subtitle_parent_class parent_class
84 G_DEFINE_TYPE (GstAviSubtitle, gst_avi_subtitle, GST_TYPE_ELEMENT);
85
/* Byte-order-mark probes.  Each macro reads from the start of @data:
 * IS_BOM_UTF8 and the UTF-32 variants read 4 bytes, the UTF-16 variants
 * read 2 bytes, so callers must check the available length first. */

/* UTF-8 BOM: bytes EF BB BF (the fourth byte read is shifted out). */
#define IS_BOM_UTF8(data)     ((GST_READ_UINT32_BE ((data)) >> 8) == 0xEFBBBF)
/* UTF-16 BOM: U+FEFF stored big- or little-endian. */
#define IS_BOM_UTF16_BE(data) (GST_READ_UINT16_BE ((data)) == 0xFEFF)
#define IS_BOM_UTF16_LE(data) (GST_READ_UINT16_LE ((data)) == 0xFEFF)
/* UTF-32 BOM: U+0000FEFF stored big- or little-endian. */
#define IS_BOM_UTF32_BE(data) (GST_READ_UINT32_BE ((data)) == 0xFEFF)
#define IS_BOM_UTF32_LE(data) (GST_READ_UINT32_LE ((data)) == 0xFEFF)
91
92 static GstBuffer *
gst_avi_subtitle_extract_file(GstAviSubtitle * sub,GstBuffer * buffer,guint offset,guint len)93 gst_avi_subtitle_extract_file (GstAviSubtitle * sub, GstBuffer * buffer,
94 guint offset, guint len)
95 {
96 const gchar *input_enc = NULL;
97 GstBuffer *ret = NULL;
98 gchar *data;
99 GstMapInfo map;
100
101 gst_buffer_map (buffer, &map, GST_MAP_READ);
102 data = (gchar *) (map.data + offset);
103
104 if (len >= (3 + 1) && IS_BOM_UTF8 (data) &&
105 g_utf8_validate (data + 3, len - 3, NULL)) {
106 ret =
107 gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL, offset + 3,
108 len - 3);
109 } else if (len >= 2 && IS_BOM_UTF16_BE (data)) {
110 input_enc = "UTF-16BE";
111 data += 2;
112 len -= 2;
113 } else if (len >= 2 && IS_BOM_UTF16_LE (data)) {
114 input_enc = "UTF-16LE";
115 data += 2;
116 len -= 2;
117 } else if (len >= 4 && IS_BOM_UTF32_BE (data)) {
118 input_enc = "UTF-32BE";
119 data += 4;
120 len -= 4;
121 } else if (len >= 4 && IS_BOM_UTF32_LE (data)) {
122 input_enc = "UTF-32LE";
123 data += 4;
124 len -= 4;
125 } else if (g_utf8_validate (data, len, NULL)) {
126 /* not specified, check if it's UTF-8 */
127 ret = gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL, offset, len);
128 } else {
129 /* we could fall back to gst_tag_freeform_to_utf8() here */
130 GST_WARNING_OBJECT (sub, "unspecified encoding, and not UTF-8");
131 ret = NULL;
132 goto done;
133 }
134
135 g_return_val_if_fail (ret != NULL || input_enc != NULL, NULL);
136
137 if (input_enc) {
138 GError *err = NULL;
139 gchar *utf8;
140 gsize slen;
141
142 GST_DEBUG_OBJECT (sub, "converting subtitles from %s to UTF-8", input_enc);
143 utf8 = g_convert (data, len, "UTF-8", input_enc, NULL, NULL, &err);
144
145 if (err != NULL) {
146 GST_WARNING_OBJECT (sub, "conversion to UTF-8 failed : %s", err->message);
147 g_error_free (err);
148 ret = NULL;
149 goto done;
150 }
151
152 ret = gst_buffer_new ();
153 slen = strlen (utf8);
154 gst_buffer_append_memory (ret,
155 gst_memory_new_wrapped (0, utf8, slen, 0, slen, utf8, g_free));
156
157 GST_BUFFER_OFFSET (ret) = 0;
158 }
159
160 done:
161 gst_buffer_unmap (buffer, &map);
162
163 return ret;
164 }
165
166 /**
167 * gst_avi_subtitle_title_tag:
168 * @sub: subtitle element
169 * @title: the title of this subtitle stream
170 *
171 * Send an event to the srcpad of the @sub element with the title
172 * of the subtitle stream as a GST_TAG_TITLE
173 */
174 static void
gst_avi_subtitle_title_tag(GstAviSubtitle * sub,gchar * title)175 gst_avi_subtitle_title_tag (GstAviSubtitle * sub, gchar * title)
176 {
177 gst_pad_push_event (sub->src,
178 gst_event_new_tag (gst_tag_list_new (GST_TAG_TITLE, title, NULL)));
179 }
180
181 static GstFlowReturn
gst_avi_subtitle_parse_gab2_chunk(GstAviSubtitle * sub,GstBuffer * buf)182 gst_avi_subtitle_parse_gab2_chunk (GstAviSubtitle * sub, GstBuffer * buf)
183 {
184 gchar *name_utf8;
185 guint name_length;
186 guint file_length;
187 GstMapInfo map;
188
189 gst_buffer_map (buf, &map, GST_MAP_READ);
190
191 /* check the magic word "GAB2\0", and the next word must be 2 */
192 if (map.size < 12 || memcmp (map.data, "GAB2\0\2\0", 5 + 2) != 0)
193 goto wrong_magic_word;
194
195 /* read 'name' of subtitle */
196 name_length = GST_READ_UINT32_LE (map.data + 5 + 2);
197 GST_LOG_OBJECT (sub, "length of name: %u", name_length);
198 if (map.size <= 17 + name_length)
199 goto wrong_name_length;
200
201 name_utf8 =
202 g_convert ((gchar *) map.data + 11, name_length, "UTF-8", "UTF-16LE",
203 NULL, NULL, NULL);
204
205 if (name_utf8) {
206 GST_LOG_OBJECT (sub, "subtitle name: %s", name_utf8);
207 gst_avi_subtitle_title_tag (sub, name_utf8);
208 g_free (name_utf8);
209 }
210
211 /* next word must be 4 */
212 if (GST_READ_UINT16_LE (map.data + 11 + name_length) != 0x4)
213 goto wrong_fixed_word_2;
214
215 file_length = GST_READ_UINT32_LE (map.data + 13 + name_length);
216 GST_LOG_OBJECT (sub, "length srt/ssa file: %u", file_length);
217
218 if (map.size < (17 + name_length + file_length))
219 goto wrong_total_length;
220
221 /* store this, so we can send it again after a seek; note that we shouldn't
222 * assume all the remaining data in the chunk is subtitle data, there may
223 * be padding at the end for some reason, so only parse file_length bytes */
224 sub->subfile =
225 gst_avi_subtitle_extract_file (sub, buf, 17 + name_length, file_length);
226
227 if (sub->subfile == NULL)
228 goto extract_failed;
229
230 gst_buffer_unmap (buf, &map);
231
232 return GST_FLOW_OK;
233
234 /* ERRORS */
235 wrong_magic_word:
236 {
237 GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL), ("Wrong magic word"));
238 gst_buffer_unmap (buf, &map);
239 return GST_FLOW_ERROR;
240 }
241 wrong_name_length:
242 {
243 GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
244 ("name doesn't fit in buffer (%" G_GSIZE_FORMAT " < %d)", map.size,
245 17 + name_length));
246 gst_buffer_unmap (buf, &map);
247 return GST_FLOW_ERROR;
248 }
249 wrong_fixed_word_2:
250 {
251 GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
252 ("wrong fixed word: expected %u, got %u", 4,
253 GST_READ_UINT16_LE (map.data + 11 + name_length)));
254 gst_buffer_unmap (buf, &map);
255 return GST_FLOW_ERROR;
256 }
257 wrong_total_length:
258 {
259 GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
260 ("buffer size is wrong: need %d bytes, have %" G_GSIZE_FORMAT " bytes",
261 17 + name_length + file_length, map.size));
262 gst_buffer_unmap (buf, &map);
263 return GST_FLOW_ERROR;
264 }
265 extract_failed:
266 {
267 GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
268 ("could not extract subtitles"));
269 gst_buffer_unmap (buf, &map);
270 return GST_FLOW_ERROR;
271 }
272 }
273
274 static GstFlowReturn
gst_avi_subtitle_chain(GstPad * pad,GstObject * parent,GstBuffer * buffer)275 gst_avi_subtitle_chain (GstPad * pad, GstObject * parent, GstBuffer * buffer)
276 {
277 GstAviSubtitle *sub = GST_AVI_SUBTITLE (parent);
278 GstFlowReturn ret;
279
280 if (sub->subfile != NULL) {
281 GST_WARNING_OBJECT (sub, "Got more buffers than expected, dropping");
282 ret = GST_FLOW_EOS;
283 goto done;
284 }
285
286 /* we expect exactly one buffer with the whole srt/ssa file in it */
287 ret = gst_avi_subtitle_parse_gab2_chunk (sub, buffer);
288 if (ret != GST_FLOW_OK)
289 goto done;
290
291 /* now push the subtitle data downstream */
292 ret = gst_pad_push (sub->src, gst_buffer_ref (sub->subfile));
293
294 done:
295
296 gst_buffer_unref (buffer);
297 return ret;
298 }
299
300 static gboolean
gst_avi_subtitle_send_event(GstElement * element,GstEvent * event)301 gst_avi_subtitle_send_event (GstElement * element, GstEvent * event)
302 {
303 GstAviSubtitle *avisubtitle = GST_AVI_SUBTITLE (element);
304 gboolean ret = FALSE;
305
306 if (GST_EVENT_TYPE (event) == GST_EVENT_SEEK) {
307 if (avisubtitle->subfile) {
308 if (gst_pad_push (avisubtitle->src,
309 gst_buffer_ref (avisubtitle->subfile)) == GST_FLOW_OK)
310 ret = TRUE;
311 }
312 }
313 gst_event_unref (event);
314 return ret;
315 }
316
317 static void
gst_avi_subtitle_class_init(GstAviSubtitleClass * klass)318 gst_avi_subtitle_class_init (GstAviSubtitleClass * klass)
319 {
320 GstElementClass *gstelement_class = (GstElementClass *) klass;
321
322 GST_DEBUG_CATEGORY_INIT (avisubtitle_debug, "avisubtitle", 0,
323 "parse avi subtitle stream");
324
325 gstelement_class->change_state =
326 GST_DEBUG_FUNCPTR (gst_avi_subtitle_change_state);
327 gstelement_class->send_event =
328 GST_DEBUG_FUNCPTR (gst_avi_subtitle_send_event);
329
330 gst_element_class_add_static_pad_template (gstelement_class, &sink_template);
331 gst_element_class_add_static_pad_template (gstelement_class, &src_template);
332
333 gst_element_class_set_static_metadata (gstelement_class,
334 "Avi subtitle parser", "Codec/Parser/Subtitle",
335 "Parse avi subtitle stream", "Thijs Vermeir <thijsvermeir@gmail.com>");
336 }
337
338 static void
gst_avi_subtitle_init(GstAviSubtitle * self)339 gst_avi_subtitle_init (GstAviSubtitle * self)
340 {
341 GstCaps *caps;
342
343 self->src = gst_pad_new_from_static_template (&src_template, "src");
344 gst_element_add_pad (GST_ELEMENT (self), self->src);
345
346 self->sink = gst_pad_new_from_static_template (&sink_template, "sink");
347 gst_pad_set_chain_function (self->sink,
348 GST_DEBUG_FUNCPTR (gst_avi_subtitle_chain));
349
350 caps = gst_static_pad_template_get_caps (&src_template);
351 gst_pad_set_caps (self->src, caps);
352 gst_caps_unref (caps);
353
354 gst_pad_use_fixed_caps (self->src);
355 gst_element_add_pad (GST_ELEMENT (self), self->sink);
356
357 self->subfile = NULL;
358 }
359
360 static GstStateChangeReturn
gst_avi_subtitle_change_state(GstElement * element,GstStateChange transition)361 gst_avi_subtitle_change_state (GstElement * element, GstStateChange transition)
362 {
363 GstStateChangeReturn ret;
364 GstAviSubtitle *sub = GST_AVI_SUBTITLE (element);
365
366 switch (transition) {
367 case GST_STATE_CHANGE_NULL_TO_READY:
368 case GST_STATE_CHANGE_READY_TO_PAUSED:
369 default:
370 break;
371 }
372
373 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
374 if (ret == GST_STATE_CHANGE_FAILURE)
375 return ret;
376
377 switch (transition) {
378 case GST_STATE_CHANGE_PAUSED_TO_READY:
379 if (sub->subfile) {
380 gst_buffer_unref (sub->subfile);
381 sub->subfile = NULL;
382 }
383 break;
384 default:
385 break;
386 }
387
388 return ret;
389 }
390