• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer AVI GAB2 subtitle parser
2  * Copyright (C) <2007> Thijs Vermeir <thijsvermeir@gmail.com>
3  * Copyright (C) <2007> Tim-Philipp Müller <tim centricular net>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 /**
22  * SECTION:element-avisubtitle
23  * @title: avisubtitle
24  *
25  * Parses the subtitle stream from an avi file.
26  *
27  * ## Example launch line
28  *
29  * |[
30  * gst-launch-1.0 filesrc location=subtitle.avi ! avidemux name=demux ! queue ! avisubtitle ! subparse ! textoverlay name=overlay ! videoconvert ! autovideosink demux. ! queue ! decodebin ! overlay.
31  * ]|
32  * This plays an avi file with a video and subtitle stream.
33  *
34  */
35 
36 /* example of a subtitle chunk in an avi file
37  * 00000000: 47 41 42 32 00 02 00 10 00 00 00 45 00 6e 00 67  GAB2.......E.n.g
38  * 00000010: 00 6c 00 69 00 73 00 68 00 00 00 04 00 8e 00 00  .l.i.s.h........
39  * 00000020: 00 ef bb bf 31 0d 0a 30 30 3a 30 30 3a 30 30 2c  ....1..00:00:00,
40  * 00000030: 31 30 30 20 2d 2d 3e 20 30 30 3a 30 30 3a 30 32  100 --> 00:00:02
41  * 00000040: 2c 30 30 30 0d 0a 3c 62 3e 41 6e 20 55 54 46 38  ,000..<b>An UTF8
42  * 00000050: 20 53 75 62 74 69 74 6c 65 20 77 69 74 68 20 42   Subtitle with B
43  * 00000060: 4f 4d 3c 2f 62 3e 0d 0a 0d 0a 32 0d 0a 30 30 3a  OM</b>....2..00:
44  * 00000070: 30 30 3a 30 32 2c 31 30 30 20 2d 2d 3e 20 30 30  00:02,100 --> 00
45  * 00000080: 3a 30 30 3a 30 34 2c 30 30 30 0d 0a 53 6f 6d 65  :00:04,000..Some
46  * 00000090: 74 68 69 6e 67 20 6e 6f 6e 41 53 43 49 49 20 2d  thing nonASCII -
47  * 000000a0: 20 c2 b5 c3 b6 c3 a4 c3 bc c3 9f 0d 0a 0d 0a      ..............
48  */
49 
50 #ifdef HAVE_CONFIG_H
51 #include "config.h"
52 #endif
53 
54 #include <string.h>
55 
56 #include "gstavielements.h"
57 #include "gstavisubtitle.h"
58 
59 GST_DEBUG_CATEGORY_STATIC (avisubtitle_debug);
60 #define GST_CAT_DEFAULT avisubtitle_debug
61 
62 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
63     GST_PAD_SINK,
64     GST_PAD_ALWAYS,
65     GST_STATIC_CAPS ("application/x-subtitle-avi")
66     );
67 
68 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
69     GST_PAD_SRC,
70     GST_PAD_ALWAYS,
71     GST_STATIC_CAPS ("application/x-subtitle")
72     );
73 
74 static void gst_avi_subtitle_title_tag (GstAviSubtitle * sub, gchar * title);
75 static GstFlowReturn gst_avi_subtitle_chain (GstPad * pad, GstObject * parent,
76     GstBuffer * buffer);
77 static GstStateChangeReturn gst_avi_subtitle_change_state (GstElement * element,
78     GstStateChange transition);
79 static gboolean gst_avi_subtitle_send_event (GstElement * element,
80     GstEvent * event);
81 
82 #define gst_avi_subtitle_parent_class parent_class
83 G_DEFINE_TYPE (GstAviSubtitle, gst_avi_subtitle, GST_TYPE_ELEMENT);
84 GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (avisubtitle, "avisubtitle",
85     GST_RANK_PRIMARY, GST_TYPE_AVI_SUBTITLE, avi_element_init (plugin));
86 
/* Byte-order-mark tests on the bytes at @data.  IS_BOM_UTF8 and the two
 * UTF-32 macros read 4 bytes, the UTF-16 macros read 2 bytes, so the caller
 * has to make sure that many bytes are readable before using them.
 * IS_BOM_UTF8 reads a 32-bit big-endian word and shifts out the lowest byte,
 * leaving the first three bytes to compare against EF BB BF. */
87 #define IS_BOM_UTF8(data)     ((GST_READ_UINT32_BE(data) >> 8) == 0xEFBBBF)
88 #define IS_BOM_UTF16_BE(data) (GST_READ_UINT16_BE(data) == 0xFEFF)
89 #define IS_BOM_UTF16_LE(data) (GST_READ_UINT16_LE(data) == 0xFEFF)
90 #define IS_BOM_UTF32_BE(data) (GST_READ_UINT32_BE(data) == 0xFEFF)
91 #define IS_BOM_UTF32_LE(data) (GST_READ_UINT32_LE(data) == 0xFEFF)
92 
93 static GstBuffer *
gst_avi_subtitle_extract_file(GstAviSubtitle * sub,GstBuffer * buffer,guint offset,guint len)94 gst_avi_subtitle_extract_file (GstAviSubtitle * sub, GstBuffer * buffer,
95     guint offset, guint len)
96 {
97   const gchar *input_enc = NULL;
98   GstBuffer *ret = NULL;
99   gchar *data;
100   GstMapInfo map;
101 
102   gst_buffer_map (buffer, &map, GST_MAP_READ);
103   data = (gchar *) (map.data + offset);
104 
105   if (len >= (3 + 1) && IS_BOM_UTF8 (data) &&
106       g_utf8_validate (data + 3, len - 3, NULL)) {
107     ret =
108         gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL, offset + 3,
109         len - 3);
110   } else if (len >= 2 && IS_BOM_UTF16_BE (data)) {
111     input_enc = "UTF-16BE";
112     data += 2;
113     len -= 2;
114   } else if (len >= 2 && IS_BOM_UTF16_LE (data)) {
115     input_enc = "UTF-16LE";
116     data += 2;
117     len -= 2;
118   } else if (len >= 4 && IS_BOM_UTF32_BE (data)) {
119     input_enc = "UTF-32BE";
120     data += 4;
121     len -= 4;
122   } else if (len >= 4 && IS_BOM_UTF32_LE (data)) {
123     input_enc = "UTF-32LE";
124     data += 4;
125     len -= 4;
126   } else if (g_utf8_validate (data, len, NULL)) {
127     /* not specified, check if it's UTF-8 */
128     ret = gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL, offset, len);
129   } else {
130     /* we could fall back to gst_tag_freeform_to_utf8() here */
131     GST_WARNING_OBJECT (sub, "unspecified encoding, and not UTF-8");
132     ret = NULL;
133     goto done;
134   }
135 
136   g_return_val_if_fail (ret != NULL || input_enc != NULL, NULL);
137 
138   if (input_enc) {
139     GError *err = NULL;
140     gchar *utf8;
141     gsize slen;
142 
143     GST_DEBUG_OBJECT (sub, "converting subtitles from %s to UTF-8", input_enc);
144     utf8 = g_convert (data, len, "UTF-8", input_enc, NULL, NULL, &err);
145 
146     if (err != NULL) {
147       GST_WARNING_OBJECT (sub, "conversion to UTF-8 failed : %s", err->message);
148       g_error_free (err);
149       ret = NULL;
150       goto done;
151     }
152 
153     ret = gst_buffer_new ();
154     slen = strlen (utf8);
155     gst_buffer_append_memory (ret,
156         gst_memory_new_wrapped (0, utf8, slen, 0, slen, utf8, g_free));
157 
158     GST_BUFFER_OFFSET (ret) = 0;
159   }
160 
161 done:
162   gst_buffer_unmap (buffer, &map);
163 
164   return ret;
165 }
166 
167 /**
168  * gst_avi_subtitle_title_tag:
169  * @sub: subtitle element
170  * @title: the title of this subtitle stream
171  *
172  * Send an event to the srcpad of the @sub element with the title
173  * of the subtitle stream as a GST_TAG_TITLE
174  */
175 static void
gst_avi_subtitle_title_tag(GstAviSubtitle * sub,gchar * title)176 gst_avi_subtitle_title_tag (GstAviSubtitle * sub, gchar * title)
177 {
178   gst_pad_push_event (sub->src,
179       gst_event_new_tag (gst_tag_list_new (GST_TAG_TITLE, title, NULL)));
180 }
181 
182 static GstFlowReturn
gst_avi_subtitle_parse_gab2_chunk(GstAviSubtitle * sub,GstBuffer * buf)183 gst_avi_subtitle_parse_gab2_chunk (GstAviSubtitle * sub, GstBuffer * buf)
184 {
185   gchar *name_utf8;
186   guint name_length;
187   guint file_length;
188   GstMapInfo map;
189 
190   gst_buffer_map (buf, &map, GST_MAP_READ);
191 
192   /* check the magic word "GAB2\0", and the next word must be 2 */
193   if (map.size < 12 || memcmp (map.data, "GAB2\0\2\0", 5 + 2) != 0)
194     goto wrong_magic_word;
195 
196   /* read 'name' of subtitle */
197   name_length = GST_READ_UINT32_LE (map.data + 5 + 2);
198   GST_LOG_OBJECT (sub, "length of name: %u", name_length);
199   if (map.size <= 17 + name_length)
200     goto wrong_name_length;
201 
202   name_utf8 =
203       g_convert ((gchar *) map.data + 11, name_length, "UTF-8", "UTF-16LE",
204       NULL, NULL, NULL);
205 
206   if (name_utf8) {
207     GST_LOG_OBJECT (sub, "subtitle name: %s", name_utf8);
208     gst_avi_subtitle_title_tag (sub, name_utf8);
209     g_free (name_utf8);
210   }
211 
212   /* next word must be 4 */
213   if (GST_READ_UINT16_LE (map.data + 11 + name_length) != 0x4)
214     goto wrong_fixed_word_2;
215 
216   file_length = GST_READ_UINT32_LE (map.data + 13 + name_length);
217   GST_LOG_OBJECT (sub, "length srt/ssa file: %u", file_length);
218 
219   if (map.size < (17 + name_length + file_length))
220     goto wrong_total_length;
221 
222   /* store this, so we can send it again after a seek; note that we shouldn't
223    * assume all the remaining data in the chunk is subtitle data, there may
224    * be padding at the end for some reason, so only parse file_length bytes */
225   sub->subfile =
226       gst_avi_subtitle_extract_file (sub, buf, 17 + name_length, file_length);
227 
228   if (sub->subfile == NULL)
229     goto extract_failed;
230 
231   gst_buffer_unmap (buf, &map);
232 
233   return GST_FLOW_OK;
234 
235   /* ERRORS */
236 wrong_magic_word:
237   {
238     GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL), ("Wrong magic word"));
239     gst_buffer_unmap (buf, &map);
240     return GST_FLOW_ERROR;
241   }
242 wrong_name_length:
243   {
244     GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
245         ("name doesn't fit in buffer (%" G_GSIZE_FORMAT " < %d)", map.size,
246             17 + name_length));
247     gst_buffer_unmap (buf, &map);
248     return GST_FLOW_ERROR;
249   }
250 wrong_fixed_word_2:
251   {
252     GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
253         ("wrong fixed word: expected %u, got %u", 4,
254             GST_READ_UINT16_LE (map.data + 11 + name_length)));
255     gst_buffer_unmap (buf, &map);
256     return GST_FLOW_ERROR;
257   }
258 wrong_total_length:
259   {
260     GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
261         ("buffer size is wrong: need %d bytes, have %" G_GSIZE_FORMAT " bytes",
262             17 + name_length + file_length, map.size));
263     gst_buffer_unmap (buf, &map);
264     return GST_FLOW_ERROR;
265   }
266 extract_failed:
267   {
268     GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
269         ("could not extract subtitles"));
270     gst_buffer_unmap (buf, &map);
271     return GST_FLOW_ERROR;
272   }
273 }
274 
275 static GstFlowReturn
gst_avi_subtitle_chain(GstPad * pad,GstObject * parent,GstBuffer * buffer)276 gst_avi_subtitle_chain (GstPad * pad, GstObject * parent, GstBuffer * buffer)
277 {
278   GstAviSubtitle *sub = GST_AVI_SUBTITLE (parent);
279   GstFlowReturn ret;
280 
281   if (sub->subfile != NULL) {
282     GST_WARNING_OBJECT (sub, "Got more buffers than expected, dropping");
283     ret = GST_FLOW_EOS;
284     goto done;
285   }
286 
287   /* we expect exactly one buffer with the whole srt/ssa file in it */
288   ret = gst_avi_subtitle_parse_gab2_chunk (sub, buffer);
289   if (ret != GST_FLOW_OK)
290     goto done;
291 
292   /* now push the subtitle data downstream */
293   ret = gst_pad_push (sub->src, gst_buffer_ref (sub->subfile));
294 
295 done:
296 
297   gst_buffer_unref (buffer);
298   return ret;
299 }
300 
301 static gboolean
gst_avi_subtitle_send_event(GstElement * element,GstEvent * event)302 gst_avi_subtitle_send_event (GstElement * element, GstEvent * event)
303 {
304   GstAviSubtitle *avisubtitle = GST_AVI_SUBTITLE (element);
305   gboolean ret = FALSE;
306 
307   if (GST_EVENT_TYPE (event) == GST_EVENT_SEEK) {
308     if (avisubtitle->subfile) {
309       if (gst_pad_push (avisubtitle->src,
310               gst_buffer_ref (avisubtitle->subfile)) == GST_FLOW_OK)
311         ret = TRUE;
312     }
313   }
314   gst_event_unref (event);
315   return ret;
316 }
317 
318 static void
gst_avi_subtitle_class_init(GstAviSubtitleClass * klass)319 gst_avi_subtitle_class_init (GstAviSubtitleClass * klass)
320 {
321   GstElementClass *gstelement_class = (GstElementClass *) klass;
322 
323   GST_DEBUG_CATEGORY_INIT (avisubtitle_debug, "avisubtitle", 0,
324       "parse avi subtitle stream");
325 
326   gstelement_class->change_state =
327       GST_DEBUG_FUNCPTR (gst_avi_subtitle_change_state);
328   gstelement_class->send_event =
329       GST_DEBUG_FUNCPTR (gst_avi_subtitle_send_event);
330 
331   gst_element_class_add_static_pad_template (gstelement_class, &sink_template);
332   gst_element_class_add_static_pad_template (gstelement_class, &src_template);
333 
334   gst_element_class_set_static_metadata (gstelement_class,
335       "Avi subtitle parser", "Codec/Parser/Subtitle",
336       "Parse avi subtitle stream", "Thijs Vermeir <thijsvermeir@gmail.com>");
337 }
338 
339 static void
gst_avi_subtitle_init(GstAviSubtitle * self)340 gst_avi_subtitle_init (GstAviSubtitle * self)
341 {
342   GstCaps *caps;
343 
344   self->src = gst_pad_new_from_static_template (&src_template, "src");
345   gst_element_add_pad (GST_ELEMENT (self), self->src);
346 
347   self->sink = gst_pad_new_from_static_template (&sink_template, "sink");
348   gst_pad_set_chain_function (self->sink,
349       GST_DEBUG_FUNCPTR (gst_avi_subtitle_chain));
350 
351   caps = gst_static_pad_template_get_caps (&src_template);
352   gst_pad_set_caps (self->src, caps);
353   gst_caps_unref (caps);
354 
355   gst_pad_use_fixed_caps (self->src);
356   gst_element_add_pad (GST_ELEMENT (self), self->sink);
357 
358   self->subfile = NULL;
359 }
360 
361 static GstStateChangeReturn
gst_avi_subtitle_change_state(GstElement * element,GstStateChange transition)362 gst_avi_subtitle_change_state (GstElement * element, GstStateChange transition)
363 {
364   GstStateChangeReturn ret;
365   GstAviSubtitle *sub = GST_AVI_SUBTITLE (element);
366 
367   switch (transition) {
368     case GST_STATE_CHANGE_NULL_TO_READY:
369     case GST_STATE_CHANGE_READY_TO_PAUSED:
370     default:
371       break;
372   }
373 
374   ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
375   if (ret == GST_STATE_CHANGE_FAILURE)
376     return ret;
377 
378   switch (transition) {
379     case GST_STATE_CHANGE_PAUSED_TO_READY:
380       if (sub->subfile) {
381         gst_buffer_unref (sub->subfile);
382         sub->subfile = NULL;
383       }
384       break;
385     default:
386       break;
387   }
388 
389   return ret;
390 }
391