• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer SSA subtitle parser
2  * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 /* Super-primitive SSA parser - we just want the text and ignore
21  * everything else like styles and timing codes etc. for now */
22 
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include <stdlib.h>             /* atoi() */
28 #include <string.h>
29 
30 #include "gstssaparse.h"
31 
/* Debug category of this element; it is registered under the name
 * "ssaparse" in the class initialiser and used as the default category
 * for all GST_* logging macros in this file. */
32 GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
33 #define GST_CAT_DEFAULT ssa_parse_debug
34 
/* Always-present sink pad: accepts application/x-ssa and
 * application/x-ass caps. */
35 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
36     GST_PAD_SINK,
37     GST_PAD_ALWAYS,
38     GST_STATIC_CAPS ("application/x-ssa; application/x-ass")
39     );
40 
/* Always-present source pad: produces text/x-raw with
 * format=pango-markup. */
41 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
42     GST_PAD_SRC,
43     GST_PAD_ALWAYS,
44     GST_STATIC_CAPS ("text/x-raw, format=pango-markup")
45     );
46 
/* Make the parent-class pointer generated by G_DEFINE_TYPE available under
 * the short name `parent_class`, which the rest of this file uses. */
47 #define gst_ssa_parse_parent_class parent_class
48 G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
49 
/* Forward declarations for the element's virtual method and for the pad
 * functions that are installed in the instance initialiser. */
50 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
51     element, GstStateChange transition);
52 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps);
53 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent,
54     GstEvent * event);
55 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent,
56     GstEvent * event);
57 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent,
58     GstBuffer * buf);
59 
60 static void
gst_ssa_parse_dispose(GObject * object)61 gst_ssa_parse_dispose (GObject * object)
62 {
63   GstSsaParse *parse = GST_SSA_PARSE (object);
64 
65   g_free (parse->ini);
66   parse->ini = NULL;
67 
68   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
69 }
70 
71 static void
gst_ssa_parse_init(GstSsaParse * parse)72 gst_ssa_parse_init (GstSsaParse * parse)
73 {
74   parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
75   gst_pad_set_chain_function (parse->sinkpad,
76       GST_DEBUG_FUNCPTR (gst_ssa_parse_chain));
77   gst_pad_set_event_function (parse->sinkpad,
78       GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event));
79   gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad);
80 
81   parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
82   gst_pad_set_event_function (parse->srcpad,
83       GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event));
84   gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad);
85   gst_pad_use_fixed_caps (parse->srcpad);
86 
87   parse->ini = NULL;
88   parse->framed = FALSE;
89   parse->send_tags = FALSE;
90 }
91 
92 static void
gst_ssa_parse_class_init(GstSsaParseClass * klass)93 gst_ssa_parse_class_init (GstSsaParseClass * klass)
94 {
95   GObjectClass *object_class = G_OBJECT_CLASS (klass);
96   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
97 
98   object_class->dispose = gst_ssa_parse_dispose;
99 
100   gst_element_class_add_static_pad_template (element_class, &sink_templ);
101   gst_element_class_add_static_pad_template (element_class, &src_templ);
102   gst_element_class_set_static_metadata (element_class,
103       "SSA Subtitle Parser", "Codec/Parser/Subtitle",
104       "Parses SSA subtitle streams",
105       "Tim-Philipp Müller <tim centricular net>");
106 
107   GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0,
108       "SSA subtitle parser");
109 
110   element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state);
111 }
112 
113 static gboolean
gst_ssa_parse_src_event(GstPad * pad,GstObject * parent,GstEvent * event)114 gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
115 {
116   return gst_pad_event_default (pad, parent, event);
117 }
118 
119 static gboolean
gst_ssa_parse_sink_event(GstPad * pad,GstObject * parent,GstEvent * event)120 gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
121 {
122   gboolean res;
123 
124   switch (GST_EVENT_TYPE (event)) {
125     case GST_EVENT_CAPS:
126     {
127       GstCaps *caps;
128 
129       gst_event_parse_caps (event, &caps);
130       res = gst_ssa_parse_setcaps (pad, caps);
131       gst_event_unref (event);
132       break;
133     }
134     default:
135       res = gst_pad_event_default (pad, parent, event);
136       break;
137   }
138   return res;
139 }
140 
141 static gboolean
gst_ssa_parse_setcaps(GstPad * sinkpad,GstCaps * caps)142 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps)
143 {
144   GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
145   GstCaps *outcaps;
146   const GValue *val;
147   GstStructure *s;
148   const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF };
149   const gchar *end;
150   GstBuffer *priv;
151   GstMapInfo map;
152   gchar *ptr;
153   gsize left, bad_offset;
154   gboolean ret;
155 
156   s = gst_caps_get_structure (caps, 0);
157   val = gst_structure_get_value (s, "codec_data");
158   if (val == NULL) {
159     parse->framed = FALSE;
160     GST_ERROR ("Only SSA subtitles embedded in containers are supported");
161     return FALSE;
162   }
163 
164   parse->framed = TRUE;
165   parse->send_tags = TRUE;
166 
167   priv = (GstBuffer *) g_value_get_boxed (val);
168   g_return_val_if_fail (priv != NULL, FALSE);
169 
170   gst_buffer_ref (priv);
171 
172   if (!gst_buffer_map (priv, &map, GST_MAP_READ)) {
173     gst_buffer_unref (priv);
174     return FALSE;
175   }
176 
177   GST_MEMDUMP_OBJECT (parse, "init section", map.data, map.size);
178 
179   ptr = (gchar *) map.data;
180   left = map.size;
181 
182   /* skip UTF-8 BOM */
183   if (left >= 3 && memcmp (ptr, bom_utf8, 3) == 0) {
184     ptr += 3;
185     left -= 3;
186   }
187 
188   if (!strstr (ptr, "[Script Info]"))
189     goto invalid_init;
190 
191   if (!g_utf8_validate (ptr, left, &end)) {
192     bad_offset = (gsize) (end - ptr);
193     GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8. Problem at "
194         "byte offset %" G_GSIZE_FORMAT, bad_offset);
195     /* continue with valid UTF-8 data */
196     left = bad_offset;
197   }
198 
199   /* FIXME: parse initial section */
200   if (parse->ini)
201     g_free (parse->ini);
202   parse->ini = g_strndup (ptr, left);
203   GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini);
204 
205   gst_buffer_unmap (priv, &map);
206   gst_buffer_unref (priv);
207 
208   outcaps = gst_caps_new_simple ("text/x-raw",
209       "format", G_TYPE_STRING, "pango-markup", NULL);
210 
211   ret = gst_pad_set_caps (parse->srcpad, outcaps);
212   gst_caps_unref (outcaps);
213 
214   return ret;
215 
216   /* ERRORS */
217 invalid_init:
218   {
219     GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header");
220     gst_buffer_unmap (priv, &map);
221     gst_buffer_unref (priv);
222     return FALSE;
223   }
224 }
225 
226 static gboolean
gst_ssa_parse_remove_override_codes(GstSsaParse * parse,gchar * txt)227 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
228 {
229   gchar *t, *end;
230   gboolean removed_any = FALSE;
231 
232   while ((t = strchr (txt, '{'))) {
233     end = strchr (txt, '}');
234     if (end == NULL) {
235       GST_WARNING_OBJECT (parse, "Missing { for style override code");
236       return removed_any;
237     }
238     /* move terminating NUL character forward as well */
239     memmove (t, end + 1, strlen (end + 1) + 1);
240     removed_any = TRUE;
241   }
242 
243   /* these may occur outside of curly brackets. We don't handle the different
244    * wrapping modes yet, so just remove these markers from the text for now */
245   while ((t = strstr (txt, "\\n"))) {
246     t[0] = ' ';
247     t[1] = '\n';
248   }
249   while ((t = strstr (txt, "\\N"))) {
250     t[0] = ' ';
251     t[1] = '\n';
252   }
253   while ((t = strstr (txt, "\\h"))) {
254     t[0] = ' ';
255     t[1] = ' ';
256   }
257 
258   return removed_any;
259 }
260 
261 /**
262  * gst_ssa_parse_push_line:
263  * @parse: caller element
264  * @txt: text to push
265  * @start: timestamp for the buffer
266  * @duration: duration for the buffer
267  *
268  * Parse the text in a buffer with the given properties and
269  * push it to the srcpad of the @parse element
270  *
271  * Returns: result of the push of the created buffer
272  */
273 static GstFlowReturn
gst_ssa_parse_push_line(GstSsaParse * parse,gchar * txt,GstClockTime start,GstClockTime duration)274 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
275     GstClockTime start, GstClockTime duration)
276 {
277   GstFlowReturn ret;
278   GstBuffer *buf;
279   gchar *t, *escaped;
280   gint num, i, len;
281 
282   num = atoi (txt);
283   GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
284       num, GST_TIME_ARGS (start));
285 
286   /* skip all non-text fields before the actual text */
287   t = txt;
288   for (i = 0; i < 8; ++i) {
289     t = strchr (t, ',');
290     if (t == NULL)
291       return GST_FLOW_ERROR;
292     ++t;
293   }
294 
295   GST_LOG_OBJECT (parse, "Text : %s", t);
296 
297   if (gst_ssa_parse_remove_override_codes (parse, t)) {
298     GST_LOG_OBJECT (parse, "Clean: %s", t);
299   }
300 
301   /* we claim to output pango markup, so we must escape the
302    * text even if we don't actually use any pango markup yet */
303   escaped = g_markup_printf_escaped ("%s", t);
304 
305   len = strlen (escaped);
306 
307   /* allocate enough for a terminating NUL, but don't include it in buf size */
308   buf = gst_buffer_new_and_alloc (len + 1);
309   gst_buffer_fill (buf, 0, escaped, len + 1);
310   gst_buffer_set_size (buf, len);
311   g_free (escaped);
312 
313   GST_BUFFER_TIMESTAMP (buf) = start;
314   GST_BUFFER_DURATION (buf) = duration;
315 
316   GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
317       " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
318       GST_TIME_ARGS (duration));
319 
320   ret = gst_pad_push (parse->srcpad, buf);
321 
322   if (ret != GST_FLOW_OK) {
323     GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt,
324         gst_flow_get_name (ret));
325   }
326 
327   return ret;
328 }
329 
330 static GstFlowReturn
gst_ssa_parse_chain(GstPad * sinkpad,GstObject * parent,GstBuffer * buf)331 gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
332 {
333   GstFlowReturn ret;
334   GstSsaParse *parse = GST_SSA_PARSE (parent);
335   GstClockTime ts;
336   gchar *txt;
337   GstMapInfo map;
338 
339   if (G_UNLIKELY (!parse->framed))
340     goto not_framed;
341 
342   if (G_UNLIKELY (parse->send_tags)) {
343     GstTagList *tags;
344 
345     tags = gst_tag_list_new_empty ();
346     gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC,
347         "SubStation Alpha", NULL);
348     gst_pad_push_event (parse->srcpad, gst_event_new_tag (tags));
349     parse->send_tags = FALSE;
350   }
351 
352   /* make double-sure it's 0-terminated and all */
353   gst_buffer_map (buf, &map, GST_MAP_READ);
354   txt = g_strndup ((gchar *) map.data, map.size);
355   gst_buffer_unmap (buf, &map);
356 
357   if (txt == NULL)
358     goto empty_text;
359 
360   ts = GST_BUFFER_TIMESTAMP (buf);
361   ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
362 
363   if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
364     GstSegment segment;
365 
366     /* just advance time without sending anything */
367     gst_segment_init (&segment, GST_FORMAT_TIME);
368     segment.start = ts;
369     segment.time = ts;
370     gst_pad_push_event (parse->srcpad, gst_event_new_segment (&segment));
371     ret = GST_FLOW_OK;
372   }
373 
374   gst_buffer_unref (buf);
375   g_free (txt);
376 
377   return ret;
378 
379 /* ERRORS */
380 not_framed:
381   {
382     GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL),
383         ("Only SSA subtitles embedded in containers are supported"));
384     gst_buffer_unref (buf);
385     return GST_FLOW_NOT_NEGOTIATED;
386   }
387 empty_text:
388   {
389     GST_ELEMENT_WARNING (parse, STREAM, FORMAT, (NULL),
390         ("Received empty subtitle"));
391     gst_buffer_unref (buf);
392     return GST_FLOW_OK;
393   }
394 }
395 
396 static GstStateChangeReturn
gst_ssa_parse_change_state(GstElement * element,GstStateChange transition)397 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition)
398 {
399   GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
400   GstSsaParse *parse = GST_SSA_PARSE (element);
401 
402   switch (transition) {
403     case GST_STATE_CHANGE_READY_TO_PAUSED:
404       break;
405     default:
406       break;
407   }
408 
409   ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
410   if (ret == GST_STATE_CHANGE_FAILURE)
411     return ret;
412 
413   switch (transition) {
414     case GST_STATE_CHANGE_PAUSED_TO_READY:
415       g_free (parse->ini);
416       parse->ini = NULL;
417       parse->framed = FALSE;
418       break;
419     default:
420       break;
421   }
422 
423   return ret;
424 }
425