• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer SSA subtitle parser
2  * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 /* Super-primitive SSA parser - we just want the text and ignore
21  * everything else like styles and timing codes etc. for now */
22 
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include <stdlib.h>             /* atoi() */
28 #include <string.h>
29 
30 #include "gstssaparse.h"
31 #include "gstsubparseelements.h"
32 
33 
34 GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
35 #undef GST_CAT_DEFAULT
36 #define GST_CAT_DEFAULT ssa_parse_debug
37 
38 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
39     GST_PAD_SINK,
40     GST_PAD_ALWAYS,
41     GST_STATIC_CAPS ("application/x-ssa; application/x-ass")
42     );
43 
44 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
45     GST_PAD_SRC,
46     GST_PAD_ALWAYS,
47     GST_STATIC_CAPS ("text/x-raw, format=pango-markup")
48     );
49 
50 #define gst_ssa_parse_parent_class parent_class
51 G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
52 GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (ssaparse, "ssaparse",
53     GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE, sub_parse_element_init (plugin));
54 
55 
56 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
57     element, GstStateChange transition);
58 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps);
59 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent,
60     GstEvent * event);
61 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent,
62     GstEvent * event);
63 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent,
64     GstBuffer * buf);
65 
66 static void
gst_ssa_parse_dispose(GObject * object)67 gst_ssa_parse_dispose (GObject * object)
68 {
69   GstSsaParse *parse = GST_SSA_PARSE (object);
70 
71   g_free (parse->ini);
72   parse->ini = NULL;
73 
74   GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
75 }
76 
77 static void
gst_ssa_parse_init(GstSsaParse * parse)78 gst_ssa_parse_init (GstSsaParse * parse)
79 {
80   parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
81   gst_pad_set_chain_function (parse->sinkpad,
82       GST_DEBUG_FUNCPTR (gst_ssa_parse_chain));
83   gst_pad_set_event_function (parse->sinkpad,
84       GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event));
85   gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad);
86 
87   parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
88   gst_pad_set_event_function (parse->srcpad,
89       GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event));
90   gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad);
91   gst_pad_use_fixed_caps (parse->srcpad);
92 
93   parse->ini = NULL;
94   parse->framed = FALSE;
95   parse->send_tags = FALSE;
96 }
97 
98 static void
gst_ssa_parse_class_init(GstSsaParseClass * klass)99 gst_ssa_parse_class_init (GstSsaParseClass * klass)
100 {
101   GObjectClass *object_class = G_OBJECT_CLASS (klass);
102   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
103 
104   object_class->dispose = gst_ssa_parse_dispose;
105 
106   gst_element_class_add_static_pad_template (element_class, &sink_templ);
107   gst_element_class_add_static_pad_template (element_class, &src_templ);
108   gst_element_class_set_static_metadata (element_class,
109       "SSA Subtitle Parser", "Codec/Parser/Subtitle",
110       "Parses SSA subtitle streams",
111       "Tim-Philipp Müller <tim centricular net>");
112 
113   GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0,
114       "SSA subtitle parser");
115 
116   element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state);
117 }
118 
119 static gboolean
gst_ssa_parse_src_event(GstPad * pad,GstObject * parent,GstEvent * event)120 gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
121 {
122   return gst_pad_event_default (pad, parent, event);
123 }
124 
125 static gboolean
gst_ssa_parse_sink_event(GstPad * pad,GstObject * parent,GstEvent * event)126 gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
127 {
128   gboolean res;
129 
130   switch (GST_EVENT_TYPE (event)) {
131     case GST_EVENT_CAPS:
132     {
133       GstCaps *caps;
134 
135       gst_event_parse_caps (event, &caps);
136       res = gst_ssa_parse_setcaps (pad, caps);
137       gst_event_unref (event);
138       break;
139     }
140     default:
141       res = gst_pad_event_default (pad, parent, event);
142       break;
143   }
144   return res;
145 }
146 
147 static gboolean
gst_ssa_parse_setcaps(GstPad * sinkpad,GstCaps * caps)148 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps)
149 {
150   GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
151   GstCaps *outcaps;
152   const GValue *val;
153   GstStructure *s;
154   const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF };
155   const gchar *end;
156   GstBuffer *priv;
157   GstMapInfo map;
158   gchar *ptr;
159   gsize left, bad_offset;
160   gboolean ret;
161 
162   s = gst_caps_get_structure (caps, 0);
163   val = gst_structure_get_value (s, "codec_data");
164   if (val == NULL) {
165     parse->framed = FALSE;
166     GST_ERROR ("Only SSA subtitles embedded in containers are supported");
167     return FALSE;
168   }
169 
170   parse->framed = TRUE;
171   parse->send_tags = TRUE;
172 
173   priv = (GstBuffer *) g_value_get_boxed (val);
174   g_return_val_if_fail (priv != NULL, FALSE);
175 
176   gst_buffer_ref (priv);
177 
178   if (!gst_buffer_map (priv, &map, GST_MAP_READ)) {
179     gst_buffer_unref (priv);
180     return FALSE;
181   }
182 
183   GST_MEMDUMP_OBJECT (parse, "init section", map.data, map.size);
184 
185   ptr = (gchar *) map.data;
186   left = map.size;
187 
188   /* skip UTF-8 BOM */
189   if (left >= 3 && memcmp (ptr, bom_utf8, 3) == 0) {
190     ptr += 3;
191     left -= 3;
192   }
193 
194   if (!strstr (ptr, "[Script Info]"))
195     goto invalid_init;
196 
197   if (!g_utf8_validate (ptr, left, &end)) {
198     bad_offset = (gsize) (end - ptr);
199     GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8. Problem at "
200         "byte offset %" G_GSIZE_FORMAT, bad_offset);
201     /* continue with valid UTF-8 data */
202     left = bad_offset;
203   }
204 
205   /* FIXME: parse initial section */
206   if (parse->ini)
207     g_free (parse->ini);
208   parse->ini = g_strndup (ptr, left);
209   GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini);
210 
211   gst_buffer_unmap (priv, &map);
212   gst_buffer_unref (priv);
213 
214   outcaps = gst_caps_new_simple ("text/x-raw",
215       "format", G_TYPE_STRING, "pango-markup", NULL);
216 
217   ret = gst_pad_set_caps (parse->srcpad, outcaps);
218   gst_caps_unref (outcaps);
219 
220   return ret;
221 
222   /* ERRORS */
223 invalid_init:
224   {
225     GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header");
226     gst_buffer_unmap (priv, &map);
227     gst_buffer_unref (priv);
228     return FALSE;
229   }
230 }
231 
232 static gboolean
gst_ssa_parse_remove_override_codes(GstSsaParse * parse,gchar * txt)233 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
234 {
235   gchar *t, *end;
236   gboolean removed_any = FALSE;
237 
238   while ((t = strchr (txt, '{'))) {
239     end = strchr (txt, '}');
240     if (end == NULL) {
241       GST_WARNING_OBJECT (parse, "Missing { for style override code");
242       return removed_any;
243     }
244     /* move terminating NUL character forward as well */
245     memmove (t, end + 1, strlen (end + 1) + 1);
246     removed_any = TRUE;
247   }
248 
249   /* these may occur outside of curly brackets. We don't handle the different
250    * wrapping modes yet, so just remove these markers from the text for now */
251   while ((t = strstr (txt, "\\n"))) {
252     t[0] = ' ';
253     t[1] = '\n';
254   }
255   while ((t = strstr (txt, "\\N"))) {
256     t[0] = ' ';
257     t[1] = '\n';
258   }
259   while ((t = strstr (txt, "\\h"))) {
260     t[0] = ' ';
261     t[1] = ' ';
262   }
263 
264   return removed_any;
265 }
266 
267 /**
268  * gst_ssa_parse_push_line:
269  * @parse: caller element
270  * @txt: text to push
271  * @start: timestamp for the buffer
272  * @duration: duration for the buffer
273  *
274  * Parse the text in a buffer with the given properties and
275  * push it to the srcpad of the @parse element
276  *
277  * Returns: result of the push of the created buffer
278  */
279 static GstFlowReturn
gst_ssa_parse_push_line(GstSsaParse * parse,gchar * txt,GstClockTime start,GstClockTime duration)280 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
281     GstClockTime start, GstClockTime duration)
282 {
283   GstFlowReturn ret;
284   GstBuffer *buf;
285   gchar *t, *escaped;
286   gint num, i, len;
287 
288   num = atoi (txt);
289   GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
290       num, GST_TIME_ARGS (start));
291 
292   /* skip all non-text fields before the actual text */
293   t = txt;
294   for (i = 0; i < 8; ++i) {
295     t = strchr (t, ',');
296     if (t == NULL)
297       return GST_FLOW_ERROR;
298     ++t;
299   }
300 
301   GST_LOG_OBJECT (parse, "Text : %s", t);
302 
303   if (gst_ssa_parse_remove_override_codes (parse, t)) {
304     GST_LOG_OBJECT (parse, "Clean: %s", t);
305   }
306 
307   /* we claim to output pango markup, so we must escape the
308    * text even if we don't actually use any pango markup yet */
309   escaped = g_markup_printf_escaped ("%s", t);
310 
311   len = strlen (escaped);
312 
313   /* allocate enough for a terminating NUL, but don't include it in buf size */
314   buf = gst_buffer_new_and_alloc (len + 1);
315   gst_buffer_fill (buf, 0, escaped, len + 1);
316   gst_buffer_set_size (buf, len);
317   g_free (escaped);
318 
319   GST_BUFFER_TIMESTAMP (buf) = start;
320   GST_BUFFER_DURATION (buf) = duration;
321 
322   GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
323       " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
324       GST_TIME_ARGS (duration));
325 
326   ret = gst_pad_push (parse->srcpad, buf);
327 
328   if (ret != GST_FLOW_OK) {
329     GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt,
330         gst_flow_get_name (ret));
331   }
332 
333   return ret;
334 }
335 
336 static GstFlowReturn
gst_ssa_parse_chain(GstPad * sinkpad,GstObject * parent,GstBuffer * buf)337 gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
338 {
339   GstFlowReturn ret;
340   GstSsaParse *parse = GST_SSA_PARSE (parent);
341   GstClockTime ts;
342   gchar *txt;
343   GstMapInfo map;
344 
345   if (G_UNLIKELY (!parse->framed))
346     goto not_framed;
347 
348   if (G_UNLIKELY (parse->send_tags)) {
349     GstTagList *tags;
350 
351     tags = gst_tag_list_new_empty ();
352     gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC,
353         "SubStation Alpha", NULL);
354     gst_pad_push_event (parse->srcpad, gst_event_new_tag (tags));
355     parse->send_tags = FALSE;
356   }
357 
358   /* make double-sure it's 0-terminated and all */
359   gst_buffer_map (buf, &map, GST_MAP_READ);
360   txt = g_strndup ((gchar *) map.data, map.size);
361   gst_buffer_unmap (buf, &map);
362 
363   if (txt == NULL)
364     goto empty_text;
365 
366   ts = GST_BUFFER_TIMESTAMP (buf);
367   ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
368 
369   if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
370     GstSegment segment;
371 
372     /* just advance time without sending anything */
373     gst_segment_init (&segment, GST_FORMAT_TIME);
374     segment.start = ts;
375     segment.time = ts;
376     gst_pad_push_event (parse->srcpad, gst_event_new_segment (&segment));
377     ret = GST_FLOW_OK;
378   }
379 
380   gst_buffer_unref (buf);
381   g_free (txt);
382 
383   return ret;
384 
385 /* ERRORS */
386 not_framed:
387   {
388     GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL),
389         ("Only SSA subtitles embedded in containers are supported"));
390     gst_buffer_unref (buf);
391     return GST_FLOW_NOT_NEGOTIATED;
392   }
393 empty_text:
394   {
395     GST_ELEMENT_WARNING (parse, STREAM, FORMAT, (NULL),
396         ("Received empty subtitle"));
397     gst_buffer_unref (buf);
398     return GST_FLOW_OK;
399   }
400 }
401 
402 static GstStateChangeReturn
gst_ssa_parse_change_state(GstElement * element,GstStateChange transition)403 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition)
404 {
405   GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
406   GstSsaParse *parse = GST_SSA_PARSE (element);
407 
408   switch (transition) {
409     case GST_STATE_CHANGE_READY_TO_PAUSED:
410       break;
411     default:
412       break;
413   }
414 
415   ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
416   if (ret == GST_STATE_CHANGE_FAILURE)
417     return ret;
418 
419   switch (transition) {
420     case GST_STATE_CHANGE_PAUSED_TO_READY:
421       g_free (parse->ini);
422       parse->ini = NULL;
423       parse->framed = FALSE;
424       break;
425     default:
426       break;
427   }
428 
429   return ret;
430 }
431