1 /* GStreamer SSA subtitle parser
2 * Copyright (c) 2006 Tim-Philipp Müller <tim centricular net>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 /* Super-primitive SSA parser - we just want the text and ignore
21 * everything else like styles and timing codes etc. for now */
22
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include <stdlib.h> /* atoi() */
28 #include <string.h>
29
30 #include "gstssaparse.h"
31 #include "gstsubparseelements.h"
32
33
/* Debug category of this element. GST_CAT_DEFAULT is redirected to it so
 * that the GST_* logging macros used in this file log under it. */
GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
#undef GST_CAT_DEFAULT
#define GST_CAT_DEFAULT ssa_parse_debug
37
38 static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
39 GST_PAD_SINK,
40 GST_PAD_ALWAYS,
41 GST_STATIC_CAPS ("application/x-ssa; application/x-ass")
42 );
43
44 static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
45 GST_PAD_SRC,
46 GST_PAD_ALWAYS,
47 GST_STATIC_CAPS ("text/x-raw, format=pango-markup")
48 );
49
50 #define gst_ssa_parse_parent_class parent_class
51 G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
52 GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (ssaparse, "ssaparse",
53 GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE, sub_parse_element_init (plugin));
54
55
56 static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
57 element, GstStateChange transition);
58 static gboolean gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps);
59 static gboolean gst_ssa_parse_src_event (GstPad * pad, GstObject * parent,
60 GstEvent * event);
61 static gboolean gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent,
62 GstEvent * event);
63 static GstFlowReturn gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent,
64 GstBuffer * buf);
65
66 static void
gst_ssa_parse_dispose(GObject * object)67 gst_ssa_parse_dispose (GObject * object)
68 {
69 GstSsaParse *parse = GST_SSA_PARSE (object);
70
71 g_free (parse->ini);
72 parse->ini = NULL;
73
74 GST_CALL_PARENT (G_OBJECT_CLASS, dispose, (object));
75 }
76
77 static void
gst_ssa_parse_init(GstSsaParse * parse)78 gst_ssa_parse_init (GstSsaParse * parse)
79 {
80 parse->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
81 gst_pad_set_chain_function (parse->sinkpad,
82 GST_DEBUG_FUNCPTR (gst_ssa_parse_chain));
83 gst_pad_set_event_function (parse->sinkpad,
84 GST_DEBUG_FUNCPTR (gst_ssa_parse_sink_event));
85 gst_element_add_pad (GST_ELEMENT (parse), parse->sinkpad);
86
87 parse->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
88 gst_pad_set_event_function (parse->srcpad,
89 GST_DEBUG_FUNCPTR (gst_ssa_parse_src_event));
90 gst_element_add_pad (GST_ELEMENT (parse), parse->srcpad);
91 gst_pad_use_fixed_caps (parse->srcpad);
92
93 parse->ini = NULL;
94 parse->framed = FALSE;
95 parse->send_tags = FALSE;
96 }
97
98 static void
gst_ssa_parse_class_init(GstSsaParseClass * klass)99 gst_ssa_parse_class_init (GstSsaParseClass * klass)
100 {
101 GObjectClass *object_class = G_OBJECT_CLASS (klass);
102 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
103
104 object_class->dispose = gst_ssa_parse_dispose;
105
106 gst_element_class_add_static_pad_template (element_class, &sink_templ);
107 gst_element_class_add_static_pad_template (element_class, &src_templ);
108 gst_element_class_set_static_metadata (element_class,
109 "SSA Subtitle Parser", "Codec/Parser/Subtitle",
110 "Parses SSA subtitle streams",
111 "Tim-Philipp Müller <tim centricular net>");
112
113 GST_DEBUG_CATEGORY_INIT (ssa_parse_debug, "ssaparse", 0,
114 "SSA subtitle parser");
115
116 element_class->change_state = GST_DEBUG_FUNCPTR (gst_ssa_parse_change_state);
117 }
118
119 static gboolean
gst_ssa_parse_src_event(GstPad * pad,GstObject * parent,GstEvent * event)120 gst_ssa_parse_src_event (GstPad * pad, GstObject * parent, GstEvent * event)
121 {
122 return gst_pad_event_default (pad, parent, event);
123 }
124
125 static gboolean
gst_ssa_parse_sink_event(GstPad * pad,GstObject * parent,GstEvent * event)126 gst_ssa_parse_sink_event (GstPad * pad, GstObject * parent, GstEvent * event)
127 {
128 gboolean res;
129
130 switch (GST_EVENT_TYPE (event)) {
131 case GST_EVENT_CAPS:
132 {
133 GstCaps *caps;
134
135 gst_event_parse_caps (event, &caps);
136 res = gst_ssa_parse_setcaps (pad, caps);
137 gst_event_unref (event);
138 break;
139 }
140 default:
141 res = gst_pad_event_default (pad, parent, event);
142 break;
143 }
144 return res;
145 }
146
147 static gboolean
gst_ssa_parse_setcaps(GstPad * sinkpad,GstCaps * caps)148 gst_ssa_parse_setcaps (GstPad * sinkpad, GstCaps * caps)
149 {
150 GstSsaParse *parse = GST_SSA_PARSE (GST_PAD_PARENT (sinkpad));
151 GstCaps *outcaps;
152 const GValue *val;
153 GstStructure *s;
154 const guchar bom_utf8[] = { 0xEF, 0xBB, 0xBF };
155 const gchar *end;
156 GstBuffer *priv;
157 GstMapInfo map;
158 gchar *ptr;
159 gsize left, bad_offset;
160 gboolean ret;
161
162 s = gst_caps_get_structure (caps, 0);
163 val = gst_structure_get_value (s, "codec_data");
164 if (val == NULL) {
165 parse->framed = FALSE;
166 GST_ERROR ("Only SSA subtitles embedded in containers are supported");
167 return FALSE;
168 }
169
170 parse->framed = TRUE;
171 parse->send_tags = TRUE;
172
173 priv = (GstBuffer *) g_value_get_boxed (val);
174 g_return_val_if_fail (priv != NULL, FALSE);
175
176 gst_buffer_ref (priv);
177
178 if (!gst_buffer_map (priv, &map, GST_MAP_READ)) {
179 gst_buffer_unref (priv);
180 return FALSE;
181 }
182
183 GST_MEMDUMP_OBJECT (parse, "init section", map.data, map.size);
184
185 ptr = (gchar *) map.data;
186 left = map.size;
187
188 /* skip UTF-8 BOM */
189 if (left >= 3 && memcmp (ptr, bom_utf8, 3) == 0) {
190 ptr += 3;
191 left -= 3;
192 }
193
194 if (!strstr (ptr, "[Script Info]"))
195 goto invalid_init;
196
197 if (!g_utf8_validate (ptr, left, &end)) {
198 bad_offset = (gsize) (end - ptr);
199 GST_WARNING_OBJECT (parse, "Init section is not valid UTF-8. Problem at "
200 "byte offset %" G_GSIZE_FORMAT, bad_offset);
201 /* continue with valid UTF-8 data */
202 left = bad_offset;
203 }
204
205 /* FIXME: parse initial section */
206 if (parse->ini)
207 g_free (parse->ini);
208 parse->ini = g_strndup (ptr, left);
209 GST_LOG_OBJECT (parse, "Init section:\n%s", parse->ini);
210
211 gst_buffer_unmap (priv, &map);
212 gst_buffer_unref (priv);
213
214 outcaps = gst_caps_new_simple ("text/x-raw",
215 "format", G_TYPE_STRING, "pango-markup", NULL);
216
217 ret = gst_pad_set_caps (parse->srcpad, outcaps);
218 gst_caps_unref (outcaps);
219
220 return ret;
221
222 /* ERRORS */
223 invalid_init:
224 {
225 GST_WARNING_OBJECT (parse, "Invalid Init section - no Script Info header");
226 gst_buffer_unmap (priv, &map);
227 gst_buffer_unref (priv);
228 return FALSE;
229 }
230 }
231
232 static gboolean
gst_ssa_parse_remove_override_codes(GstSsaParse * parse,gchar * txt)233 gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
234 {
235 gchar *t, *end;
236 gboolean removed_any = FALSE;
237
238 while ((t = strchr (txt, '{'))) {
239 end = strchr (txt, '}');
240 if (end == NULL) {
241 GST_WARNING_OBJECT (parse, "Missing { for style override code");
242 return removed_any;
243 }
244 /* move terminating NUL character forward as well */
245 memmove (t, end + 1, strlen (end + 1) + 1);
246 removed_any = TRUE;
247 }
248
249 /* these may occur outside of curly brackets. We don't handle the different
250 * wrapping modes yet, so just remove these markers from the text for now */
251 while ((t = strstr (txt, "\\n"))) {
252 t[0] = ' ';
253 t[1] = '\n';
254 }
255 while ((t = strstr (txt, "\\N"))) {
256 t[0] = ' ';
257 t[1] = '\n';
258 }
259 while ((t = strstr (txt, "\\h"))) {
260 t[0] = ' ';
261 t[1] = ' ';
262 }
263
264 return removed_any;
265 }
266
267 /**
268 * gst_ssa_parse_push_line:
269 * @parse: caller element
270 * @txt: text to push
271 * @start: timestamp for the buffer
272 * @duration: duration for the buffer
273 *
274 * Parse the text in a buffer with the given properties and
275 * push it to the srcpad of the @parse element
276 *
277 * Returns: result of the push of the created buffer
278 */
279 static GstFlowReturn
gst_ssa_parse_push_line(GstSsaParse * parse,gchar * txt,GstClockTime start,GstClockTime duration)280 gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
281 GstClockTime start, GstClockTime duration)
282 {
283 GstFlowReturn ret;
284 GstBuffer *buf;
285 gchar *t, *escaped;
286 gint num, i, len;
287
288 num = atoi (txt);
289 GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
290 num, GST_TIME_ARGS (start));
291
292 /* skip all non-text fields before the actual text */
293 t = txt;
294 for (i = 0; i < 8; ++i) {
295 t = strchr (t, ',');
296 if (t == NULL)
297 return GST_FLOW_ERROR;
298 ++t;
299 }
300
301 GST_LOG_OBJECT (parse, "Text : %s", t);
302
303 if (gst_ssa_parse_remove_override_codes (parse, t)) {
304 GST_LOG_OBJECT (parse, "Clean: %s", t);
305 }
306
307 /* we claim to output pango markup, so we must escape the
308 * text even if we don't actually use any pango markup yet */
309 escaped = g_markup_printf_escaped ("%s", t);
310
311 len = strlen (escaped);
312
313 /* allocate enough for a terminating NUL, but don't include it in buf size */
314 buf = gst_buffer_new_and_alloc (len + 1);
315 gst_buffer_fill (buf, 0, escaped, len + 1);
316 gst_buffer_set_size (buf, len);
317 g_free (escaped);
318
319 GST_BUFFER_TIMESTAMP (buf) = start;
320 GST_BUFFER_DURATION (buf) = duration;
321
322 GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
323 " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
324 GST_TIME_ARGS (duration));
325
326 ret = gst_pad_push (parse->srcpad, buf);
327
328 if (ret != GST_FLOW_OK) {
329 GST_DEBUG_OBJECT (parse, "Push of text '%s' returned flow %s", txt,
330 gst_flow_get_name (ret));
331 }
332
333 return ret;
334 }
335
336 static GstFlowReturn
gst_ssa_parse_chain(GstPad * sinkpad,GstObject * parent,GstBuffer * buf)337 gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
338 {
339 GstFlowReturn ret;
340 GstSsaParse *parse = GST_SSA_PARSE (parent);
341 GstClockTime ts;
342 gchar *txt;
343 GstMapInfo map;
344
345 if (G_UNLIKELY (!parse->framed))
346 goto not_framed;
347
348 if (G_UNLIKELY (parse->send_tags)) {
349 GstTagList *tags;
350
351 tags = gst_tag_list_new_empty ();
352 gst_tag_list_add (tags, GST_TAG_MERGE_APPEND, GST_TAG_SUBTITLE_CODEC,
353 "SubStation Alpha", NULL);
354 gst_pad_push_event (parse->srcpad, gst_event_new_tag (tags));
355 parse->send_tags = FALSE;
356 }
357
358 /* make double-sure it's 0-terminated and all */
359 gst_buffer_map (buf, &map, GST_MAP_READ);
360 txt = g_strndup ((gchar *) map.data, map.size);
361 gst_buffer_unmap (buf, &map);
362
363 if (txt == NULL)
364 goto empty_text;
365
366 ts = GST_BUFFER_TIMESTAMP (buf);
367 ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
368
369 if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
370 GstSegment segment;
371
372 /* just advance time without sending anything */
373 gst_segment_init (&segment, GST_FORMAT_TIME);
374 segment.start = ts;
375 segment.time = ts;
376 gst_pad_push_event (parse->srcpad, gst_event_new_segment (&segment));
377 ret = GST_FLOW_OK;
378 }
379
380 gst_buffer_unref (buf);
381 g_free (txt);
382
383 return ret;
384
385 /* ERRORS */
386 not_framed:
387 {
388 GST_ELEMENT_ERROR (parse, STREAM, FORMAT, (NULL),
389 ("Only SSA subtitles embedded in containers are supported"));
390 gst_buffer_unref (buf);
391 return GST_FLOW_NOT_NEGOTIATED;
392 }
393 empty_text:
394 {
395 GST_ELEMENT_WARNING (parse, STREAM, FORMAT, (NULL),
396 ("Received empty subtitle"));
397 gst_buffer_unref (buf);
398 return GST_FLOW_OK;
399 }
400 }
401
402 static GstStateChangeReturn
gst_ssa_parse_change_state(GstElement * element,GstStateChange transition)403 gst_ssa_parse_change_state (GstElement * element, GstStateChange transition)
404 {
405 GstStateChangeReturn ret = GST_STATE_CHANGE_SUCCESS;
406 GstSsaParse *parse = GST_SSA_PARSE (element);
407
408 switch (transition) {
409 case GST_STATE_CHANGE_READY_TO_PAUSED:
410 break;
411 default:
412 break;
413 }
414
415 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
416 if (ret == GST_STATE_CHANGE_FAILURE)
417 return ret;
418
419 switch (transition) {
420 case GST_STATE_CHANGE_PAUSED_TO_READY:
421 g_free (parse->ini);
422 parse->ini = NULL;
423 parse->framed = FALSE;
424 break;
425 default:
426 break;
427 }
428
429 return ret;
430 }
431