/* GStreamer QTtext subtitle parser
 * Copyright (c) 2009 Thiago Santos <thiago.sousa.santos collabora co uk>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
19
/* References:
 * http://www.apple.com/quicktime/tutorials/texttracks.html
 * http://www.apple.com/quicktime/tutorials/textdescriptors.html
 */
24
25 #include "qttextparse.h"
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30
/* One minute and one hour, expressed as multiples of GST_SECOND. */
#define MIN_TO_NSEC (60 * GST_SECOND)
#define HOUR_TO_NSEC (60 * MIN_TO_NSEC)

/* Returns the QTtext context stored in the user_data field of 'state'. */
#define GST_QTTEXT_CONTEXT(state) ((GstQTTextContext *) (state)->user_data)
35
36 typedef struct _GstQTTextContext GstQTTextContext;
37
38 struct _GstQTTextContext
39 {
40 /* timing variables */
41 gint timescale;
42 gboolean absolute;
43 guint64 start_time;
44
45 gboolean markup_open;
46 gboolean need_markup;
47
48 gchar *font;
49 gint font_size;
50 gchar *bg_color;
51 gchar *fg_color;
52
53 gboolean bold;
54 gboolean italic;
55 };
56
57 void
qttext_context_init(ParserState * state)58 qttext_context_init (ParserState * state)
59 {
60 GstQTTextContext *context;
61
62 state->user_data = g_new0 (GstQTTextContext, 1);
63
64 context = GST_QTTEXT_CONTEXT (state);
65
66 /* we use 1000 as a default */
67 context->timescale = 1000;
68 context->absolute = TRUE;
69
70 context->markup_open = FALSE;
71 context->need_markup = FALSE;
72
73 context->font_size = 12;
74 }
75
76 void
qttext_context_deinit(ParserState * state)77 qttext_context_deinit (ParserState * state)
78 {
79 if (state->user_data != NULL) {
80 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
81 g_free (context->font);
82 g_free (context->bg_color);
83 g_free (context->fg_color);
84
85 g_free (state->user_data);
86 state->user_data = NULL;
87 }
88 }
89
90 /*
91 * Reads the string right after the ':'
92 */
93 static gchar *
read_str(const gchar * line,const gchar * end)94 read_str (const gchar * line, const gchar * end)
95 {
96 gint index = 0;
97
98 while (line[index] != ':' && line[index] != '}') {
99 index++;
100 }
101 if (line[index] != ':')
102 return NULL;
103 index++;
104 while (line[index] == ' ')
105 index++;
106
107 return g_strndup (line + index, (end - (line + index)));
108 }
109
110 /* search for the ':' and parse the number right after it */
111 static gint
read_int(const gchar * line)112 read_int (const gchar * line)
113 {
114 gint index = 0;
115 while (line[index] != ':' && line[index] != '}') {
116 index++;
117 }
118 if (line[index] != ':')
119 return 0;
120 index++;
121 return atoi (line + index);
122 }
123
124 /* skip the ':' and then match the following string
125 * with 'match', but only if it before 'upto' */
126 static gboolean
string_match(const gchar * line,const gchar * match,const gchar * upto)127 string_match (const gchar * line, const gchar * match, const gchar * upto)
128 {
129 gchar *result = strstr (line, match);
130 return (result < upto);
131 }
132
133 /*
134 * Reads the color values and stores them in r, g and b.
135 */
136 static gboolean
read_color(const gchar * line,gint * r,gint * g,gint * b)137 read_color (const gchar * line, gint * r, gint * g, gint * b)
138 {
139 gint index = 0;
140 while (line[index] != ':' && line[index] != '}') {
141 index++;
142 }
143 if (line[index] != ':')
144 return FALSE;
145 index++;
146
147 *r = atoi (line + index);
148
149 while (line[index] != '}' && line[index] != ',') {
150 index++;
151 }
152 if (line[index] != ',')
153 return FALSE;
154 index++;
155
156 *g = atoi (line + index);
157
158 while (line[index] != '}' && line[index] != ',') {
159 index++;
160 }
161 if (line[index] != ',')
162 return FALSE;
163 index++;
164
165 *b = atoi (line + index);
166
167 return TRUE;
168 }
169
170 static gchar *
make_color(gint r,gint g,gint b)171 make_color (gint r, gint g, gint b)
172 {
173 /* qttext goes up to 65535, while pango goes to 255 */
174 r /= 256;
175 g /= 256;
176 b /= 256;
177 return g_strdup_printf ("#%02X%02X%02X", r, g, b);
178 }
179
180 static gboolean
qttext_parse_tag(ParserState * state,const gchar * line,gint * index)181 qttext_parse_tag (ParserState * state, const gchar * line, gint * index)
182 {
183 gchar *next;
184 gint next_index;
185 gint aux;
186 gchar *str;
187 gint r, g, b;
188 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
189
190 g_assert (line[*index] == '{');
191
192 next = strchr (line + *index, '}');
193 if (next == NULL) {
194 goto error_out;
195 } else {
196 next_index = 1 + (next - line);
197 }
198 g_assert (line[next_index - 1] == '}');
199
200 *index = *index + 1; /* skip the { */
201
202 /* now identify our tag */
203 /* FIXME: those should be case unsensitive */
204 /* TODO: there are other tags that could be added here */
205 if (strncmp (line + *index, "QTtext", 6) == 0) {
206 /* NOP */
207
208 } else if (strncmp (line + *index, "font", 4) == 0) {
209 str = read_str (line + *index + 4, line + next_index - 1);
210 if (str) {
211 g_free (context->font);
212 context->font = str;
213 context->need_markup = TRUE;
214 GST_DEBUG ("Setting qttext font to %s", str);
215 } else {
216 GST_WARNING ("Failed to parse qttext font at line: %s", line);
217 }
218
219 } else if (strncmp (line + *index, "size", 4) == 0) {
220 aux = read_int (line + *index + 4);
221 if (aux == 0) {
222 GST_WARNING ("Invalid size at line %s, using 12", line);
223 context->font_size = 12;
224 } else {
225 GST_DEBUG ("Setting qttext font-size to: %d", aux);
226 context->font_size = aux;
227 }
228 context->need_markup = TRUE;
229
230 } else if (strncmp (line + *index, "textColor", 9) == 0) {
231 if (read_color (line + *index + 9, &r, &g, &b)) {
232 context->fg_color = make_color (r, g, b);
233 GST_DEBUG ("Setting qttext fg color to %s", context->fg_color);
234 } else {
235 GST_WARNING ("Failed to read textColor at line %s", line);
236 }
237 context->need_markup = TRUE;
238
239 } else if (strncmp (line + *index, "backColor", 9) == 0) {
240 if (read_color (line + *index + 9, &r, &g, &b)) {
241 context->bg_color = make_color (r, g, b);
242 GST_DEBUG ("Setting qttext bg color to %s", context->bg_color);
243 } else {
244 GST_WARNING ("Failed to read backColor at line %s, disabling", line);
245 g_free (context->bg_color);
246 context->bg_color = NULL;
247 }
248 context->need_markup = TRUE;
249
250 } else if (strncmp (line + *index, "plain", 5) == 0) {
251 context->bold = FALSE;
252 context->italic = FALSE;
253 context->need_markup = TRUE;
254 GST_DEBUG ("Setting qttext style to plain");
255
256 } else if (strncmp (line + *index, "bold", 4) == 0) {
257 context->bold = TRUE;
258 context->italic = FALSE;
259 context->need_markup = TRUE;
260 GST_DEBUG ("Setting qttext style to bold");
261
262 } else if (strncmp (line + *index, "italic", 6) == 0) {
263 context->bold = FALSE;
264 context->italic = TRUE;
265 context->need_markup = TRUE;
266 GST_DEBUG ("Setting qttext style to italic");
267
268 } else if (strncmp (line + *index, "timescale", 9) == 0) {
269 aux = read_int (line + *index + 9);
270 if (aux == 0) {
271 GST_WARNING ("Couldn't interpret timescale at line %s, using 1000", line);
272 context->timescale = 1000;
273 } else {
274 GST_DEBUG ("Setting qttext timescale to: %d", aux);
275 context->timescale = aux;
276 }
277
278 } else if (strncmp (line + *index, "timestamps", 10) == 0) {
279 if (string_match (line + *index + 10, "relative", line + next_index)) {
280 GST_DEBUG ("Setting qttext timestamps to relative");
281 context->absolute = FALSE;
282 } else {
283 /* call it absolute otherwise */
284 GST_DEBUG ("Setting qttext timestamps to absolute");
285 context->absolute = TRUE;
286 }
287
288 } else {
289 GST_WARNING ("Unused qttext tag starting at: %s", line + *index);
290 }
291
292 *index = next_index;
293 return TRUE;
294
295 error_out:
296 {
297 GST_WARNING ("Failed to parse qttext tag at line %s", line);
298 return FALSE;
299 }
300 }
301
302 static guint64
qttext_parse_timestamp(ParserState * state,const gchar * line,gint index)303 qttext_parse_timestamp (ParserState * state, const gchar * line, gint index)
304 {
305 int ret;
306 gint hour, min, sec, dec;
307 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
308
309 ret = sscanf (line + index, "[%d:%d:%d.%d]", &hour, &min, &sec, &dec);
310 if (ret != 3 && ret != 4) {
311 /* bad timestamp */
312 GST_WARNING ("Bad qttext timestamp found: %s", line);
313 return 0;
314 }
315
316 if (ret == 3) {
317 /* be forgiving for missing decimal part */
318 dec = 0;
319 }
320
321 /* parse the decimal part according to the timescale */
322 g_assert (context->timescale != 0);
323 dec = (GST_SECOND * dec) / context->timescale;
324
325 /* return the result */
326 return hour * HOUR_TO_NSEC + min * MIN_TO_NSEC + sec * GST_SECOND + dec;
327 }
328
329 static void
qttext_open_markup(ParserState * state)330 qttext_open_markup (ParserState * state)
331 {
332 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
333
334 g_string_append (state->buf, "<span");
335
336 /* add your markup tags here */
337 if (context->font)
338 g_string_append_printf (state->buf, " font='%s %d'", context->font,
339 context->font_size);
340 else
341 g_string_append_printf (state->buf, " font='%d'", context->font_size);
342
343 if (context->bg_color)
344 g_string_append_printf (state->buf, " bgcolor='%s'", context->bg_color);
345 if (context->fg_color)
346 g_string_append_printf (state->buf, " color='%s'", context->fg_color);
347
348 if (context->bold)
349 g_string_append (state->buf, " weight='bold'");
350 if (context->italic)
351 g_string_append (state->buf, " style='italic'");
352
353 g_string_append (state->buf, ">");
354 }
355
356 static void
qttext_prepare_text(ParserState * state)357 qttext_prepare_text (ParserState * state)
358 {
359 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
360 if (state->buf == NULL) {
361 state->buf = g_string_sized_new (256); /* this should be enough */
362 } else {
363 g_string_append (state->buf, "\n");
364 }
365
366 /* if needed, add pango markup */
367 if (context->need_markup) {
368 if (context->markup_open) {
369 g_string_append (state->buf, "</span>");
370 }
371 qttext_open_markup (state);
372 context->markup_open = TRUE;
373 }
374 }
375
376 static void
qttext_parse_text(ParserState * state,const gchar * line,gint index)377 qttext_parse_text (ParserState * state, const gchar * line, gint index)
378 {
379 qttext_prepare_text (state);
380 g_string_append (state->buf, line + index);
381 }
382
383 static gchar *
qttext_get_text(ParserState * state)384 qttext_get_text (ParserState * state)
385 {
386 gchar *ret;
387 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
388 if (state->buf == NULL)
389 return NULL;
390
391 if (context->markup_open) {
392 g_string_append (state->buf, "</span>");
393 }
394 ret = g_string_free (state->buf, FALSE);
395 state->buf = NULL;
396 context->markup_open = FALSE;
397 return ret;
398 }
399
400 gchar *
parse_qttext(ParserState * state,const gchar * line)401 parse_qttext (ParserState * state, const gchar * line)
402 {
403 gint i;
404 guint64 ts;
405 gchar *ret = NULL;
406 GstQTTextContext *context = GST_QTTEXT_CONTEXT (state);
407
408 i = 0;
409 while (line[i] != '\0') {
410 /* find first interesting character from 'i' onwards */
411
412 if (line[i] == '{') {
413 /* this is a tag, parse it */
414 if (!qttext_parse_tag (state, line, &i)) {
415 break;
416 }
417 } else if (line[i] == '[') {
418 /* this is a time, convert it to a timestamp */
419 ts = qttext_parse_timestamp (state, line, i);
420
421 /* check if we have pending text to send, in case we prepare it */
422 if (state->buf) {
423 ret = qttext_get_text (state);
424 if (context->absolute)
425 state->duration = ts - context->start_time;
426 else
427 state->duration = ts;
428 state->start_time = context->start_time;
429 }
430 state->buf = NULL;
431
432 if (ts == 0) {
433 /* this is an error */
434 } else {
435 if (context->absolute)
436 context->start_time = ts;
437 else
438 context->start_time += ts;
439 }
440
441 /* we assume there is nothing else on this line */
442 break;
443
444 } else if (line[i] == ' ' || line[i] == '\t') {
445 i++; /* NOP */
446 } else {
447 /* this is the actual text, output the rest of the line as it */
448 qttext_parse_text (state, line, i);
449 break;
450 }
451 }
452 return ret;
453 }
454