• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer tmplayer format subtitle parser
2  * Copyright (C) 2006-2008 Tim-Philipp Müller <tim centricular net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 #include "tmplayerparse.h"
21 
22 #include <stdio.h>
23 #include <string.h>
24 
25 /* From http://forum.doom9.org/archive/index.php/t-81059.html:
26  *
27  * TMPlayer format, which comes in five varieties:
28  *
29  * time-base 00:00:00:
30  * 00:00:50:This is the Earth at a time|when the dinosaurs roamed...
31  * 00:00:53:
32  * 00:00:54:a lush and fertile planet.
33  * 00:00:56:
34  *
35  * time-base 0:00:00:
36  * 0:00:50:This is the Earth at a time|when the dinosaurs roamed...
37  * 0:00:53:
38  * 0:00:54:a lush and fertile planet.
39  * 0:00:56:
40  *
41  * time-base 00:00:00=
42  * 00:00:50=This is the Earth at a time|when the dinosaurs roamed...
43  * 00:00:53=
44  * 00:00:54=a lush and fertile planet.
45  * 00:00:56=
46  *
47  * time-base 0:00:00=
48  * 0:00:50=This is the Earth at a time|when the dinosaurs roamed...
49  * 0:00:53=
50  * 0:00:54=a lush and fertile planet.
51  * 0:00:56=
52  *
53  * and multiline time-base 00:00:00,1=
54  * 00:00:50,1=This is the Earth at a time
55  * 00:00:50,2=when the dinosaurs roamed...
56  * 00:00:53,1=
57  * 00:00:54,1=a lush and fertile planet.
58  * 00:00:56,1=
59  *
60  * --------------------------------------------------------------------------
61  *
62  * And another variety (which is 'time-base 00:00:00:' but without the
62  * text-less timestamp lines that mark the end of each subtitle):
63  *
64  * 00:00:01:This is the Earth at a time|when the dinosaurs roamed...
65  * 00:00:03:a lush and fertile planet.
66  * 00:00:06:More text here
67  * 00:00:12:Yet another line
68  *
69  */
70 
71 static gchar *
tmplayer_process_buffer(ParserState * state)72 tmplayer_process_buffer (ParserState * state)
73 {
74   gchar *ret;
75 
76   ret = g_strndup (state->buf->str, state->buf->len);
77   g_strdelimit (ret, "|", '\n');
78   g_string_truncate (state->buf, 0);
79   return ret;
80 }
81 
82 static gchar *
tmplayer_parse_line(ParserState * state,const gchar * line,guint line_num)83 tmplayer_parse_line (ParserState * state, const gchar * line, guint line_num)
84 {
85   GstClockTime ts = GST_CLOCK_TIME_NONE;
86   const gchar *text_start = NULL;
87   gchar *ret = NULL;
88   gchar divc = '\0';
89   guint h, m, s, l = 1;
90 
91   if (sscanf (line, "%u:%02u:%02u,%u%c", &h, &m, &s, &l, &divc) == 5 &&
92       (divc == '=')) {
93     GST_LOG ("multiline format %u %u %u %u", h, m, s, l);
94     ts = GST_SECOND * ((((h * 60) + m) * 60) + s);
95     text_start = strchr (line, '=');
96   } else if (sscanf (line, "%u:%02u:%02u%c", &h, &m, &s, &divc) == 4 &&
97       (divc == '=' || divc == ':')) {
98     GST_LOG ("single line format %u %u %u %u %c", h, m, s, l, divc);
99     ts = GST_SECOND * ((((h * 60) + m) * 60) + s);
100     text_start = strchr (line + 6, divc);
101   } else if (line[0] == '\0' && state->buf->len > 0 &&
102       GST_CLOCK_TIME_IS_VALID (state->start_time)) {
103     /* if we get an empty line (could be the end of the file, but doesn't have
104      * to be), just push whatever is still in the buffer without a duration */
105     GST_LOG ("empty line, and there's still text in the buffer");
106     ret = tmplayer_process_buffer (state);
107     state->duration = GST_CLOCK_TIME_NONE;
108     return ret;
109   } else {
110     GST_WARNING ("failed to parse line: '%s'", line);
111     return NULL;
112   }
113 
114   /* if this is a line without text, or the first line in a multiline file,
115    * process and return the data in the buffer, which is the previous line(s) */
116   if (text_start == NULL || text_start[1] == '\0' ||
117       (l == 1 && state->buf->len > 0)) {
118 
119     if (GST_CLOCK_TIME_IS_VALID (state->start_time) &&
120         state->start_time < ts && line_num > 0) {
121       ret = tmplayer_process_buffer (state);
122       state->duration = ts - state->start_time;
123       /* ..and append current line's text (if there is any) for the next round.
124        * We don't have to store ts as pending_start_time, since we deduce the
125        * durations from the start times anyway, so as long as the parser just
126        * forwards state->start_time by duration after it pushes the line we
127        * are about to return it will all be good. */
128       g_string_append (state->buf, text_start + 1);
129     } else if (line_num > 0) {
130       GST_WARNING ("end of subtitle unit but no valid start time?!");
131     }
132   } else {
133     if (l > 1)
134       g_string_append_c (state->buf, '\n');
135     g_string_append (state->buf, text_start + 1);
136     state->start_time = ts;
137   }
138 
139   GST_LOG ("returning: '%s'", GST_STR_NULL (ret));
140   return ret;
141 }
142 
143 gchar *
parse_tmplayer(ParserState * state,const gchar * line)144 parse_tmplayer (ParserState * state, const gchar * line)
145 {
146   gchar *ret;
147 
148   /* GST_LOG ("Parsing: %s", line); */
149 
150   ret = tmplayer_parse_line (state, line, state->state);
151   ++state->state;
152 
153   return ret;
154 }
155