1 /*
2 * Copyright (c) 2012 Clément Bœsch
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * SAMI subtitle decoder
24 * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25 */
26
27 #include "ass.h"
28 #include "libavutil/avstring.h"
29 #include "libavutil/bprint.h"
30 #include "htmlsubtitles.h"
31
32 typedef struct {
33 AVBPrint source;
34 AVBPrint content;
35 AVBPrint encoded_source;
36 AVBPrint encoded_content;
37 AVBPrint full;
38 int readorder;
39 } SAMIContext;
40
/**
 * Convert the SAMI markup of one event into ASS text stored in sami->full.
 *
 * Every <P ...> paragraph found in src is scanned: a paragraph whose tag
 * contains ID=Source (or ID="Source") replaces sami->source, any other
 * paragraph is appended to sami->content (paragraphs separated by "\N").
 * Both buffers are then run through ff_htmlmarkup_to_ass() and joined in
 * sami->full, the source part being wrapped in italics.
 *
 * @param avctx codec context; its priv_data is the SAMIContext to fill
 * @param src   NUL-terminated SAMI markup of the event (left untouched, a
 *              private copy is parsed)
 * @return the value of the last ff_htmlmarkup_to_ass() call on success,
 *         AVERROR(ENOMEM) if src cannot be duplicated, -1 if a paragraph
 *         body starts with "&nbsp;" (empty event, to be skipped; in that
 *         case sami->full is not refreshed), or the negative value
 *         reported by ff_htmlmarkup_to_ass()
 */
static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
{
    SAMIContext *sami = avctx->priv_data;
    int ret = 0;
    char *tag = NULL;
    char *dupsrc = av_strdup(src); /* av_strtok() below tokenizes in place */
    char *p = dupsrc;
    AVBPrint *dst_content = &sami->encoded_content;
    AVBPrint *dst_source = &sami->encoded_source;

    if (!dupsrc)
        return AVERROR(ENOMEM);

    /* sami->source is deliberately not cleared here: it is only reset when
     * a new ID=Source paragraph shows up */
    av_bprint_clear(&sami->encoded_content);
    av_bprint_clear(&sami->content);
    av_bprint_clear(&sami->encoded_source);
    for (;;) {
        char *saveptr = NULL;
        int prev_chr_is_space = 0;
        AVBPrint *dst = &sami->content;

        /* parse & extract paragraph tag */
        p = av_stristr(p, "<P");
        if (!p)
            break;
        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
            p++;
            continue;
        }
        if (dst->len) // add a separator with the previous paragraph if there was one
            av_bprintf(dst, "\\N");
        tag = av_strtok(p, ">", &saveptr);
        if (!tag || !saveptr)
            break;
        p = saveptr;

        /* check if the current paragraph is the "source" (speaker name) */
        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
            dst = &sami->source;
            av_bprint_clear(dst);
        }

        /* if empty event -> skip subtitle; SAMI marks a blank caption with
         * a paragraph made of a non-breaking-space entity (6 characters) */
        while (av_isspace(*p))
            p++;
        if (!strncmp(p, "&nbsp;", 6)) {
            ret = -1;
            goto end;
        }

        /* extract the text, stripping most of the tags */
        while (*p) {
            if (*p == '<') {
                /* the next paragraph starts here: let the outer loop handle it */
                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
                    break;
            }
            if (!av_strncasecmp(p, "<BR", 3)) {
                /* line break: emit "\N" and skip up to and including '>' */
                av_bprintf(dst, "\\N");
                p++;
                while (*p && *p != '>')
                    p++;
                if (!*p)
                    break;
                if (*p == '>')
                    p++;
                continue;
            }
            /* copy the character, collapsing runs of whitespace into one space */
            if (!av_isspace(*p))
                av_bprint_chars(dst, *p, 1);
            else if (!prev_chr_is_space)
                av_bprint_chars(dst, ' ', 1);
            prev_chr_is_space = av_isspace(*p);
            p++;
        }
    }

    /* assemble the final event: optional italic speaker line, then the text */
    av_bprint_clear(&sami->full);
    if (sami->source.len) {
        ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
        if (ret < 0)
            goto end;
        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
    }
    ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
    if (ret < 0)
        goto end;
    av_bprintf(&sami->full, "%s", sami->encoded_content.str);

end:
    av_free(dupsrc);
    return ret;
}
133
/**
 * Decode one SAMI packet into an ASS rectangle of the output subtitle.
 *
 * A packet without data or with a non-positive size adds nothing.
 *
 * @param avctx       codec context holding the SAMIContext
 * @param data        output AVSubtitle
 * @param got_sub_ptr set to non-zero when the subtitle holds at least one rect
 * @param avpkt       input packet; its data is parsed as a C string
 * @return avpkt->size on success, otherwise the negative value returned by
 *         sami_paragraph_to_ass() or ff_ass_add_rect()
 */
static int sami_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_sub_ptr, AVPacket *avpkt)
{
    SAMIContext *ctx = avctx->priv_data;
    AVSubtitle *subtitle = data;
    const char *text = avpkt->data;

    if (text && avpkt->size > 0) {
        int err = sami_paragraph_to_ass(avctx, text);

        // TODO: pass escaped sami->encoded_source.str as source
        if (err >= 0)
            err = ff_ass_add_rect(subtitle, ctx->full.str, ctx->readorder++, 0, NULL, NULL);
        if (err < 0)
            return err;
    }

    *got_sub_ptr = subtitle->num_rects > 0;
    return avpkt->size;
}
153
sami_init(AVCodecContext * avctx)154 static av_cold int sami_init(AVCodecContext *avctx)
155 {
156 SAMIContext *sami = avctx->priv_data;
157 av_bprint_init(&sami->source, 0, 2048);
158 av_bprint_init(&sami->content, 0, 2048);
159 av_bprint_init(&sami->encoded_source, 0, 2048);
160 av_bprint_init(&sami->encoded_content, 0, 2048);
161 av_bprint_init(&sami->full, 0, 2048);
162 return ff_ass_subtitle_header_default(avctx);
163 }
164
sami_close(AVCodecContext * avctx)165 static av_cold int sami_close(AVCodecContext *avctx)
166 {
167 SAMIContext *sami = avctx->priv_data;
168 av_bprint_finalize(&sami->source, NULL);
169 av_bprint_finalize(&sami->content, NULL);
170 av_bprint_finalize(&sami->encoded_source, NULL);
171 av_bprint_finalize(&sami->encoded_content, NULL);
172 av_bprint_finalize(&sami->full, NULL);
173 return 0;
174 }
175
/**
 * Flush callback: restart the read-order counter from zero, unless the
 * AV_CODEC_FLAG2_RO_FLUSH_NOOP flag is set in avctx->flags2.
 */
static void sami_flush(AVCodecContext *avctx)
{
    SAMIContext *ctx = avctx->priv_data;

    if (avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)
        return;

    ctx->readorder = 0;
}
182
183 AVCodec ff_sami_decoder = {
184 .name = "sami",
185 .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186 .type = AVMEDIA_TYPE_SUBTITLE,
187 .id = AV_CODEC_ID_SAMI,
188 .priv_data_size = sizeof(SAMIContext),
189 .init = sami_init,
190 .close = sami_close,
191 .decode = sami_decode_frame,
192 .flush = sami_flush,
193 };
194