1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "gst_subtitle_common.h"
17 #include "securec.h"
18 #include "gst_subtitle_base_parse_wrap.h"
19
20 namespace {
21 constexpr gsize MAX_BUFFER_SIZE = 100000000;
22 constexpr guint TYPEFIND_SIZE = 1025;
23 constexpr guint TYPEFIND_MIN_SIZE = 9;
24 constexpr guint BOM_OF_UTF_8 = 3;
25 constexpr guint FIRST_INDEX_OF_UTF_8 = 0;
26 constexpr guint SECOND_INDEX_OF_UTF_8 = 1;
27 constexpr guint THIRD_INDEX_OF_UTF_8 = 2;
28 }
29
/**
 * Duplicate a string, optionally limited to a given number of bytes.
 *
 * @str: source string; must not be nullptr.
 * @ndup: when TRUE at most @len bytes are duplicated, otherwise the whole
 *        NUL-terminated string is duplicated and @len is ignored.
 * @len: byte count used when @ndup is TRUE.
 *
 * Returns: a newly allocated string produced by g_strndup(), or nullptr when
 * @str is nullptr or the requested length exceeds MAX_BUFFER_SIZE.
 */
gchar *gst_subtitle_str_dup(const gchar *str, gboolean ndup, gsize len)
{
    g_return_val_if_fail(str != nullptr, nullptr);

    gsize dup_len;
    if (ndup) {
        dup_len = len;
    } else {
        dup_len = static_cast<gsize>(strlen(str));
    }

    /* refuse unreasonably large duplications */
    g_return_val_if_fail(dup_len <= MAX_BUFFER_SIZE, nullptr);

    gchar *copy = g_strndup(str, dup_len);
    return copy;
}
39
gst_subtitle_free_frame(GstSubtitleBaseParse * base,GstSubtitleDecodedFrame * decoded_frame)40 void gst_subtitle_free_frame(GstSubtitleBaseParse *base, GstSubtitleDecodedFrame *decoded_frame)
41 {
42 g_return_if_fail((base != nullptr) && (decoded_frame != nullptr));
43
44 if (decoded_frame->data != nullptr) {
45 g_free(decoded_frame->data);
46 decoded_frame->data = nullptr;
47 }
48 g_return_if_fail(memset_s(decoded_frame,
49 sizeof(GstSubtitleDecodedFrame), 0, sizeof(GstSubtitleDecodedFrame)) == EOK);
50 }
51
detect_encoding_and_convert_str(gchar ** encoding,const gchar * str,guint len)52 static gchar *detect_encoding_and_convert_str(gchar **encoding, const gchar *str, guint len)
53 {
54 g_return_val_if_fail((str != nullptr) && (len > 0), nullptr);
55
56 gchar *ret = nullptr;
57
58 if ((len >= BOM_OF_UTF_8) && ((guint8)str[FIRST_INDEX_OF_UTF_8] == 0xEF) &&
59 ((guint8)str[SECOND_INDEX_OF_UTF_8] == 0xBB) && ((guint8)str[THIRD_INDEX_OF_UTF_8] == 0xBF)) {
60 GST_INFO("utf-8 detected!");
61 *encoding = g_strdup("UTF-8");
62 g_return_val_if_fail(encoding != nullptr, nullptr);
63
64 str += BOM_OF_UTF_8;
65 len -= BOM_OF_UTF_8;
66 } else {
67 GST_INFO("not utf-8 detected!");
68 *encoding = g_strdup("NOT-UTF-8");
69 g_return_val_if_fail(encoding != nullptr, nullptr);
70 }
71
72 ret = static_cast<gchar *>(g_malloc0(len));
73 g_return_val_if_fail(ret != nullptr, nullptr);
74
75 if (memcpy_s(ret, len, str, len) != EOK) {
76 g_free(ret);
77 ret = nullptr;
78 }
79
80 return ret;
81 }
82
caps_detect_handle(const gchar * encoding,GstTypeFind * tf,const GstSubtitleFormatDetect detect_caps_pfn,gchar * converted_str)83 static void caps_detect_handle(const gchar *encoding, GstTypeFind *tf,
84 const GstSubtitleFormatDetect detect_caps_pfn, gchar *converted_str)
85 {
86 GstCaps *caps = nullptr;
87
88 g_return_if_fail(detect_caps_pfn != nullptr);
89 caps = detect_caps_pfn(converted_str);
90
91 if (caps != nullptr) {
92 GST_DEBUG("subtitle encoding: %s", encoding);
93 gst_type_find_suggest(tf, GST_TYPE_FIND_MAXIMUM, caps);
94 gst_caps_unref(caps);
95 }
96 }
97
probe_type_and_detect_caps(const gchar * str,guint tf_len,GstTypeFind * tf,const GstSubtitleFormatDetect detect_caps_pfn)98 static void probe_type_and_detect_caps(const gchar *str, guint tf_len,
99 GstTypeFind *tf, const GstSubtitleFormatDetect detect_caps_pfn)
100 {
101 g_return_if_fail((str != nullptr) && (tf != nullptr) && (detect_caps_pfn != nullptr) &&
102 (tf_len >= TYPEFIND_MIN_SIZE));
103
104 gchar *encoding = nullptr;
105 gchar *converted_str = detect_encoding_and_convert_str(&encoding, str, tf_len - 1);
106 if (converted_str == nullptr) {
107 GST_DEBUG("Encoding detected but conversion failed");
108 if (encoding != nullptr) {
109 GST_DEBUG("Encoding is %s", encoding);
110 g_free(encoding);
111 }
112 return;
113 }
114
115 /* call subclass caps detection function */
116 caps_detect_handle(encoding, tf, detect_caps_pfn, converted_str);
117
118 g_free(converted_str);
119 g_free(encoding);
120 }
121
gst_subtitle_typefind(GstTypeFind * tf,const gpointer priv,const GstSubtitleFormatDetect detect_caps_pfn)122 void gst_subtitle_typefind(GstTypeFind *tf, const gpointer priv,
123 const GstSubtitleFormatDetect detect_caps_pfn)
124 {
125 (void)priv;
126
127 g_return_if_fail((tf != nullptr) && (detect_caps_pfn != nullptr));
128
129 /* video or audio, no need to check characterset, the tf will mask sub */
130 g_return_if_fail(!gst_type_find_is_mask_sub(tf));
131
132 /* extract detected data */
133 guint tf_len = (guint)gst_type_find_get_length(tf);
134 tf_len = (tf_len >= TYPEFIND_SIZE) ? TYPEFIND_SIZE : tf_len;
135 g_return_if_fail(tf_len >= TYPEFIND_MIN_SIZE);
136
137 const guint8 *data = gst_type_find_peek(tf, (gint64)0, tf_len);
138 g_return_if_fail(data != nullptr);
139
140 gchar *str = static_cast<gchar *>(g_malloc0(tf_len));
141 g_return_if_fail(str != nullptr);
142
143 if (memcpy_s(str, tf_len, data, tf_len - 1) != EOK) {
144 GST_ERROR("memcpy_s failed");
145 g_free(str);
146 return;
147 }
148 str[tf_len - 1] = '\0';
149
150 probe_type_and_detect_caps(str, tf_len, tf, detect_caps_pfn);
151 g_free(str);
152 }
153
154 /* read a line of text data from the external buffer */
gst_subtitle_read_line(GstSubtitleBaseParse * base,gchar ** out_line)155 gsize gst_subtitle_read_line(GstSubtitleBaseParse *base, gchar **out_line)
156 {
157 g_return_val_if_fail((base != nullptr) && (out_line != nullptr), 0);
158
159 gchar *str = nullptr;
160 gsize consumed = 0;
161 *out_line = nullptr;
162 GstSubtitleBufferContext *buf_ctx = &base->buffer_ctx;
163
164 g_return_val_if_fail(buf_ctx->text != nullptr, consumed);
165 GST_DEBUG_OBJECT(base, "read line from the external buffer");
166
167 while (TRUE) {
168 g_return_val_if_fail((buf_ctx->text != nullptr) && (buf_ctx->text->str != nullptr), consumed);
169 str = buf_ctx->text->str;
170
171 if (str[0] == '\n') {
172 buf_ctx->text = g_string_erase(buf_ctx->text, 0, 1);
173 continue;
174 }
175 const char *line_end = strchr(str, '\n');
176 if (line_end == nullptr) { // end of line not found, return for more data
177 *out_line = nullptr;
178 break;
179 }
180 gsize line_len = static_cast<gsize>(line_end - str);
181 gchar *line = gst_subtitle_str_dup(str, TRUE, line_len + 1);
182 g_return_val_if_fail(line != nullptr, consumed);
183
184 buf_ctx->text = g_string_erase(buf_ctx->text, 0, (gssize)(line_len + 1));
185 *out_line = line;
186 consumed = line_len + 1;
187 break;
188 }
189
190 return consumed;
191 }
192
193 /* encode the decoded subtitle frame @decoded_frame into gstbuffer and push it downstream */
gst_subtitle_push_buffer(GstSubtitleBaseParse * self,const GstSubtitleDecodedFrame * decoded_frame)194 GstFlowReturn gst_subtitle_push_buffer(GstSubtitleBaseParse *self,
195 const GstSubtitleDecodedFrame *decoded_frame)
196 {
197 GstFlowReturn ret = GST_FLOW_NOT_LINKED;
198
199 g_return_val_if_fail((self != nullptr) && (decoded_frame != nullptr), ret);
200
201 GstSubtitleStream *stream = gst_subtitle_get_stream_by_id(self, decoded_frame->stream_index);
202 g_return_val_if_fail(stream != nullptr, GST_FLOW_NOT_LINKED);
203 g_return_val_if_fail(handle_text_subtitle(self, decoded_frame, stream, &ret), GST_FLOW_ERROR);
204 if (!self->from_internal) {
205 ret = GST_FLOW_OK;
206 }
207
208 return ret;
209 }
210