1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #define LOG_TAG "UnifiedHtmlRecordProcess"
16 #include "unified_html_record_process.h"
17
18 #include <regex>
19
20 #include "file_uri.h"
21 #include "html.h"
22 #include "logger.h"
23
24 namespace OHOS {
25 namespace UDMF {
// Regex used by SplitHtmlWithImgLabel: lazily matches from "<img" up to the first following '>'.
constexpr const char *IMG_TAG_PATTERN = "<img.*?>";
// Regex used by SplitHtmlWithImgSrcLabel: group 1 is the opening quote (' or "), group 2 is the
// attribute value, and the back-reference \1 requires the closing quote to equal the opening one.
constexpr const char *IMG_TAG_SRC_PATTERN = "src=(['\"])(.*?)\\1";
// Only the length of this literal is used in this file: the number of characters that precede the
// attribute value inside a src match (it is the same for both quote styles).
constexpr const char *IMG_TAG_SRC_HEAD = "src=\"";
// Prefix that IsLocalURI accepts as a local file URI.
constexpr const char *IMG_LOCAL_URI = "file:///";
// Scheme separator; IsLocalURI treats any value that does not contain it as local.
constexpr const char *IMG_LOCAL_PATH = "://";
// Prefix checked by RebuildHtmlContent before it resolves a URI through FileUri, and re-applied afterwards.
constexpr const char *FILE_SCHEME_PREFIX = "file://";
32
/**
 * Strict "greater than" ordering for uint32_t keys.
 * Used as a std::map comparator so that iteration visits the largest key first.
 */
struct Cmp {
    bool operator()(const uint32_t &lhs, const uint32_t &rhs) const
    {
        // lhs is ordered before rhs exactly when it is the larger of the two.
        return rhs < lhs;
    }
};
39
RebuildHtmlRecord(UnifiedData & unifiedData)40 void UnifiedHtmlRecordProcess::RebuildHtmlRecord(UnifiedData &unifiedData)
41 {
42 LOG_DEBUG(UDMF_CLIENT, "start");
43 auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
44 for (auto &record : unifiedData.GetRecords()) {
45 if (record == nullptr || record->GetUris().empty()) {
46 continue;
47 }
48 if (!record->HasType(utdId)) {
49 continue;
50 }
51 if (record->GetType() == UDType::HTML) {
52 auto htmlRecord = std::static_pointer_cast<Html>(record);
53 auto rebuildContent = RebuildHtmlContent(htmlRecord->GetHtmlContent(), record->GetUris());
54 if (!rebuildContent.empty()) {
55 htmlRecord->SetHtmlContent(rebuildContent);
56 }
57 } else {
58 ProcessEntry(record);
59 }
60 }
61 }
62
ProcessEntry(const std::shared_ptr<UnifiedRecord> & record)63 void UnifiedHtmlRecordProcess::ProcessEntry(const std::shared_ptr<UnifiedRecord> &record)
64 {
65 if (record->GetInnerEntries() == nullptr) {
66 return;
67 }
68 for (auto &entry : *(record->GetInnerEntries())) {
69 auto udType = static_cast<UDType>(UtdUtils::GetUtdEnumFromUtdId(entry.first));
70 if (udType == UDType::HTML && std::holds_alternative<std::shared_ptr<Object>>(entry.second)) {
71 RebuildEntry(record->GetUris(), entry.second);
72 }
73 }
74 }
75
RebuildEntry(const std::vector<UriInfo> & uris,const ValueType & value)76 void UnifiedHtmlRecordProcess::RebuildEntry(const std::vector<UriInfo> &uris, const ValueType &value)
77 {
78 auto object = std::get<std::shared_ptr<Object>>(value);
79 auto iter = object->value_.find(HTML_CONTENT);
80 if (iter != object->value_.end()) {
81 if (std::holds_alternative<std::string>(iter->second)) {
82 auto content = std::get<std::string>(iter->second);
83 auto rebuildContent = RebuildHtmlContent(content, uris);
84 if (!rebuildContent.empty()) {
85 object->value_[HTML_CONTENT] = rebuildContent;
86 }
87 }
88 }
89 }
90
RebuildHtmlContent(const std::string & str,const std::vector<UriInfo> & uris)91 std::string UnifiedHtmlRecordProcess::RebuildHtmlContent(const std::string &str, const std::vector<UriInfo> &uris)
92 {
93 std::map<uint32_t, std::pair<std::string, std::string>, Cmp> replaceUris;
94 std::string strResult = str;
95 for (auto &uri : uris) {
96 if (uri.dfsUri.empty()) {
97 continue;
98 }
99 std::string realUri = uri.dfsUri;
100 if (realUri.substr(0, strlen(FILE_SCHEME_PREFIX)) == FILE_SCHEME_PREFIX) {
101 AppFileService::ModuleFileUri::FileUri fileUri(uri.dfsUri);
102 realUri = FILE_SCHEME_PREFIX;
103 realUri += fileUri.GetRealPath();
104 }
105 replaceUris[uri.position] = std::make_pair(std::move(uri.oriUri), std::move(realUri));
106 }
107 if (replaceUris.empty()) {
108 return "";
109 }
110 LOG_INFO(UDMF_CLIENT, "replaceUris size=%{public}zu", replaceUris.size());
111 for (auto &replaceUri : replaceUris) {
112 strResult.replace(replaceUri.first, replaceUri.second.first.size(), replaceUri.second.second);
113 }
114 return strResult;
115 }
116
GetUriFromHtmlRecord(UnifiedData & unifiedData)117 void UnifiedHtmlRecordProcess::GetUriFromHtmlRecord(UnifiedData &unifiedData)
118 {
119 LOG_DEBUG(UDMF_CLIENT, "start");
120 auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
121 for (auto &record : unifiedData.GetRecords()) {
122 if (record == nullptr) {
123 continue;
124 }
125 if (!record->HasType(utdId)) {
126 continue;
127 }
128 auto htmlData = record->GetEntry(utdId);
129 if (std::holds_alternative<std::shared_ptr<Object>>(htmlData)) {
130 auto uriInfos = GetValueStr(htmlData);
131 if (!uriInfos.empty()) {
132 LOG_INFO(UDMF_CLIENT, "split uris size=%{public}zu", uriInfos.size());
133 record->SetUris(std::move(uriInfos));
134 }
135 }
136 }
137 }
138
GetValueStr(const ValueType & value)139 std::vector<UriInfo> UnifiedHtmlRecordProcess::GetValueStr(const ValueType &value)
140 {
141 auto object = std::get<std::shared_ptr<Object>>(value);
142 auto iter = object->value_.find(HTML_CONTENT);
143 if (iter != object->value_.end()) {
144 if (std::holds_alternative<std::string>(iter->second)) {
145 auto content = std::get<std::string>(iter->second);
146 return SplitHtmlStr(content);
147 }
148 }
149 return {};
150 }
151
SplitHtmlStr(const std::string & htmlContent)152 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlStr(const std::string &htmlContent)
153 {
154 std::vector<std::pair<std::string, uint32_t>> matchs = SplitHtmlWithImgLabel(htmlContent);
155 if (matchs.empty()) {
156 return {};
157 }
158 LOG_INFO(UDMF_CLIENT, "matchs size=%{public}zu", matchs.size());
159 return SplitHtmlWithImgSrcLabel(matchs);
160 }
161
SplitHtmlWithImgLabel(const std::string & htmlContent)162 std::vector<std::pair<std::string, uint32_t>> UnifiedHtmlRecordProcess::SplitHtmlWithImgLabel(
163 const std::string &htmlContent) noexcept
164 {
165 std::smatch match;
166 std::string pattern(IMG_TAG_PATTERN);
167 std::regex reg(pattern);
168 std::string::const_iterator iterStart = htmlContent.begin();
169 std::string::const_iterator iterEnd = htmlContent.end();
170 std::vector<std::pair<std::string, uint32_t>> matchs;
171 while (std::regex_search(iterStart, iterEnd, match, reg)) {
172 std::string tmp = match[0];
173 iterStart = match[0].second;
174 uint32_t position = static_cast<uint32_t>(match[0].first - htmlContent.begin());
175 matchs.emplace_back(tmp, position);
176 }
177 return matchs;
178 }
179
SplitHtmlWithImgSrcLabel(const std::vector<std::pair<std::string,uint32_t>> & matchs)180 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlWithImgSrcLabel(
181 const std::vector<std::pair<std::string, uint32_t>> &matchs) noexcept
182 {
183 std::vector<UriInfo> splitResult;
184 std::smatch match;
185 std::string pattern(IMG_TAG_SRC_PATTERN);
186 std::regex reg(pattern);
187 for (const auto &iter : matchs) {
188 std::string::const_iterator iterStart = iter.first.begin();
189 std::string::const_iterator iterEnd = iter.first.end();
190 while (std::regex_search(iterStart, iterEnd, match, reg)) {
191 std::string tmp = match[0];
192 iterStart = match[0].second;
193 uint32_t position = static_cast<uint32_t>(match[0].first - iter.first.begin());
194 tmp = tmp.substr(strlen(IMG_TAG_SRC_HEAD));
195 tmp.pop_back();
196 if (!IsLocalURI(tmp)) {
197 continue;
198 }
199 position += strlen(IMG_TAG_SRC_HEAD) + iter.second;
200 UriInfo uriInfo = {
201 .oriUri = tmp,
202 .position = position,
203 };
204 splitResult.push_back(std::move(uriInfo));
205 }
206 }
207 return splitResult;
208 }
209
IsLocalURI(const std::string & uri)210 bool UnifiedHtmlRecordProcess::IsLocalURI(const std::string &uri) noexcept
211 {
212 return uri.substr(0, strlen(IMG_LOCAL_URI)) == std::string(IMG_LOCAL_URI) ||
213 uri.find(IMG_LOCAL_PATH) == std::string::npos;
214 }
215
216 } // namespace UDMF
217 } // namespace OHOS