1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #define LOG_TAG "UnifiedHtmlRecordProcess"
16 #include "unified_html_record_process.h"
17
18 #include <regex>
19
20 #include "file_uri.h"
21 #include "html.h"
22 #include "logger.h"
23
24 namespace OHOS {
25 namespace UDMF {
// ECMAScript regex matching one <img ...> tag, non-greedy up to the first '>'.
constexpr auto IMG_TAG_PATTERN = "<img.*?>";
// Regex matching a quoted src attribute: group 1 is the quote character, group 2 the quoted value.
constexpr auto IMG_TAG_SRC_PATTERN = "src=(['\"])(.*?)\\1";
// Head of a double-quoted src attribute (`src=` plus the opening quote).
constexpr auto IMG_TAG_SRC_HEAD = "src=\"";
// Prefix an image URI must start with to be treated as local (see IsLocalURI).
constexpr auto IMG_LOCAL_URI = "file:///";
// Scheme prefix of file URIs.
constexpr auto FILE_SCHEME_PREFIX = "file://";
31
// Comparator that orders uint32_t keys from largest to smallest
// (returns true when lhs must come before rhs, i.e. when lhs is greater).
struct Cmp {
    bool operator()(const uint32_t &lhs, const uint32_t &rhs) const
    {
        return rhs < lhs;
    }
};
38
RebuildHtmlRecord(UnifiedData & unifiedData)39 void UnifiedHtmlRecordProcess::RebuildHtmlRecord(UnifiedData &unifiedData)
40 {
41 LOG_DEBUG(UDMF_CLIENT, "start");
42 auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
43 for (auto &record : unifiedData.GetRecords()) {
44 if (record == nullptr || record->GetUris().empty()) {
45 continue;
46 }
47 if (!record->HasType(utdId)) {
48 continue;
49 }
50 if (record->GetType() == UDType::HTML) {
51 auto htmlRecord = std::static_pointer_cast<Html>(record);
52 auto rebuildContent = RebuildHtmlContent(htmlRecord->GetHtmlContent(), record->GetUris());
53 if (!rebuildContent.empty()) {
54 htmlRecord->SetHtmlContent(rebuildContent);
55 }
56 } else {
57 ProcessEntry(record);
58 }
59 }
60 }
61
ProcessEntry(const std::shared_ptr<UnifiedRecord> & record)62 void UnifiedHtmlRecordProcess::ProcessEntry(const std::shared_ptr<UnifiedRecord> &record)
63 {
64 if (record->GetInnerEntries() == nullptr) {
65 return;
66 }
67 for (auto &entry : *(record->GetInnerEntries())) {
68 auto udType = static_cast<UDType>(UtdUtils::GetUtdEnumFromUtdId(entry.first));
69 if (udType == UDType::HTML && std::holds_alternative<std::shared_ptr<Object>>(entry.second)) {
70 RebuildEntry(record->GetUris(), entry.second);
71 }
72 }
73 }
74
RebuildEntry(const std::vector<UriInfo> & uris,const ValueType & value)75 void UnifiedHtmlRecordProcess::RebuildEntry(const std::vector<UriInfo> &uris, const ValueType &value)
76 {
77 auto object = std::get<std::shared_ptr<Object>>(value);
78 auto iter = object->value_.find(HTML_CONTENT);
79 if (iter != object->value_.end()) {
80 if (std::holds_alternative<std::string>(iter->second)) {
81 auto content = std::get<std::string>(iter->second);
82 auto rebuildContent = RebuildHtmlContent(content, uris);
83 if (!rebuildContent.empty()) {
84 object->value_[HTML_CONTENT] = rebuildContent;
85 }
86 }
87 }
88 }
89
RebuildHtmlContent(const std::string & str,const std::vector<UriInfo> & uris)90 std::string UnifiedHtmlRecordProcess::RebuildHtmlContent(const std::string &str, const std::vector<UriInfo> &uris)
91 {
92 std::map<uint32_t, std::pair<std::string, std::string>, Cmp> replaceUris;
93 std::string strResult = str;
94 for (auto &uri : uris) {
95 std::string tmpUri = uri.dfsUri.empty() ? uri.authUri : uri.dfsUri;
96 std::string realUri = tmpUri;
97 if (tmpUri.substr(0, strlen(FILE_SCHEME_PREFIX)) == FILE_SCHEME_PREFIX) {
98 AppFileService::ModuleFileUri::FileUri fileUri(tmpUri);
99 realUri = FILE_SCHEME_PREFIX;
100 realUri += fileUri.GetRealPath();
101 replaceUris[uri.position] = std::make_pair(std::move(uri.oriUri), std::move(realUri));
102 }
103 }
104 if (replaceUris.empty()) {
105 return "";
106 }
107 LOG_INFO(UDMF_CLIENT, "replaceUris size=%{public}zu", replaceUris.size());
108 for (auto &replaceUri : replaceUris) {
109 strResult.replace(replaceUri.first, replaceUri.second.first.size(), replaceUri.second.second);
110 }
111 return strResult;
112 }
113
GetUriFromHtmlRecord(UnifiedData & unifiedData)114 void UnifiedHtmlRecordProcess::GetUriFromHtmlRecord(UnifiedData &unifiedData)
115 {
116 LOG_DEBUG(UDMF_CLIENT, "start");
117 auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
118 for (auto &record : unifiedData.GetRecords()) {
119 if (record == nullptr) {
120 continue;
121 }
122 if (!record->HasType(utdId)) {
123 continue;
124 }
125 auto htmlData = record->GetEntry(utdId);
126 if (std::holds_alternative<std::shared_ptr<Object>>(htmlData)) {
127 auto uriInfos = GetValueStr(htmlData);
128 if (!uriInfos.empty()) {
129 LOG_INFO(UDMF_CLIENT, "split uris size=%{public}zu", uriInfos.size());
130 record->SetUris(std::move(uriInfos));
131 }
132 }
133 }
134 }
135
GetValueStr(const ValueType & value)136 std::vector<UriInfo> UnifiedHtmlRecordProcess::GetValueStr(const ValueType &value)
137 {
138 auto object = std::get<std::shared_ptr<Object>>(value);
139 auto iter = object->value_.find(HTML_CONTENT);
140 if (iter != object->value_.end()) {
141 if (std::holds_alternative<std::string>(iter->second)) {
142 auto content = std::get<std::string>(iter->second);
143 return SplitHtmlStr(content);
144 }
145 }
146 return {};
147 }
148
SplitHtmlStr(const std::string & htmlContent)149 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlStr(const std::string &htmlContent)
150 {
151 std::vector<std::pair<std::string, uint32_t>> matchs = SplitHtmlWithImgLabel(htmlContent);
152 if (matchs.empty()) {
153 return {};
154 }
155 LOG_INFO(UDMF_CLIENT, "matchs size=%{public}zu", matchs.size());
156 return SplitHtmlWithImgSrcLabel(matchs);
157 }
158
SplitHtmlWithImgLabel(const std::string & htmlContent)159 std::vector<std::pair<std::string, uint32_t>> UnifiedHtmlRecordProcess::SplitHtmlWithImgLabel(
160 const std::string &htmlContent) noexcept
161 {
162 std::smatch match;
163 std::string pattern(IMG_TAG_PATTERN);
164 std::regex reg(pattern);
165 std::string::const_iterator iterStart = htmlContent.begin();
166 std::string::const_iterator iterEnd = htmlContent.end();
167 std::vector<std::pair<std::string, uint32_t>> matchs;
168 while (std::regex_search(iterStart, iterEnd, match, reg)) {
169 std::string tmp = match[0];
170 iterStart = match[0].second;
171 uint32_t position = static_cast<uint32_t>(match[0].first - htmlContent.begin());
172 matchs.emplace_back(tmp, position);
173 }
174 return matchs;
175 }
176
SplitHtmlWithImgSrcLabel(const std::vector<std::pair<std::string,uint32_t>> & matchs)177 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlWithImgSrcLabel(
178 const std::vector<std::pair<std::string, uint32_t>> &matchs) noexcept
179 {
180 std::vector<UriInfo> splitResult;
181 std::smatch match;
182 std::string pattern(IMG_TAG_SRC_PATTERN);
183 std::regex reg(pattern);
184 for (const auto &iter : matchs) {
185 std::string::const_iterator iterStart = iter.first.begin();
186 std::string::const_iterator iterEnd = iter.first.end();
187 while (std::regex_search(iterStart, iterEnd, match, reg)) {
188 std::string tmp = match[0];
189 iterStart = match[0].second;
190 uint32_t position = static_cast<uint32_t>(match[0].first - iter.first.begin());
191 tmp = tmp.substr(strlen(IMG_TAG_SRC_HEAD));
192 tmp.pop_back();
193 if (!IsLocalURI(tmp)) {
194 continue;
195 }
196 position += strlen(IMG_TAG_SRC_HEAD) + iter.second;
197 UriInfo uriInfo = {
198 .oriUri = tmp,
199 .position = position,
200 };
201 splitResult.push_back(std::move(uriInfo));
202 }
203 }
204 return splitResult;
205 }
206
IsLocalURI(const std::string & uri)207 bool UnifiedHtmlRecordProcess::IsLocalURI(const std::string &uri) noexcept
208 {
209 return uri.substr(0, strlen(IMG_LOCAL_URI)) == std::string(IMG_LOCAL_URI);
210 }
211
212 } // namespace UDMF
213 } // namespace OHOS