• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #define LOG_TAG "UnifiedHtmlRecordProcess"
16 #include "unified_html_record_process.h"
17 
18 #include <regex>
19 
20 #include "file_uri.h"
21 #include "html.h"
22 #include "logger.h"
23 
24 namespace OHOS {
25 namespace UDMF {
// Regex pattern used to locate <img ...> tags in HTML content (non-greedy up to the first '>').
constexpr const char *IMG_TAG_PATTERN = "<img.*?>";
// Regex pattern used to locate a src attribute inside an <img> tag; the value may be wrapped in
// single or double quotes, and the back-reference requires the closing quote to match the opening one.
constexpr const char *IMG_TAG_SRC_PATTERN = "src=(['\"])(.*?)\\1";
// Leading part of a matched src attribute; its length is used to skip from the attribute start to the URI.
constexpr const char *IMG_TAG_SRC_HEAD = "src=\"";
// A URI starting with this prefix is treated as local by IsLocalURI().
constexpr const char *IMG_LOCAL_URI = "file:///";
// A URI that does not contain this separator at all is also treated as local by IsLocalURI().
constexpr const char *IMG_LOCAL_PATH = "://";
// Scheme prefix checked on (and re-applied to) resolved URIs in RebuildHtmlContent().
constexpr const char *FILE_SCHEME_PREFIX = "file://";
32 
// Strict-weak ordering that sorts keys from largest to smallest.
struct Cmp {
    // Returns true when lhs must be ordered before rhs, i.e. when lhs is the larger value.
    bool operator()(const uint32_t &lhs, const uint32_t &rhs) const
    {
        return rhs < lhs;
    }
};
39 
RebuildHtmlRecord(UnifiedData & unifiedData)40 void UnifiedHtmlRecordProcess::RebuildHtmlRecord(UnifiedData &unifiedData)
41 {
42     LOG_DEBUG(UDMF_CLIENT, "start");
43     auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
44     for (auto &record : unifiedData.GetRecords()) {
45         if (record == nullptr || record->GetUris().empty()) {
46             continue;
47         }
48         if (!record->HasType(utdId)) {
49             continue;
50         }
51         if (record->GetType() == UDType::HTML) {
52             auto htmlRecord = std::static_pointer_cast<Html>(record);
53             auto rebuildContent = RebuildHtmlContent(htmlRecord->GetHtmlContent(), record->GetUris());
54             if (!rebuildContent.empty()) {
55                 htmlRecord->SetHtmlContent(rebuildContent);
56             }
57         } else {
58             ProcessEntry(record);
59         }
60     }
61 }
62 
ProcessEntry(const std::shared_ptr<UnifiedRecord> & record)63 void UnifiedHtmlRecordProcess::ProcessEntry(const std::shared_ptr<UnifiedRecord> &record)
64 {
65     if (record->GetInnerEntries() == nullptr) {
66         return;
67     }
68     for (auto &entry : *(record->GetInnerEntries())) {
69         auto udType = static_cast<UDType>(UtdUtils::GetUtdEnumFromUtdId(entry.first));
70         if (udType == UDType::HTML && std::holds_alternative<std::shared_ptr<Object>>(entry.second)) {
71             RebuildEntry(record->GetUris(), entry.second);
72         }
73     }
74 }
75 
RebuildEntry(const std::vector<UriInfo> & uris,const ValueType & value)76 void UnifiedHtmlRecordProcess::RebuildEntry(const std::vector<UriInfo> &uris, const ValueType &value)
77 {
78     auto object = std::get<std::shared_ptr<Object>>(value);
79     auto iter = object->value_.find(HTML_CONTENT);
80     if (iter != object->value_.end()) {
81         if (std::holds_alternative<std::string>(iter->second)) {
82             auto content = std::get<std::string>(iter->second);
83             auto rebuildContent = RebuildHtmlContent(content, uris);
84             if (!rebuildContent.empty()) {
85                 object->value_[HTML_CONTENT] = rebuildContent;
86             }
87         }
88     }
89 }
90 
RebuildHtmlContent(const std::string & str,const std::vector<UriInfo> & uris)91 std::string UnifiedHtmlRecordProcess::RebuildHtmlContent(const std::string &str, const std::vector<UriInfo> &uris)
92 {
93     std::map<uint32_t, std::pair<std::string, std::string>, Cmp> replaceUris;
94     std::string strResult = str;
95     for (auto &uri : uris) {
96         if (uri.dfsUri.empty()) {
97             continue;
98         }
99         std::string realUri = uri.dfsUri;
100         if (realUri.substr(0, strlen(FILE_SCHEME_PREFIX)) == FILE_SCHEME_PREFIX) {
101             AppFileService::ModuleFileUri::FileUri fileUri(uri.dfsUri);
102             realUri = FILE_SCHEME_PREFIX;
103             realUri += fileUri.GetRealPath();
104         }
105         replaceUris[uri.position] = std::make_pair(std::move(uri.oriUri), std::move(realUri));
106     }
107     if (replaceUris.empty()) {
108         return "";
109     }
110     LOG_INFO(UDMF_CLIENT, "replaceUris size=%{public}zu", replaceUris.size());
111     for (auto &replaceUri : replaceUris) {
112         strResult.replace(replaceUri.first, replaceUri.second.first.size(), replaceUri.second.second);
113     }
114     return strResult;
115 }
116 
GetUriFromHtmlRecord(UnifiedData & unifiedData)117 void UnifiedHtmlRecordProcess::GetUriFromHtmlRecord(UnifiedData &unifiedData)
118 {
119     LOG_DEBUG(UDMF_CLIENT, "start");
120     auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
121     for (auto &record : unifiedData.GetRecords()) {
122         if (record == nullptr) {
123             continue;
124         }
125         if (!record->HasType(utdId)) {
126             continue;
127         }
128         auto htmlData = record->GetEntry(utdId);
129         if (std::holds_alternative<std::shared_ptr<Object>>(htmlData)) {
130             auto uriInfos = GetValueStr(htmlData);
131             if (!uriInfos.empty()) {
132                 LOG_INFO(UDMF_CLIENT, "split uris size=%{public}zu", uriInfos.size());
133                 record->SetUris(std::move(uriInfos));
134             }
135         }
136     }
137 }
138 
GetValueStr(const ValueType & value)139 std::vector<UriInfo> UnifiedHtmlRecordProcess::GetValueStr(const ValueType &value)
140 {
141     auto object = std::get<std::shared_ptr<Object>>(value);
142     auto iter = object->value_.find(HTML_CONTENT);
143     if (iter != object->value_.end()) {
144         if (std::holds_alternative<std::string>(iter->second)) {
145             auto content = std::get<std::string>(iter->second);
146             return SplitHtmlStr(content);
147         }
148     }
149     return {};
150 }
151 
SplitHtmlStr(const std::string & htmlContent)152 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlStr(const std::string &htmlContent)
153 {
154     std::vector<std::pair<std::string, uint32_t>> matchs = SplitHtmlWithImgLabel(htmlContent);
155     if (matchs.empty()) {
156         return {};
157     }
158     LOG_INFO(UDMF_CLIENT, "matchs size=%{public}zu", matchs.size());
159     return SplitHtmlWithImgSrcLabel(matchs);
160 }
161 
SplitHtmlWithImgLabel(const std::string & htmlContent)162 std::vector<std::pair<std::string, uint32_t>> UnifiedHtmlRecordProcess::SplitHtmlWithImgLabel(
163     const std::string &htmlContent) noexcept
164 {
165     std::smatch match;
166     std::string pattern(IMG_TAG_PATTERN);
167     std::regex reg(pattern);
168     std::string::const_iterator iterStart = htmlContent.begin();
169     std::string::const_iterator iterEnd = htmlContent.end();
170     std::vector<std::pair<std::string, uint32_t>> matchs;
171     while (std::regex_search(iterStart, iterEnd, match, reg)) {
172         std::string tmp = match[0];
173         iterStart = match[0].second;
174         uint32_t position = static_cast<uint32_t>(match[0].first - htmlContent.begin());
175         matchs.emplace_back(tmp, position);
176     }
177     return matchs;
178 }
179 
SplitHtmlWithImgSrcLabel(const std::vector<std::pair<std::string,uint32_t>> & matchs)180 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlWithImgSrcLabel(
181     const std::vector<std::pair<std::string, uint32_t>> &matchs) noexcept
182 {
183     std::vector<UriInfo> splitResult;
184     std::smatch match;
185     std::string pattern(IMG_TAG_SRC_PATTERN);
186     std::regex reg(pattern);
187     for (const auto &iter : matchs) {
188         std::string::const_iterator iterStart = iter.first.begin();
189         std::string::const_iterator iterEnd = iter.first.end();
190         while (std::regex_search(iterStart, iterEnd, match, reg)) {
191             std::string tmp = match[0];
192             iterStart = match[0].second;
193             uint32_t position = static_cast<uint32_t>(match[0].first - iter.first.begin());
194             tmp = tmp.substr(strlen(IMG_TAG_SRC_HEAD));
195             tmp.pop_back();
196             if (!IsLocalURI(tmp)) {
197                 continue;
198             }
199             position += strlen(IMG_TAG_SRC_HEAD) + iter.second;
200             UriInfo uriInfo = {
201                 .oriUri = tmp,
202                 .position = position,
203             };
204             splitResult.push_back(std::move(uriInfo));
205         }
206     }
207     return splitResult;
208 }
209 
IsLocalURI(const std::string & uri)210 bool UnifiedHtmlRecordProcess::IsLocalURI(const std::string &uri) noexcept
211 {
212     return uri.substr(0, strlen(IMG_LOCAL_URI)) == std::string(IMG_LOCAL_URI) ||
213         uri.find(IMG_LOCAL_PATH) == std::string::npos;
214 }
215 
216 } // namespace UDMF
217 } // namespace OHOS