• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #define LOG_TAG "UnifiedHtmlRecordProcess"
16 #include "unified_html_record_process.h"
17 
18 #include <regex>
19 
20 #include "file_uri.h"
21 #include "html.h"
22 #include "logger.h"
23 
24 namespace OHOS {
25 namespace UDMF {
// Regular expressions used to scan HTML content.
// IMG_TAG_PATTERN: an "<img" opener followed by the shortest run of characters up to a '>'.
// IMG_TAG_SRC_PATTERN: "src=" followed by a quoted value; group 1 is the quote character,
// group 2 is the text between the matching quotes.
constexpr const char *IMG_TAG_PATTERN = "<img.*?>";
constexpr const char *IMG_TAG_SRC_PATTERN = "src=(['\"])(.*?)\\1";

// Literal prefixes.
// IMG_TAG_SRC_HEAD: its length is the number of characters to skip to reach the quoted src value.
// IMG_LOCAL_URI: prefix an image source must start with to be treated as a local URI.
// FILE_SCHEME_PREFIX: prefix a URI must start with before its real path is resolved.
constexpr const char *IMG_TAG_SRC_HEAD = "src=\"";
constexpr const char *IMG_LOCAL_URI = "file:///";
constexpr const char *FILE_SCHEME_PREFIX = "file://";
31 
// Comparator that orders uint32_t keys from largest to smallest.
struct Cmp {
    bool operator()(const uint32_t &lhs, const uint32_t &rhs) const
    {
        // lhs goes first only when it is strictly greater than rhs.
        return rhs < lhs;
    }
};
38 
RebuildHtmlRecord(UnifiedData & unifiedData)39 void UnifiedHtmlRecordProcess::RebuildHtmlRecord(UnifiedData &unifiedData)
40 {
41     LOG_DEBUG(UDMF_CLIENT, "start");
42     auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
43     for (auto &record : unifiedData.GetRecords()) {
44         if (record == nullptr || record->GetUris().empty()) {
45             continue;
46         }
47         if (!record->HasType(utdId)) {
48             continue;
49         }
50         if (record->GetType() == UDType::HTML) {
51             auto htmlRecord = std::static_pointer_cast<Html>(record);
52             auto rebuildContent = RebuildHtmlContent(htmlRecord->GetHtmlContent(), record->GetUris());
53             if (!rebuildContent.empty()) {
54                 htmlRecord->SetHtmlContent(rebuildContent);
55             }
56         } else {
57             ProcessEntry(record);
58         }
59     }
60 }
61 
ProcessEntry(const std::shared_ptr<UnifiedRecord> & record)62 void UnifiedHtmlRecordProcess::ProcessEntry(const std::shared_ptr<UnifiedRecord> &record)
63 {
64     if (record->GetInnerEntries() == nullptr) {
65         return;
66     }
67     for (auto &entry : *(record->GetInnerEntries())) {
68         auto udType = static_cast<UDType>(UtdUtils::GetUtdEnumFromUtdId(entry.first));
69         if (udType == UDType::HTML && std::holds_alternative<std::shared_ptr<Object>>(entry.second)) {
70             RebuildEntry(record->GetUris(), entry.second);
71         }
72     }
73 }
74 
RebuildEntry(const std::vector<UriInfo> & uris,const ValueType & value)75 void UnifiedHtmlRecordProcess::RebuildEntry(const std::vector<UriInfo> &uris, const ValueType &value)
76 {
77     auto object = std::get<std::shared_ptr<Object>>(value);
78     auto iter = object->value_.find(HTML_CONTENT);
79     if (iter != object->value_.end()) {
80         if (std::holds_alternative<std::string>(iter->second)) {
81             auto content = std::get<std::string>(iter->second);
82             auto rebuildContent = RebuildHtmlContent(content, uris);
83             if (!rebuildContent.empty()) {
84                 object->value_[HTML_CONTENT] = rebuildContent;
85             }
86         }
87     }
88 }
89 
RebuildHtmlContent(const std::string & str,const std::vector<UriInfo> & uris)90 std::string UnifiedHtmlRecordProcess::RebuildHtmlContent(const std::string &str, const std::vector<UriInfo> &uris)
91 {
92     std::map<uint32_t, std::pair<std::string, std::string>, Cmp> replaceUris;
93     std::string strResult = str;
94     for (auto &uri : uris) {
95         std::string tmpUri = uri.dfsUri.empty() ? uri.authUri : uri.dfsUri;
96         std::string realUri = tmpUri;
97         if (tmpUri.substr(0, strlen(FILE_SCHEME_PREFIX)) == FILE_SCHEME_PREFIX) {
98             AppFileService::ModuleFileUri::FileUri fileUri(tmpUri);
99             realUri = FILE_SCHEME_PREFIX;
100             realUri += fileUri.GetRealPath();
101             replaceUris[uri.position] = std::make_pair(std::move(uri.oriUri), std::move(realUri));
102         }
103     }
104     if (replaceUris.empty()) {
105         return "";
106     }
107     LOG_INFO(UDMF_CLIENT, "replaceUris size=%{public}zu", replaceUris.size());
108     for (auto &replaceUri : replaceUris) {
109         strResult.replace(replaceUri.first, replaceUri.second.first.size(), replaceUri.second.second);
110     }
111     return strResult;
112 }
113 
GetUriFromHtmlRecord(UnifiedData & unifiedData)114 void UnifiedHtmlRecordProcess::GetUriFromHtmlRecord(UnifiedData &unifiedData)
115 {
116     LOG_DEBUG(UDMF_CLIENT, "start");
117     auto utdId = UtdUtils::GetUtdIdFromUtdEnum(UDType::HTML);
118     for (auto &record : unifiedData.GetRecords()) {
119         if (record == nullptr) {
120             continue;
121         }
122         if (!record->HasType(utdId)) {
123             continue;
124         }
125         auto htmlData = record->GetEntry(utdId);
126         if (std::holds_alternative<std::shared_ptr<Object>>(htmlData)) {
127             auto uriInfos = GetValueStr(htmlData);
128             if (!uriInfos.empty()) {
129                 LOG_INFO(UDMF_CLIENT, "split uris size=%{public}zu", uriInfos.size());
130                 record->SetUris(std::move(uriInfos));
131             }
132         }
133     }
134 }
135 
GetValueStr(const ValueType & value)136 std::vector<UriInfo> UnifiedHtmlRecordProcess::GetValueStr(const ValueType &value)
137 {
138     auto object = std::get<std::shared_ptr<Object>>(value);
139     auto iter = object->value_.find(HTML_CONTENT);
140     if (iter != object->value_.end()) {
141         if (std::holds_alternative<std::string>(iter->second)) {
142             auto content = std::get<std::string>(iter->second);
143             return SplitHtmlStr(content);
144         }
145     }
146     return {};
147 }
148 
SplitHtmlStr(const std::string & htmlContent)149 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlStr(const std::string &htmlContent)
150 {
151     std::vector<std::pair<std::string, uint32_t>> matchs = SplitHtmlWithImgLabel(htmlContent);
152     if (matchs.empty()) {
153         return {};
154     }
155     LOG_INFO(UDMF_CLIENT, "matchs size=%{public}zu", matchs.size());
156     return SplitHtmlWithImgSrcLabel(matchs);
157 }
158 
SplitHtmlWithImgLabel(const std::string & htmlContent)159 std::vector<std::pair<std::string, uint32_t>> UnifiedHtmlRecordProcess::SplitHtmlWithImgLabel(
160     const std::string &htmlContent) noexcept
161 {
162     std::smatch match;
163     std::string pattern(IMG_TAG_PATTERN);
164     std::regex reg(pattern);
165     std::string::const_iterator iterStart = htmlContent.begin();
166     std::string::const_iterator iterEnd = htmlContent.end();
167     std::vector<std::pair<std::string, uint32_t>> matchs;
168     while (std::regex_search(iterStart, iterEnd, match, reg)) {
169         std::string tmp = match[0];
170         iterStart = match[0].second;
171         uint32_t position = static_cast<uint32_t>(match[0].first - htmlContent.begin());
172         matchs.emplace_back(tmp, position);
173     }
174     return matchs;
175 }
176 
SplitHtmlWithImgSrcLabel(const std::vector<std::pair<std::string,uint32_t>> & matchs)177 std::vector<UriInfo> UnifiedHtmlRecordProcess::SplitHtmlWithImgSrcLabel(
178     const std::vector<std::pair<std::string, uint32_t>> &matchs) noexcept
179 {
180     std::vector<UriInfo> splitResult;
181     std::smatch match;
182     std::string pattern(IMG_TAG_SRC_PATTERN);
183     std::regex reg(pattern);
184     for (const auto &iter : matchs) {
185         std::string::const_iterator iterStart = iter.first.begin();
186         std::string::const_iterator iterEnd = iter.first.end();
187         while (std::regex_search(iterStart, iterEnd, match, reg)) {
188             std::string tmp = match[0];
189             iterStart = match[0].second;
190             uint32_t position = static_cast<uint32_t>(match[0].first - iter.first.begin());
191             tmp = tmp.substr(strlen(IMG_TAG_SRC_HEAD));
192             tmp.pop_back();
193             if (!IsLocalURI(tmp)) {
194                 continue;
195             }
196             position += strlen(IMG_TAG_SRC_HEAD) + iter.second;
197             UriInfo uriInfo = {
198                 .oriUri = tmp,
199                 .position = position,
200             };
201             splitResult.push_back(std::move(uriInfo));
202         }
203     }
204     return splitResult;
205 }
206 
IsLocalURI(const std::string & uri)207 bool UnifiedHtmlRecordProcess::IsLocalURI(const std::string &uri) noexcept
208 {
209     return uri.substr(0, strlen(IMG_LOCAL_URI)) == std::string(IMG_LOCAL_URI);
210 }
211 
212 } // namespace UDMF
213 } // namespace OHOS