1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include <climits>
#include <memory>

#include <libxml/HTMLparser.h>

#include "pasteboard_pattern.h"
#include "pasteboard_hilog.h"
20
21 namespace OHOS::MiscServices {
22 std::map<uint32_t, std::string> PatternDetection::patterns_{
23 { static_cast<uint32_t>(Pattern::URL), std::string("[a-zA-Z0-9+.-]+://[-a-zA-Z0-9+&@#/%?"
24 "=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_]") },
25 { static_cast<uint32_t>(Pattern::NUMBER), std::string("[-+]?[0-9]*\\.?[0-9]+") },
26 { static_cast<uint32_t>(Pattern::EMAIL_ADDRESS), std::string("(([a-zA-Z0-9_\\-\\.]+)@"
27 "((?:\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\])|"
28 "([a-zA-Z0-9\\-]+(?:\\.[a-zA-Z0-9\\-]+)*))"
29 "([a-zA-Z]{2,}|[0-9]{1,3}))") },
30 };
31
Detect(const std::set<Pattern> & patternsToCheck,const PasteData & pasteData,bool hasHTML,bool hasPlain)32 const std::set<Pattern> PatternDetection::Detect(
33 const std::set<Pattern> &patternsToCheck, const PasteData &pasteData, bool hasHTML, bool hasPlain)
34 {
35 std::set<Pattern> existedPatterns;
36 for (auto &record : pasteData.AllRecords()) {
37 if (patternsToCheck == existedPatterns) {
38 break;
39 }
40 if (hasPlain && record->GetPlainText() != nullptr) {
41 std::string recordText = *(record->GetPlainText());
42 DetectPlainText(existedPatterns, patternsToCheck, recordText);
43 }
44 if (hasHTML && record->GetHtmlText() != nullptr) {
45 std::string recordText = ExtractHtmlContent(*(record->GetHtmlText()));
46 DetectPlainText(existedPatterns, patternsToCheck, recordText);
47 }
48 }
49 return existedPatterns;
50 }
51
IsValid(const std::set<Pattern> & patterns)52 bool PatternDetection::IsValid(const std::set<Pattern> &patterns)
53 {
54 for (Pattern pattern : patterns) {
55 if (pattern >= Pattern::COUNT) {
56 return false;
57 }
58 }
59 return true;
60 }
61
DetectPlainText(std::set<Pattern> & patternsOut,const std::set<Pattern> & patternsIn,const std::string & plainText)62 void PatternDetection::DetectPlainText(
63 std::set<Pattern> &patternsOut, const std::set<Pattern> &patternsIn, const std::string &plainText)
64 {
65 for (Pattern pattern : patternsIn) {
66 if (patternsOut.find(pattern) != patternsOut.end()) {
67 continue;
68 }
69 uint32_t patternUint32 = static_cast<uint32_t>(pattern);
70 auto it = patterns_.find(patternUint32);
71 if (it == patterns_.end()) {
72 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "pasteboard pattern, unexpected Pattern value!");
73 continue;
74 }
75 std::regex curRegex(it->second);
76 try {
77 if (std::regex_search(plainText, curRegex)) {
78 patternsOut.insert(pattern);
79 }
80 } catch (std::regex_error &e) {
81 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Regex error !");
82 }
83 }
84 }
85
ExtractHtmlContent(const std::string & html_str)86 std::string PatternDetection::ExtractHtmlContent(const std::string &html_str)
87 {
88 xmlDocPtr doc = htmlReadMemory(html_str.c_str(), html_str.size(), nullptr, nullptr, 0);
89 if (doc == nullptr) {
90 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! doc nullptr.");
91 return "";
92 }
93 xmlNode *rootNode = xmlDocGetRootElement(doc);
94 if (rootNode == nullptr) {
95 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! rootNode nullptr.");
96 xmlFreeDoc(doc);
97 return "";
98 }
99 xmlChar *xmlStr = xmlNodeGetContent(rootNode);
100 if (xmlStr == nullptr) {
101 PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! xmlStr nullptr.");
102 xmlFreeDoc(doc);
103 return "";
104 }
105 std::string result(reinterpret_cast<const char *>(xmlStr));
106 xmlFree(xmlStr);
107 xmlFreeDoc(doc);
108 return result;
109 }
110 } // namespace OHOS::MiscServices