• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include <climits>
#include <memory>

#include <libxml/HTMLparser.h>

#include "pasteboard_pattern.h"
#include "pasteboard_hilog.h"
20 
21 namespace OHOS::MiscServices {
22 std::map<uint32_t, std::string> PatternDetection::patterns_{
23     { static_cast<uint32_t>(Pattern::URL), std::string("[a-zA-Z0-9+.-]+://[-a-zA-Z0-9+&@#/%?"
24                                                        "=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_]") },
25     { static_cast<uint32_t>(Pattern::NUMBER), std::string("[-+]?[0-9]*\\.?[0-9]+") },
26     { static_cast<uint32_t>(Pattern::EMAIL_ADDRESS), std::string("(([a-zA-Z0-9_\\-\\.]+)@"
27                                                                 "((?:\\[([0-9]{1,3}\\.){3}[0-9]{1,3}\\])|"
28                                                                 "([a-zA-Z0-9\\-]+(?:\\.[a-zA-Z0-9\\-]+)*))"
29                                                                 "([a-zA-Z]{2,}|[0-9]{1,3}))") },
30 };
31 
Detect(const std::set<Pattern> & patternsToCheck,const PasteData & pasteData,bool hasHTML,bool hasPlain)32 const std::set<Pattern> PatternDetection::Detect(
33     const std::set<Pattern> &patternsToCheck, const PasteData &pasteData, bool hasHTML, bool hasPlain)
34 {
35     std::set<Pattern> existedPatterns;
36     for (auto &record : pasteData.AllRecords()) {
37         if (patternsToCheck == existedPatterns) {
38             break;
39         }
40         if (hasPlain && record->GetPlainText() != nullptr) {
41             std::string recordText = *(record->GetPlainText());
42             DetectPlainText(existedPatterns, patternsToCheck, recordText);
43         }
44         if (hasHTML && record->GetHtmlText() != nullptr) {
45             std::string recordText = ExtractHtmlContent(*(record->GetHtmlText()));
46             DetectPlainText(existedPatterns, patternsToCheck, recordText);
47         }
48     }
49     return existedPatterns;
50 }
51 
IsValid(const std::set<Pattern> & patterns)52 bool PatternDetection::IsValid(const std::set<Pattern> &patterns)
53 {
54     for (Pattern pattern : patterns) {
55         if (pattern >= Pattern::COUNT) {
56             return false;
57         }
58     }
59     return true;
60 }
61 
DetectPlainText(std::set<Pattern> & patternsOut,const std::set<Pattern> & patternsIn,const std::string & plainText)62 void PatternDetection::DetectPlainText(
63     std::set<Pattern> &patternsOut, const std::set<Pattern> &patternsIn, const std::string &plainText)
64 {
65     for (Pattern pattern : patternsIn) {
66         if (patternsOut.find(pattern) != patternsOut.end()) {
67             continue;
68         }
69         uint32_t patternUint32 = static_cast<uint32_t>(pattern);
70         auto it = patterns_.find(patternUint32);
71         if (it == patterns_.end()) {
72             PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "pasteboard pattern, unexpected Pattern value!");
73             continue;
74         }
75         std::regex curRegex(it->second);
76         try {
77             if (std::regex_search(plainText, curRegex)) {
78                 patternsOut.insert(pattern);
79             }
80         } catch (std::regex_error &e) {
81             PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Regex error !");
82         }
83     }
84 }
85 
ExtractHtmlContent(const std::string & html_str)86 std::string PatternDetection::ExtractHtmlContent(const std::string &html_str)
87 {
88     xmlDocPtr doc = htmlReadMemory(html_str.c_str(), html_str.size(), nullptr, nullptr, 0);
89     if (doc == nullptr) {
90         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! doc nullptr.");
91         return "";
92     }
93     xmlNode *rootNode = xmlDocGetRootElement(doc);
94     if (rootNode == nullptr) {
95         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! rootNode nullptr.");
96         xmlFreeDoc(doc);
97         return "";
98     }
99     xmlChar *xmlStr = xmlNodeGetContent(rootNode);
100     if (xmlStr == nullptr) {
101         PASTEBOARD_HILOGE(PASTEBOARD_MODULE_SERVICE, "Parse html failed! xmlStr nullptr.");
102         xmlFreeDoc(doc);
103         return "";
104     }
105     std::string result(reinterpret_cast<const char *>(xmlStr));
106     xmlFree(xmlStr);
107     xmlFreeDoc(doc);
108     return result;
109 }
110 } // namespace OHOS::MiscServices