1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "positive_rule.h"
16 #include "i18n_hilog.h"
17 #include "phonenumbers/phonenumberutil.h"
18 #include "phonenumbers/phonenumber.h"
19 #include "phonenumbers/shortnumberinfo.h"
20
21 namespace OHOS {
22 namespace Global {
23 namespace I18n {
24 using i18n::phonenumbers::PhoneNumber;
25 using i18n::phonenumbers::PhoneNumberUtil;
26 using i18n::phonenumbers::ShortNumberInfo;
PositiveRule(icu::UnicodeString & regex,std::string & handleType,std::string & insensitive)27 PositiveRule::PositiveRule(icu::UnicodeString& regex, std::string& handleType, std::string& insensitive)
28 {
29 this->regex = regex;
30 this->status = U_ZERO_ERROR;
31 this->handleType = handleType;
32 this->insensitive = insensitive;
33 if (regex.length() == 0) {
34 return;
35 }
36 }
37
GetPattern()38 icu::RegexPattern* PositiveRule::GetPattern()
39 {
40 // Sets whether regular expression matching is case sensitive
41 if (insensitive == "True") {
42 return icu::RegexPattern::compile(this->regex, URegexpFlag::UREGEX_CASE_INSENSITIVE, this->status);
43 } else {
44 return icu::RegexPattern::compile(this->regex, 0, this->status);
45 }
46 }
47
48 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)49 bool PositiveRule::IsNumberWithOneBracket(icu::UnicodeString& message)
50 {
51 if (message != "") {
52 int numLeft = 0;
53 int numRight = 0;
54 int len = message.length();
55 for (int i = 0; i < len; i++) {
56 if (message[i] == '(' || message[i] == '[') {
57 numLeft++;
58 }
59 if (message[i] == ')' || message[i] == ']') {
60 numRight++;
61 }
62 }
63 if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
64 return true;
65 }
66 }
67 return false;
68 }
69
DealStringWithOneBracket(icu::UnicodeString & message)70 icu::UnicodeString PositiveRule::DealStringWithOneBracket(icu::UnicodeString& message)
71 {
72 if (IsNumberWithOneBracket(message)) {
73 return message.tempSubString(1);
74 }
75 return message;
76 }
77
Handle(PhoneNumberMatch * match,icu::UnicodeString & message)78 std::vector<MatchedNumberInfo> PositiveRule::Handle(PhoneNumberMatch* match, icu::UnicodeString& message)
79 {
80 icu::UnicodeString rawString = match->raw_string().c_str();
81 icu::UnicodeString str = DealStringWithOneBracket(rawString);
82 icu::RegexPattern* pattern = this->GetPattern();
83 if (pattern == nullptr) {
84 return {};
85 }
86 UErrorCode status = U_ZERO_ERROR;
87 icu::RegexMatcher* mat1 = pattern->matcher(str, status);
88 if (mat1 != nullptr && mat1->find(status)) {
89 std::vector<MatchedNumberInfo> infoList = this->HandleInner(match, message);
90 delete mat1;
91 delete pattern;
92 return infoList;
93 }
94 delete mat1;
95 icu::RegexMatcher* mat2 = pattern->matcher(message, status);
96 if (mat2 != nullptr && mat2->find(status)) {
97 std::vector<MatchedNumberInfo> infoList = this->HandleInner(match, message);
98 delete mat2;
99 delete pattern;
100 return infoList;
101 }
102 delete mat2;
103 delete pattern;
104 return {};
105 }
106
HandleInner(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)107 std::vector<MatchedNumberInfo> PositiveRule::HandleInner(PhoneNumberMatch *possibleNumber, icu::UnicodeString& message)
108 {
109 if (handleType == "Operator") {
110 return HandleOperator(possibleNumber, message);
111 } else if (handleType == "Blank") {
112 return HandleBlank(possibleNumber, message);
113 } else if (handleType == "Slant") {
114 return HandleSlant(possibleNumber, message);
115 } else if (handleType == "StartWithMobile") {
116 return HandleStartWithMobile(possibleNumber, message);
117 } else if (handleType == "EndWithMobile") {
118 return HandleEndWithMobile(possibleNumber, message);
119 }
120 return HandleDefault(possibleNumber, message);
121 }
122
HandleDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)123 std::vector<MatchedNumberInfo> PositiveRule::HandleDefault(PhoneNumberMatch* possibleNumber,
124 icu::UnicodeString& message)
125 {
126 MatchedNumberInfo matcher;
127 matcher.SetBegin(0);
128 matcher.SetEnd(1);
129 icu::UnicodeString content = "";
130 matcher.SetContent(content);
131 std::vector<MatchedNumberInfo> matchedNumberInfoList;
132 matchedNumberInfoList.push_back(matcher);
133 return matchedNumberInfoList;
134 }
135
HandleOperator(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)136 std::vector<MatchedNumberInfo> PositiveRule::HandleOperator(PhoneNumberMatch* possibleNumber,
137 icu::UnicodeString& message)
138 {
139 MatchedNumberInfo matcher;
140 if (possibleNumber->raw_string()[0] == '(' || possibleNumber->raw_string()[0] == '[') {
141 matcher.SetBegin(possibleNumber->start() + 1);
142 } else {
143 matcher.SetBegin(possibleNumber->start());
144 }
145 matcher.SetEnd(possibleNumber->end());
146 matcher.SetContent(message);
147 std::vector<MatchedNumberInfo> matchedNumberInfoList;
148 matchedNumberInfoList.push_back(matcher);
149 return matchedNumberInfoList;
150 }
151
HandleBlank(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)152 std::vector<MatchedNumberInfo> PositiveRule::HandleBlank(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
153 {
154 std::vector<MatchedNumberInfo> matchedNumberInfoList;
155 // exclude phone number 5201314
156 icu::UnicodeString speString = "5201314";
157 MatchedNumberInfo matchedNumberInfo;
158 icu::UnicodeString number = possibleNumber->raw_string().c_str();
159 icu::RegexPattern* pattern = GetPattern();
160 if (pattern == nullptr) {
161 return matchedNumberInfoList;
162 }
163 UErrorCode status;
164 icu::RegexMatcher* matcher = pattern->matcher(number, status);
165 UErrorCode negativeStatus = U_ZERO_ERROR;
166 // exclude phone number 2333333
167 icu::UnicodeString negativeRegex = "(?<![-\\d])(23{6,7})(?![-\\d])";
168 icu::RegexMatcher negativePattern(negativeRegex, 0, negativeStatus);
169 negativePattern.reset(number);
170 if (matcher != nullptr && matcher->find()) {
171 if (negativePattern.find() || number == speString) {
172 delete matcher;
173 delete pattern;
174 return matchedNumberInfoList;
175 }
176 if (possibleNumber->raw_string()[0] != '(' && possibleNumber->raw_string()[0] != '[') {
177 matchedNumberInfo.SetBegin(matcher->start(status) + possibleNumber->start());
178 } else {
179 matchedNumberInfo.SetBegin(possibleNumber->start());
180 }
181 matchedNumberInfo.SetEnd(matcher->end(status) + possibleNumber->start());
182 matchedNumberInfo.SetContent(number);
183 matchedNumberInfoList.push_back(matchedNumberInfo);
184 }
185 delete matcher;
186 delete pattern;
187 return matchedNumberInfoList;
188 }
189
HandleSlant(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)190 std::vector<MatchedNumberInfo> PositiveRule::HandleSlant(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
191 {
192 std::vector<MatchedNumberInfo> matchedNumberInfoList;
193 MatchedNumberInfo matchedNumberInfo;
194 MatchedNumberInfo numberInfo;
195 icu::UnicodeString number = possibleNumber->raw_string().c_str();
196 icu::RegexPattern* pattern = GetPattern();
197 if (pattern == nullptr) {
198 return matchedNumberInfoList;
199 }
200 UErrorCode status;
201 icu::RegexMatcher* matcher = pattern->matcher(number, status);
202 if (matcher != nullptr && matcher->find()) {
203 int start = matcher->start(status);
204 std::vector<MatchedNumberInfo> tempList = GetNumbersWithSlant(number);
205 // 2 is the size of tempList.
206 if (tempList.size() == 2 && start == 1) {
207 start = 0;
208 }
209 if (tempList.size() > 0) {
210 matchedNumberInfo.SetBegin(tempList[0].GetBegin() + start + possibleNumber->start());
211 matchedNumberInfo.SetEnd(tempList[0].GetEnd() + possibleNumber->start());
212 icu::UnicodeString contentFirst = tempList[0].GetContent();
213 matchedNumberInfo.SetContent(contentFirst);
214 matchedNumberInfoList.push_back(matchedNumberInfo);
215 // 2 is the size of tempList.
216 if (tempList.size() == 2) {
217 numberInfo.SetBegin(tempList[1].GetBegin() + start + possibleNumber->start());
218 numberInfo.SetEnd(tempList[1].GetEnd() + possibleNumber->start());
219 icu::UnicodeString contentSecond = tempList[1].GetContent();
220 numberInfo.SetContent(contentSecond);
221 matchedNumberInfoList.push_back(numberInfo);
222 }
223 }
224 }
225 delete matcher;
226 delete pattern;
227 return matchedNumberInfoList;
228 }
229
HandleStartWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)230 std::vector<MatchedNumberInfo> PositiveRule::HandleStartWithMobile(PhoneNumberMatch* possibleNumber,
231 icu::UnicodeString& message)
232 {
233 return HandlePossibleNumberWithPattern(possibleNumber, message, false);
234 }
235
HandleEndWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)236 std::vector<MatchedNumberInfo> PositiveRule::HandleEndWithMobile(PhoneNumberMatch* possibleNumber,
237 icu::UnicodeString& message)
238 {
239 return HandlePossibleNumberWithPattern(possibleNumber, message, true);
240 }
241
242 // identify short number separated by '/'
GetNumbersWithSlant(icu::UnicodeString & testStr)243 std::vector<MatchedNumberInfo> PositiveRule::GetNumbersWithSlant(icu::UnicodeString& testStr)
244 {
245 std::vector<MatchedNumberInfo> shortList;
246 PhoneNumberUtil* pnu = PhoneNumberUtil::GetInstance();
247 ShortNumberInfo* shortInfo = new (std::nothrow) ShortNumberInfo();
248 if (shortInfo == nullptr) {
249 HILOG_ERROR_I18N("ShortNumberInfo construct failed.");
250 return shortList;
251 }
252 std::string numberFisrt = "";
253 std::string numberEnd = "";
254 int slantIndex = 0;
255 for (int i = 0; i < testStr.length(); i++) {
256 if (testStr[i] == '/' || testStr[i] == '|') {
257 slantIndex = i;
258 testStr.tempSubString(0, i).toUTF8String(numberFisrt);
259 testStr.tempSubString(i + 1).toUTF8String(numberEnd);
260 }
261 }
262 PhoneNumber phoneNumberFirst;
263 PhoneNumber phoneNumberEnd;
264 pnu->Parse(numberFisrt, "CN", &phoneNumberFirst);
265 pnu->Parse(numberEnd, "CN", &phoneNumberEnd);
266 if (shortInfo->IsValidShortNumber(phoneNumberFirst)) {
267 MatchedNumberInfo matchedNumberInfoFirst;
268 matchedNumberInfoFirst.SetBegin(0);
269 matchedNumberInfoFirst.SetEnd(slantIndex);
270 icu::UnicodeString contentFirst = numberFisrt.c_str();
271 matchedNumberInfoFirst.SetContent(contentFirst);
272 shortList.push_back(matchedNumberInfoFirst);
273 }
274 if (shortInfo->IsValidShortNumber(phoneNumberEnd)) {
275 MatchedNumberInfo matchedNumberInfoEnd;
276 matchedNumberInfoEnd.SetBegin(slantIndex + 1);
277 matchedNumberInfoEnd.SetEnd(testStr.length());
278 icu::UnicodeString contentEnd = numberEnd.c_str();
279 matchedNumberInfoEnd.SetContent(contentEnd);
280 shortList.push_back(matchedNumberInfoEnd);
281 }
282 delete shortInfo;
283 return shortList;
284 }
285
HandlePossibleNumberWithPattern(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message,bool isStartsWithNumber)286 std::vector<MatchedNumberInfo> PositiveRule::HandlePossibleNumberWithPattern(PhoneNumberMatch* possibleNumber,
287 icu::UnicodeString& message, bool isStartsWithNumber)
288 {
289 UErrorCode status = U_ZERO_ERROR;
290 std::vector<MatchedNumberInfo> matchedList;
291 icu::UnicodeString possible = possibleNumber->raw_string().c_str();
292 icu::RegexPattern* pattern = GetPattern();
293 if (pattern == nullptr) {
294 HILOG_ERROR_I18N("RegexPattern is nullptr.");
295 return matchedList;
296 }
297 icu::RegexMatcher* mat = pattern->matcher(message, status);
298 while (mat != nullptr && mat->find(status)) {
299 int start = mat->start(status);
300 int end = mat->end(status);
301 icu::UnicodeString matched = message.tempSubString(start, end - start);
302 bool isMatch = isStartsWithNumber ? matched.startsWith(possible) : matched.endsWith(possible);
303 if (isMatch) {
304 MatchedNumberInfo info;
305 info.SetBegin(isStartsWithNumber ? start : end - possible.length());
306 info.SetEnd(isStartsWithNumber ? (start + possible.length()) : end);
307 info.SetContent(possible);
308 matchedList.push_back(info);
309 }
310 }
311 delete mat;
312 delete pattern;
313 return matchedList;
314 }
315 } // namespace I18n
316 } // namespace Global
317 } // namespace OHOS