1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "regex_rule.h"
16 #include "i18n_hilog.h"
17 #include "phonenumbers/phonenumberutil.h"
18 #include "phonenumbers/phonenumber.h"
19 #include "phonenumbers/shortnumberinfo.h"
20
21 namespace OHOS {
22 namespace Global {
23 namespace I18n {
24 using i18n::phonenumbers::PhoneNumberMatch;
25 using i18n::phonenumbers::PhoneNumber;
26 using i18n::phonenumbers::PhoneNumberUtil;
27 using i18n::phonenumbers::ShortNumberInfo;
28
RegexRule(icu::UnicodeString & regex,std::string & isValidType,std::string & handleType,std::string & insensitive,std::string & type)29 RegexRule::RegexRule(icu::UnicodeString& regex, std::string& isValidType, std::string& handleType,
30 std::string& insensitive, std::string& type)
31 {
32 this->regex = regex;
33 if (type == "CONTAIN") {
34 // 9 indicates a certain execution logic of the border rule.
35 this->type = 9;
36 } else if (type == "CONTAIN_OR_INTERSECT") {
37 // 8 indicates a certain execution logic of the border rule.
38 this->type = 8;
39 } else {
40 this->type = 0;
41 }
42 this->isValidType = isValidType;
43 this->handleType = handleType;
44 this->insensitive = insensitive;
45 }
46
~RegexRule()47 RegexRule::~RegexRule()
48 {
49 }
50
CountDigits(icu::UnicodeString & str)51 int RegexRule::CountDigits(icu::UnicodeString& str)
52 {
53 int count = 0;
54 int len = str.length();
55 for (int i = 0; i < len; i++) {
56 if (u_isdigit(str[i])) {
57 count++;
58 }
59 }
60 return count;
61 }
62
GetType()63 int RegexRule::GetType()
64 {
65 return type;
66 }
67
GetPattern()68 icu::RegexPattern* RegexRule::GetPattern()
69 {
70 UErrorCode status = U_ZERO_ERROR;
71 icu::RegexPattern* pattern;
72 // Sets whether regular expression matching is case sensitive
73 if (insensitive == "True") {
74 pattern = icu::RegexPattern::compile(this->regex, URegexpFlag::UREGEX_CASE_INSENSITIVE, status);
75 } else {
76 pattern = icu::RegexPattern::compile(this->regex, 0, status);
77 }
78 if (U_FAILURE(status)) {
79 HILOG_ERROR_I18N("RegexRule::GetPattern: Compile regex pattern failed.");
80 return nullptr;
81 }
82 return pattern;
83 }
84
IsValid(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)85 PhoneNumberMatch* RegexRule::IsValid(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
86 {
87 if (isValidType == "PreSuf") {
88 return IsValidPreSuf(possibleNumber, message);
89 } else if (isValidType == "Code") {
90 return IsValidCode(possibleNumber, message);
91 } else if (isValidType == "Rawstr") {
92 return IsValidRawstr(possibleNumber, message);
93 }
94 return IsValidDefault(possibleNumber, message);
95 }
96
// Check the prefix or suffix of possibleNumber
IsValidPreSuf(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)98 PhoneNumberMatch* RegexRule::IsValidPreSuf(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
99 {
100 if (possibleNumber != nullptr) {
101 if (possibleNumber->start() - 1 >= 0) {
102 return IsValidStart(possibleNumber, message);
103 }
104 if (possibleNumber->end() <= message.length() - 1) {
105 return IsValidEnd(possibleNumber, message);
106 }
107 }
108 return possibleNumber;
109 }
110
111 // check the suffix of possibleNumber
IsValidEnd(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)112 PhoneNumberMatch* RegexRule::IsValidEnd(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
113 {
114 if (possibleNumber == nullptr) {
115 return possibleNumber;
116 }
117 icu::UnicodeString after = message.tempSubString(possibleNumber->end());
118 bool isTwo = true;
119 int len = after.length();
120 // check the 1st and 2nd char of the suffix.
121 for (int i = 0; i < len; i++) {
122 UChar32 afterChar = after[i];
123 if (i == 0 && !u_isUUppercase(afterChar)) {
124 isTwo = false;
125 break;
126 }
127 // 2 is the third position in the string.
128 if (i < 2 && u_isUAlphabetic(afterChar)) {
129 if (u_isUUppercase(afterChar)) {
130 continue;
131 } else {
132 isTwo = false;
133 break;
134 }
135 }
136 // 1 and 2 are the second and third position in the string, respectively.
137 if (i == 1 || i == 2) {
138 if (afterChar == '-' || afterChar == '\'') {
139 isTwo = false;
140 break;
141 } else if (u_isdigit(afterChar) || u_isspace(afterChar)) {
142 break;
143 } else if (!u_isUAlphabetic(afterChar)) {
144 break;
145 } else {
146 isTwo = false;
147 break;
148 }
149 }
150 }
151 return isTwo ? nullptr : possibleNumber;
152 }
153
154 // check the prefix of possibleNumber
IsValidStart(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)155 PhoneNumberMatch* RegexRule::IsValidStart(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
156 {
157 if (possibleNumber == nullptr) {
158 return possibleNumber;
159 }
160 icu::UnicodeString before = message.tempSubString(0, possibleNumber->start());
161 bool isTwo = true;
162 int len = before.length();
163 for (int i = 0; i < len; i++) {
164 char beforeChar = before[len - 1 - i];
165 if (i == 0 && !u_isUUppercase(beforeChar)) {
166 isTwo = false;
167 break;
168 }
169 // 2 is the third position in the string.
170 if (i < 2 && u_isUAlphabetic(beforeChar)) {
171 if (u_isUUppercase(beforeChar)) {
172 continue;
173 } else {
174 isTwo = false;
175 break;
176 }
177 }
178 if (beforeChar == '-' || beforeChar == '\'') {
179 isTwo = false;
180 break;
181 } else if (u_isdigit(beforeChar) || u_isspace(beforeChar)) {
182 break;
183 } else if (!u_isUAlphabetic(beforeChar)) {
184 break;
185 } else {
186 isTwo = false;
187 break;
188 }
189 }
190 return isTwo ? nullptr : possibleNumber;
191 }
192
IsValidDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)193 PhoneNumberMatch* RegexRule::IsValidDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
194 {
195 return possibleNumber;
196 }
197
PrefixValid(icu::UnicodeString & number,int length)198 bool RegexRule::PrefixValid(icu::UnicodeString& number, int length)
199 {
200 icu::UnicodeString preNumber = number.tempSubString(0, length);
201 if (length == 1) {
202 if (number[0] == '0' || number[0] == '1' || number[0] == '+') {
203 return true;
204 }
205 // 3 indicates the first three digits of a phone number.
206 } else if (length == 3) {
207 if (preNumber == "400" || preNumber == "800") {
208 return true;
209 }
210 // 5 indicates the first five digits of a phone number.
211 } else if (length == 5) {
212 if (preNumber == "11808" || preNumber == "17909" || preNumber == "12593" ||
213 preNumber == "17951" || preNumber == "17911") {
214 return true;
215 }
216 }
217 return false;
218 }
219
NumberValid(icu::UnicodeString & number)220 bool RegexRule::NumberValid(icu::UnicodeString& number)
221 {
222 int lengthOne = 1;
223 // 3 indicates the first three digits of a phone number.
224 int lengthThree = 3;
225 // 11 is the number of digits in the phone number.
226 if (number[0] == '1' && CountDigits(number) > 11) {
227 // 5 indicates the first five digits of a phone number.
228 int lengthFive = 5;
229 if (!PrefixValid(number, lengthFive)) {
230 return false;
231 }
232 // 12 is the number of digits, 0 and 1 indicate the first and second position, respectively.
233 } else if (number[0] == '0' && CountDigits(number) > 12 && number[1] != '0') {
234 return false;
235 // 10 is the number of digits in the phone number.
236 } else if (PrefixValid(number, lengthThree) && CountDigits(number) != 10) {
237 return false;
238 // 9 is the number of digits in the phone number.
239 } else if (!PrefixValid(number, lengthOne) && !PrefixValid(number, lengthThree) && CountDigits(number) >= 9) {
240 if (number.trim()[0] != '9' && number.trim()[0] != '1') {
241 return false;
242 }
243 // 4 is the number of digits in the phone number.
244 } else if (CountDigits(number) <= 4) {
245 return false;
246 }
247 return true;
248 }
249
IsValidCode(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)250 PhoneNumberMatch* RegexRule::IsValidCode(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
251 {
252 if (possibleNumber == nullptr) {
253 return possibleNumber;
254 }
255 icu::UnicodeString number = possibleNumber->raw_string().c_str();
256 // Processes the ;ext= extention number format
257 int32_t ind = number.trim().indexOf(";ext=");
258 if (ind != -1) {
259 number = number.trim().tempSubString(0, ind);
260 }
261 if (number[0] == '(' || number[0] == '[') {
262 StartWithBrackets(number);
263 }
264 if (!NumberValid(number)) {
265 return nullptr;
266 }
267 return possibleNumber;
268 }
269
IsValidRawstr(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)270 PhoneNumberMatch* RegexRule::IsValidRawstr(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
271 {
272 icu::UnicodeString number = possibleNumber->raw_string().c_str();
273 // Processes the ;ext= extention number format
274 int32_t ind = number.trim().indexOf(";ext=");
275 if (ind != -1) {
276 number = number.trim().tempSubString(0, ind);
277 }
278 if (number[0] == '(' || number[0] == '[') {
279 number = number.tempSubString(1);
280 }
281 // 8 and 4 is the number of digits in the phone number.
282 if ((number[0] != '0' && CountDigits(number) == 8) || CountDigits(number) <= 4) {
283 return nullptr;
284 }
285 return possibleNumber;
286 }
287
Handle(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)288 std::vector<MatchedNumberInfo> RegexRule::Handle(PhoneNumberMatch *possibleNumber, icu::UnicodeString& message)
289 {
290 if (handleType == "Operator") {
291 return HandleOperator(possibleNumber, message);
292 } else if (handleType == "Blank") {
293 return HandleBlank(possibleNumber, message);
294 } else if (handleType == "Slant") {
295 return HandleSlant(possibleNumber, message);
296 } else if (handleType == "StartWithMobile") {
297 return HandleStartWithMobile(possibleNumber, message);
298 } else if (handleType == "EndWithMobile") {
299 return HandleEndWithMobile(possibleNumber, message);
300 }
301 return HandleDefault(possibleNumber, message);
302 }
303
HandleDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)304 std::vector<MatchedNumberInfo> RegexRule::HandleDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
305 {
306 MatchedNumberInfo matcher;
307 matcher.SetBegin(0);
308 matcher.SetEnd(1);
309 icu::UnicodeString content = "";
310 matcher.SetContent(content);
311 std::vector<MatchedNumberInfo> matchedNumberInfoList;
312 matchedNumberInfoList.push_back(matcher);
313 return matchedNumberInfoList;
314 }
315
HandleOperator(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)316 std::vector<MatchedNumberInfo> RegexRule::HandleOperator(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
317 {
318 std::vector<MatchedNumberInfo> matchedNumberInfoList;
319 if (possibleNumber == nullptr) {
320 return matchedNumberInfoList;
321 }
322 MatchedNumberInfo matcher;
323 if (possibleNumber->raw_string()[0] == '(' || possibleNumber->raw_string()[0] == '[') {
324 matcher.SetBegin(possibleNumber->start() + 1);
325 } else {
326 matcher.SetBegin(possibleNumber->start());
327 }
328 matcher.SetEnd(possibleNumber->end());
329 matcher.SetContent(message);
330 matchedNumberInfoList.push_back(matcher);
331 return matchedNumberInfoList;
332 }
333
HandleBlank(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)334 std::vector<MatchedNumberInfo> RegexRule::HandleBlank(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
335 {
336 std::vector<MatchedNumberInfo> matchedNumberInfoList;
337 if (possibleNumber == nullptr) {
338 return matchedNumberInfoList;
339 }
340 icu::RegexPattern* pattern = GetPattern();
341 if (pattern == nullptr) {
342 return matchedNumberInfoList;
343 }
344 UErrorCode status = U_ZERO_ERROR;
345 icu::UnicodeString number = possibleNumber->raw_string().c_str();
346 icu::RegexMatcher* matcher = pattern->matcher(number, status);
347 if (U_FAILURE(status) || matcher == nullptr) {
348 HILOG_ERROR_I18N("RegexRule::HandleBlank: Pattern match failed.");
349 return matchedNumberInfoList;
350 }
351 // exclude phone number 2333333
352 icu::UnicodeString negativeRegex = "(?<![-\\d])(23{6,7})(?![-\\d])";
353 icu::RegexMatcher negativePattern(negativeRegex, 0, status);
354 if (U_FAILURE(status)) {
355 delete matcher;
356 delete pattern;
357 return matchedNumberInfoList;
358 }
359 negativePattern.reset(number);
360 if (matcher->find()) {
361 // exclude phone number 5201314
362 icu::UnicodeString speString = "5201314";
363 if (negativePattern.find() || number == speString) {
364 delete matcher;
365 delete pattern;
366 return matchedNumberInfoList;
367 }
368 MatchedNumberInfo matchedNumberInfo;
369 if (possibleNumber->raw_string()[0] != '(' && possibleNumber->raw_string()[0] != '[') {
370 matchedNumberInfo.SetBegin(matcher->start(status) + possibleNumber->start());
371 } else {
372 matchedNumberInfo.SetBegin(possibleNumber->start());
373 }
374 matchedNumberInfo.SetEnd(matcher->end(status) + possibleNumber->start());
375 matchedNumberInfo.SetContent(number);
376 matchedNumberInfoList.push_back(matchedNumberInfo);
377 }
378 delete matcher;
379 delete pattern;
380 return matchedNumberInfoList;
381 }
382
HandleSlant(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)383 std::vector<MatchedNumberInfo> RegexRule::HandleSlant(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
384 {
385 std::vector<MatchedNumberInfo> matchedNumberInfoList;
386 if (possibleNumber == nullptr) {
387 return matchedNumberInfoList;
388 }
389 icu::RegexPattern* pattern = GetPattern();
390 if (pattern == nullptr) {
391 HILOG_ERROR_I18N("RegexRule::HandleSlant: pattern is nullptr.");
392 return matchedNumberInfoList;
393 }
394 UErrorCode status = U_ZERO_ERROR;
395 icu::UnicodeString number = possibleNumber->raw_string().c_str();
396 icu::RegexMatcher* matcher = pattern->matcher(number, status);
397 if (U_FAILURE(status) || matcher == nullptr) {
398 HILOG_ERROR_I18N("RegexRule::HandleSlant: Pattern match failed.");
399 return matchedNumberInfoList;
400 }
401 if (matcher->find()) {
402 int start = matcher->start(status);
403 std::vector<MatchedNumberInfo> tempList = GetNumbersWithSlant(number);
404 // 2 is the size of tempList.
405 if (tempList.size() == 2 && start == 1) {
406 start = 0;
407 }
408 if (tempList.size() > 0) {
409 MatchedNumberInfo matchedNumberInfo;
410 matchedNumberInfo.SetBegin(tempList[0].GetBegin() + start + possibleNumber->start());
411 matchedNumberInfo.SetEnd(tempList[0].GetEnd() + possibleNumber->start());
412 icu::UnicodeString contentFirst = tempList[0].GetContent();
413 matchedNumberInfo.SetContent(contentFirst);
414 matchedNumberInfoList.push_back(matchedNumberInfo);
415 // 2 is the size of tempList.
416 if (tempList.size() == 2) {
417 MatchedNumberInfo numberInfo;
418 numberInfo.SetBegin(tempList[1].GetBegin() + start + possibleNumber->start());
419 numberInfo.SetEnd(tempList[1].GetEnd() + possibleNumber->start());
420 icu::UnicodeString contentSecond = tempList[1].GetContent();
421 numberInfo.SetContent(contentSecond);
422 matchedNumberInfoList.push_back(numberInfo);
423 }
424 }
425 }
426 delete matcher;
427 delete pattern;
428 return matchedNumberInfoList;
429 }
430
HandleStartWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)431 std::vector<MatchedNumberInfo> RegexRule::HandleStartWithMobile(PhoneNumberMatch* possibleNumber,
432 icu::UnicodeString& message)
433 {
434 return HandlePossibleNumberWithPattern(possibleNumber, message, false);
435 }
436
HandleEndWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)437 std::vector<MatchedNumberInfo> RegexRule::HandleEndWithMobile(PhoneNumberMatch* possibleNumber,
438 icu::UnicodeString& message)
439 {
440 return HandlePossibleNumberWithPattern(possibleNumber, message, true);
441 }
442
443 // Handle phone number starting with '(' or '['
StartWithBrackets(icu::UnicodeString & number)444 void RegexRule::StartWithBrackets(icu::UnicodeString& number)
445 {
446 icu::UnicodeString right = "";
447 if (number[0] == '(') {
448 right = ')';
449 }
450 if (number[0] == '[') {
451 right = ']';
452 }
453 int neind = number.indexOf(right);
454 if (neind != -1) {
455 icu::UnicodeString phoneStr = number.tempSubString(0, neind);
456 int phoneLength = CountDigits(phoneStr);
457 icu::UnicodeString extraStr = number.tempSubString(neind);
458 int extra = CountDigits(extraStr);
459 // 4 is the number of numbers in parentheses, 1 and 2 are the number of numbers outside parentheses.
460 if ((phoneLength > 4) && (extra == 1 || extra == 2)) {
461 number = number.tempSubString(1, neind - 1);
462 } else {
463 number = number.tempSubString(1);
464 }
465 } else {
466 number = number.tempSubString(1);
467 }
468 }
469
470 // identify short number separated by '/'
GetNumbersWithSlant(icu::UnicodeString & testStr)471 std::vector<MatchedNumberInfo> RegexRule::GetNumbersWithSlant(icu::UnicodeString& testStr)
472 {
473 std::vector<MatchedNumberInfo> shortList;
474 ShortNumberInfo* shortInfo = new (std::nothrow) ShortNumberInfo();
475 if (shortInfo == nullptr) {
476 HILOG_ERROR_I18N("ShortNumberInfo construct failed.");
477 return shortList;
478 }
479 std::string numberFisrt = "";
480 std::string numberEnd = "";
481 int slantIndex = 0;
482 for (int i = 0; i < testStr.length(); i++) {
483 if (testStr[i] == '/' || testStr[i] == '|') {
484 slantIndex = i;
485 testStr.tempSubString(0, i).toUTF8String(numberFisrt);
486 testStr.tempSubString(i + 1).toUTF8String(numberEnd);
487 }
488 }
489 PhoneNumberUtil* pnu = PhoneNumberUtil::GetInstance();
490 if (pnu == nullptr) {
491 delete shortInfo;
492 HILOG_ERROR_I18N("RegexRule::GetNumbersWithSlant: Get phone number util failed.");
493 return shortList;
494 }
495 PhoneNumber phoneNumberFirst;
496 PhoneNumber phoneNumberEnd;
497 pnu->Parse(numberFisrt, "CN", &phoneNumberFirst);
498 pnu->Parse(numberEnd, "CN", &phoneNumberEnd);
499 if (shortInfo->IsValidShortNumber(phoneNumberFirst)) {
500 MatchedNumberInfo matchedNumberInfoFirst;
501 matchedNumberInfoFirst.SetBegin(0);
502 matchedNumberInfoFirst.SetEnd(slantIndex);
503 icu::UnicodeString contentFirst = numberFisrt.c_str();
504 matchedNumberInfoFirst.SetContent(contentFirst);
505 shortList.push_back(matchedNumberInfoFirst);
506 }
507 if (shortInfo->IsValidShortNumber(phoneNumberEnd)) {
508 MatchedNumberInfo matchedNumberInfoEnd;
509 matchedNumberInfoEnd.SetBegin(slantIndex + 1);
510 matchedNumberInfoEnd.SetEnd(testStr.length());
511 icu::UnicodeString contentEnd = numberEnd.c_str();
512 matchedNumberInfoEnd.SetContent(contentEnd);
513 shortList.push_back(matchedNumberInfoEnd);
514 }
515 delete shortInfo;
516 return shortList;
517 }
518
HandlePossibleNumberWithPattern(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message,bool isStartsWithNumber)519 std::vector<MatchedNumberInfo> RegexRule::HandlePossibleNumberWithPattern(PhoneNumberMatch* possibleNumber,
520 icu::UnicodeString& message, bool isStartsWithNumber)
521 {
522 std::vector<MatchedNumberInfo> matchedList;
523 if (possibleNumber == nullptr) {
524 return matchedList;
525 }
526 icu::RegexPattern* pattern = GetPattern();
527 if (pattern == nullptr) {
528 HILOG_ERROR_I18N("RegexPattern is nullptr.");
529 return matchedList;
530 }
531 UErrorCode status = U_ZERO_ERROR;
532 icu::RegexMatcher* mat = pattern->matcher(message, status);
533 if (U_FAILURE(status) || mat == nullptr) {
534 HILOG_ERROR_I18N("RegexRule::HandlePossibleNumberWithPattern: Pattern match failed.");
535 return matchedList;
536 }
537 icu::UnicodeString possible = possibleNumber->raw_string().c_str();
538 while (mat->find(status)) {
539 int start = mat->start(status);
540 int end = mat->end(status);
541 icu::UnicodeString matched = message.tempSubString(start, end - start);
542 bool isMatch = isStartsWithNumber ? matched.startsWith(possible) : matched.endsWith(possible);
543 if (isMatch) {
544 MatchedNumberInfo info;
545 info.SetBegin(isStartsWithNumber ? start : end - possible.length());
546 info.SetEnd(isStartsWithNumber ? (start + possible.length()) : end);
547 info.SetContent(possible);
548 matchedList.push_back(info);
549 }
550 }
551 delete mat;
552 delete pattern;
553 return matchedList;
554 }
555 } // namespace I18n
556 } // namespace Global
557 } // namespace OHOS