• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
#include "regex_rule.h"

#include <memory>

#include "i18n_hilog.h"
#include "phonenumbers/phonenumberutil.h"
#include "phonenumbers/phonenumber.h"
#include "phonenumbers/shortnumberinfo.h"
20 
21 namespace OHOS {
22 namespace Global {
23 namespace I18n {
24 using i18n::phonenumbers::PhoneNumberMatch;
25 using i18n::phonenumbers::PhoneNumber;
26 using i18n::phonenumbers::PhoneNumberUtil;
27 using i18n::phonenumbers::ShortNumberInfo;
28 
RegexRule(icu::UnicodeString & regex,std::string & isValidType,std::string & handleType,std::string & insensitive,std::string & type)29 RegexRule::RegexRule(icu::UnicodeString& regex, std::string& isValidType, std::string& handleType,
30     std::string& insensitive, std::string& type)
31 {
32     this->regex = regex;
33     if (type == "CONTAIN") {
34         // 9 indicates a certain execution logic of the border rule.
35         this->type = 9;
36     } else if (type == "CONTAIN_OR_INTERSECT") {
37         // 8 indicates a certain execution logic of the border rule.
38         this->type = 8;
39     } else {
40         this->type = 0;
41     }
42     this->isValidType = isValidType;
43     this->handleType = handleType;
44     this->insensitive = insensitive;
45 }
46 
~RegexRule()47 RegexRule::~RegexRule()
48 {
49 }
50 
CountDigits(icu::UnicodeString & str)51 int RegexRule::CountDigits(icu::UnicodeString& str)
52 {
53     int count = 0;
54     int len = str.length();
55     for (int i = 0; i < len; i++) {
56         if (u_isdigit(str[i])) {
57             count++;
58         }
59     }
60     return count;
61 }
62 
GetType()63 int RegexRule::GetType()
64 {
65     return type;
66 }
67 
GetPattern()68 icu::RegexPattern* RegexRule::GetPattern()
69 {
70     UErrorCode status = U_ZERO_ERROR;
71     icu::RegexPattern* pattern;
72     // Sets whether regular expression matching is case sensitive
73     if (insensitive == "True") {
74         pattern = icu::RegexPattern::compile(this->regex, URegexpFlag::UREGEX_CASE_INSENSITIVE, status);
75     } else {
76         pattern = icu::RegexPattern::compile(this->regex, 0, status);
77     }
78     if (U_FAILURE(status)) {
79         HILOG_ERROR_I18N("RegexRule::GetPattern: Compile regex pattern failed.");
80         return nullptr;
81     }
82     return pattern;
83 }
84 
IsValid(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)85 PhoneNumberMatch* RegexRule::IsValid(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
86 {
87     if (isValidType == "PreSuf") {
88         return IsValidPreSuf(possibleNumber, message);
89     } else if (isValidType == "Code") {
90         return IsValidCode(possibleNumber, message);
91     } else if (isValidType == "Rawstr") {
92         return IsValidRawstr(possibleNumber, message);
93     }
94     return IsValidDefault(possibleNumber, message);
95 }
96 
97 // Check the preifx or suffix of possibleNumber
IsValidPreSuf(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)98 PhoneNumberMatch* RegexRule::IsValidPreSuf(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
99 {
100     if (possibleNumber != nullptr) {
101         if (possibleNumber->start() - 1 >= 0) {
102             return IsValidStart(possibleNumber, message);
103         }
104         if (possibleNumber->end() <= message.length() - 1) {
105             return IsValidEnd(possibleNumber, message);
106         }
107     }
108     return possibleNumber;
109 }
110 
111 // check the suffix of possibleNumber
IsValidEnd(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)112 PhoneNumberMatch* RegexRule::IsValidEnd(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
113 {
114     if (possibleNumber == nullptr) {
115         return possibleNumber;
116     }
117     icu::UnicodeString after = message.tempSubString(possibleNumber->end());
118     bool isTwo = true;
119     int len = after.length();
120     // check the 1st and 2nd char of the suffix.
121     for (int i = 0; i < len; i++) {
122         UChar32 afterChar = after[i];
123         if (i == 0 && !u_isUUppercase(afterChar)) {
124             isTwo = false;
125             break;
126         }
127         // 2 is the third position in the string.
128         if (i < 2 && u_isUAlphabetic(afterChar)) {
129             if (u_isUUppercase(afterChar)) {
130                 continue;
131             } else {
132                 isTwo = false;
133                 break;
134             }
135         }
136         // 1 and 2 are the second and third position in the string, respectively.
137         if (i == 1 || i == 2) {
138             if (afterChar == '-' || afterChar == '\'') {
139                 isTwo = false;
140                 break;
141             } else if (u_isdigit(afterChar) || u_isspace(afterChar)) {
142                 break;
143             } else if (!u_isUAlphabetic(afterChar)) {
144                 break;
145             } else {
146                 isTwo = false;
147                 break;
148             }
149         }
150     }
151     return isTwo ? nullptr : possibleNumber;
152 }
153 
154 // check the prefix of possibleNumber
IsValidStart(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)155 PhoneNumberMatch* RegexRule::IsValidStart(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
156 {
157     if (possibleNumber == nullptr) {
158         return possibleNumber;
159     }
160     icu::UnicodeString before = message.tempSubString(0, possibleNumber->start());
161     bool isTwo = true;
162     int len = before.length();
163     for (int i = 0; i < len; i++) {
164         char beforeChar = before[len - 1 - i];
165         if (i == 0 && !u_isUUppercase(beforeChar)) {
166             isTwo = false;
167             break;
168         }
169         // 2 is the third position in the string.
170         if (i < 2 && u_isUAlphabetic(beforeChar)) {
171             if (u_isUUppercase(beforeChar)) {
172                 continue;
173             } else {
174                 isTwo = false;
175                 break;
176             }
177         }
178         if (beforeChar == '-' || beforeChar == '\'') {
179             isTwo = false;
180             break;
181         } else if (u_isdigit(beforeChar) || u_isspace(beforeChar)) {
182             break;
183         } else if (!u_isUAlphabetic(beforeChar)) {
184             break;
185         } else {
186             isTwo = false;
187             break;
188         }
189     }
190     return isTwo ? nullptr : possibleNumber;
191 }
192 
IsValidDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)193 PhoneNumberMatch* RegexRule::IsValidDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
194 {
195     return possibleNumber;
196 }
197 
PrefixValid(icu::UnicodeString & number,int length)198 bool RegexRule::PrefixValid(icu::UnicodeString& number, int length)
199 {
200     icu::UnicodeString preNumber = number.tempSubString(0, length);
201     if (length == 1) {
202         if (number[0] == '0' || number[0] == '1' || number[0] == '+') {
203             return true;
204         }
205     // 3 indicates the first three digits of a phone number.
206     } else if (length == 3) {
207         if (preNumber == "400" || preNumber == "800") {
208             return true;
209         }
210     // 5 indicates the first five digits of a phone number.
211     } else if (length == 5) {
212         if (preNumber == "11808" || preNumber == "17909" || preNumber == "12593" ||
213             preNumber == "17951" || preNumber == "17911") {
214             return true;
215         }
216     }
217     return false;
218 }
219 
NumberValid(icu::UnicodeString & number)220 bool RegexRule::NumberValid(icu::UnicodeString& number)
221 {
222     int lengthOne = 1;
223     // 3 indicates the first three digits of a phone number.
224     int lengthThree = 3;
225     // 11 is the number of digits in the phone number.
226     if (number[0] == '1' && CountDigits(number) > 11) {
227         // 5 indicates the first five digits of a phone number.
228         int lengthFive = 5;
229         if (!PrefixValid(number, lengthFive)) {
230             return false;
231         }
232     // 12 is the number of digits, 0 and 1 indicate the first and second position, respectively.
233     } else if (number[0] == '0' && CountDigits(number) > 12 && number[1] != '0') {
234         return false;
235     // 10 is the number of digits in the phone number.
236     } else if (PrefixValid(number, lengthThree) && CountDigits(number) != 10) {
237         return false;
238     // 9 is the number of digits in the phone number.
239     } else if (!PrefixValid(number, lengthOne) && !PrefixValid(number, lengthThree) && CountDigits(number) >= 9) {
240         if (number.trim()[0] != '9' && number.trim()[0] != '1') {
241             return false;
242         }
243     // 4 is the number of digits in the phone number.
244     } else if (CountDigits(number) <= 4) {
245         return false;
246     }
247     return true;
248 }
249 
IsValidCode(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)250 PhoneNumberMatch* RegexRule::IsValidCode(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
251 {
252     if (possibleNumber == nullptr) {
253         return possibleNumber;
254     }
255     icu::UnicodeString number = possibleNumber->raw_string().c_str();
256     // Processes the ;ext= extention number format
257     int32_t ind = number.trim().indexOf(";ext=");
258     if (ind != -1) {
259         number = number.trim().tempSubString(0, ind);
260     }
261     if (number[0] == '(' || number[0] == '[') {
262         StartWithBrackets(number);
263     }
264     if (!NumberValid(number)) {
265         return nullptr;
266     }
267     return possibleNumber;
268 }
269 
IsValidRawstr(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)270 PhoneNumberMatch* RegexRule::IsValidRawstr(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
271 {
272     icu::UnicodeString number = possibleNumber->raw_string().c_str();
273     // Processes the ;ext= extention number format
274     int32_t ind = number.trim().indexOf(";ext=");
275     if (ind != -1) {
276         number = number.trim().tempSubString(0, ind);
277     }
278     if (number[0] == '(' || number[0] == '[') {
279         number = number.tempSubString(1);
280     }
281     // 8 and 4 is the number of digits in the phone number.
282     if ((number[0] != '0' && CountDigits(number) == 8) || CountDigits(number) <= 4) {
283         return nullptr;
284     }
285     return possibleNumber;
286 }
287 
Handle(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)288 std::vector<MatchedNumberInfo> RegexRule::Handle(PhoneNumberMatch *possibleNumber, icu::UnicodeString& message)
289 {
290     if (handleType == "Operator") {
291         return HandleOperator(possibleNumber, message);
292     } else if (handleType == "Blank") {
293         return HandleBlank(possibleNumber, message);
294     } else if (handleType == "Slant") {
295         return HandleSlant(possibleNumber, message);
296     } else if (handleType == "StartWithMobile") {
297         return HandleStartWithMobile(possibleNumber, message);
298     } else if (handleType == "EndWithMobile") {
299         return HandleEndWithMobile(possibleNumber, message);
300     }
301     return HandleDefault(possibleNumber, message);
302 }
303 
HandleDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)304 std::vector<MatchedNumberInfo> RegexRule::HandleDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
305 {
306     MatchedNumberInfo matcher;
307     matcher.SetBegin(0);
308     matcher.SetEnd(1);
309     icu::UnicodeString content = "";
310     matcher.SetContent(content);
311     std::vector<MatchedNumberInfo> matchedNumberInfoList;
312     matchedNumberInfoList.push_back(matcher);
313     return matchedNumberInfoList;
314 }
315 
HandleOperator(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)316 std::vector<MatchedNumberInfo> RegexRule::HandleOperator(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
317 {
318     std::vector<MatchedNumberInfo> matchedNumberInfoList;
319     if (possibleNumber == nullptr) {
320         return matchedNumberInfoList;
321     }
322     MatchedNumberInfo matcher;
323     if (possibleNumber->raw_string()[0] == '(' || possibleNumber->raw_string()[0] == '[') {
324         matcher.SetBegin(possibleNumber->start() + 1);
325     } else {
326         matcher.SetBegin(possibleNumber->start());
327     }
328     matcher.SetEnd(possibleNumber->end());
329     matcher.SetContent(message);
330     matchedNumberInfoList.push_back(matcher);
331     return matchedNumberInfoList;
332 }
333 
HandleBlank(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)334 std::vector<MatchedNumberInfo> RegexRule::HandleBlank(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
335 {
336     std::vector<MatchedNumberInfo> matchedNumberInfoList;
337     if (possibleNumber == nullptr) {
338         return matchedNumberInfoList;
339     }
340     icu::RegexPattern* pattern = GetPattern();
341     if (pattern == nullptr) {
342         return matchedNumberInfoList;
343     }
344     UErrorCode status = U_ZERO_ERROR;
345     icu::UnicodeString number = possibleNumber->raw_string().c_str();
346     icu::RegexMatcher* matcher = pattern->matcher(number, status);
347     if (U_FAILURE(status) || matcher == nullptr) {
348         HILOG_ERROR_I18N("RegexRule::HandleBlank: Pattern match failed.");
349         return matchedNumberInfoList;
350     }
351     // exclude phone number 2333333
352     icu::UnicodeString negativeRegex = "(?<![-\\d])(23{6,7})(?![-\\d])";
353     icu::RegexMatcher negativePattern(negativeRegex, 0, status);
354     if (U_FAILURE(status)) {
355         delete matcher;
356         delete pattern;
357         return matchedNumberInfoList;
358     }
359     negativePattern.reset(number);
360     if (matcher->find()) {
361         // exclude phone number 5201314
362         icu::UnicodeString speString = "5201314";
363         if (negativePattern.find() || number == speString) {
364             delete matcher;
365             delete pattern;
366             return matchedNumberInfoList;
367         }
368         MatchedNumberInfo matchedNumberInfo;
369         if (possibleNumber->raw_string()[0] != '(' && possibleNumber->raw_string()[0] != '[') {
370             matchedNumberInfo.SetBegin(matcher->start(status) + possibleNumber->start());
371         } else {
372             matchedNumberInfo.SetBegin(possibleNumber->start());
373         }
374         matchedNumberInfo.SetEnd(matcher->end(status) + possibleNumber->start());
375         matchedNumberInfo.SetContent(number);
376         matchedNumberInfoList.push_back(matchedNumberInfo);
377     }
378     delete matcher;
379     delete pattern;
380     return matchedNumberInfoList;
381 }
382 
HandleSlant(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)383 std::vector<MatchedNumberInfo> RegexRule::HandleSlant(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
384 {
385     std::vector<MatchedNumberInfo> matchedNumberInfoList;
386     if (possibleNumber == nullptr) {
387         return matchedNumberInfoList;
388     }
389     icu::RegexPattern* pattern = GetPattern();
390     if (pattern == nullptr) {
391         HILOG_ERROR_I18N("RegexRule::HandleSlant: pattern is nullptr.");
392         return matchedNumberInfoList;
393     }
394     UErrorCode status = U_ZERO_ERROR;
395     icu::UnicodeString number = possibleNumber->raw_string().c_str();
396     icu::RegexMatcher* matcher = pattern->matcher(number, status);
397     if (U_FAILURE(status) || matcher == nullptr) {
398         HILOG_ERROR_I18N("RegexRule::HandleSlant: Pattern match failed.");
399         return matchedNumberInfoList;
400     }
401     if (matcher->find()) {
402         int start = matcher->start(status);
403         std::vector<MatchedNumberInfo> tempList = GetNumbersWithSlant(number);
404         // 2 is the size of tempList.
405         if (tempList.size() == 2 && start == 1) {
406             start = 0;
407         }
408         if (tempList.size() > 0) {
409             MatchedNumberInfo matchedNumberInfo;
410             matchedNumberInfo.SetBegin(tempList[0].GetBegin() + start + possibleNumber->start());
411             matchedNumberInfo.SetEnd(tempList[0].GetEnd() + possibleNumber->start());
412             icu::UnicodeString contentFirst = tempList[0].GetContent();
413             matchedNumberInfo.SetContent(contentFirst);
414             matchedNumberInfoList.push_back(matchedNumberInfo);
415             // 2 is the size of tempList.
416             if (tempList.size() == 2) {
417                 MatchedNumberInfo numberInfo;
418                 numberInfo.SetBegin(tempList[1].GetBegin() + start + possibleNumber->start());
419                 numberInfo.SetEnd(tempList[1].GetEnd() + possibleNumber->start());
420                 icu::UnicodeString contentSecond = tempList[1].GetContent();
421                 numberInfo.SetContent(contentSecond);
422                 matchedNumberInfoList.push_back(numberInfo);
423             }
424         }
425     }
426     delete matcher;
427     delete pattern;
428     return matchedNumberInfoList;
429 }
430 
HandleStartWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)431 std::vector<MatchedNumberInfo> RegexRule::HandleStartWithMobile(PhoneNumberMatch* possibleNumber,
432     icu::UnicodeString& message)
433 {
434     return HandlePossibleNumberWithPattern(possibleNumber, message, false);
435 }
436 
HandleEndWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)437 std::vector<MatchedNumberInfo> RegexRule::HandleEndWithMobile(PhoneNumberMatch* possibleNumber,
438     icu::UnicodeString& message)
439 {
440     return HandlePossibleNumberWithPattern(possibleNumber, message, true);
441 }
442 
443 // Handle phone number starting with '(' or '['
StartWithBrackets(icu::UnicodeString & number)444 void RegexRule::StartWithBrackets(icu::UnicodeString& number)
445 {
446     icu::UnicodeString right = "";
447     if (number[0] == '(') {
448         right = ')';
449     }
450     if (number[0] == '[') {
451         right = ']';
452     }
453     int neind = number.indexOf(right);
454     if (neind != -1) {
455         icu::UnicodeString phoneStr = number.tempSubString(0, neind);
456         int phoneLength = CountDigits(phoneStr);
457         icu::UnicodeString extraStr = number.tempSubString(neind);
458         int extra = CountDigits(extraStr);
459         // 4 is the number of numbers in parentheses, 1 and 2 are the number of numbers outside parentheses.
460         if ((phoneLength > 4) && (extra == 1 || extra == 2)) {
461             number = number.tempSubString(1, neind - 1);
462         } else {
463             number = number.tempSubString(1);
464         }
465     } else {
466         number = number.tempSubString(1);
467     }
468 }
469 
470 // identify short number separated by '/'
GetNumbersWithSlant(icu::UnicodeString & testStr)471 std::vector<MatchedNumberInfo> RegexRule::GetNumbersWithSlant(icu::UnicodeString& testStr)
472 {
473     std::vector<MatchedNumberInfo> shortList;
474     ShortNumberInfo* shortInfo = new (std::nothrow) ShortNumberInfo();
475     if (shortInfo == nullptr) {
476         HILOG_ERROR_I18N("ShortNumberInfo construct failed.");
477         return shortList;
478     }
479     std::string numberFisrt = "";
480     std::string numberEnd = "";
481     int slantIndex = 0;
482     for (int i = 0; i < testStr.length(); i++) {
483         if (testStr[i] == '/' || testStr[i] == '|') {
484             slantIndex = i;
485             testStr.tempSubString(0, i).toUTF8String(numberFisrt);
486             testStr.tempSubString(i + 1).toUTF8String(numberEnd);
487         }
488     }
489     PhoneNumberUtil* pnu = PhoneNumberUtil::GetInstance();
490     if (pnu == nullptr) {
491         delete shortInfo;
492         HILOG_ERROR_I18N("RegexRule::GetNumbersWithSlant: Get phone number util failed.");
493         return shortList;
494     }
495     PhoneNumber phoneNumberFirst;
496     PhoneNumber phoneNumberEnd;
497     pnu->Parse(numberFisrt, "CN", &phoneNumberFirst);
498     pnu->Parse(numberEnd, "CN", &phoneNumberEnd);
499     if (shortInfo->IsValidShortNumber(phoneNumberFirst)) {
500         MatchedNumberInfo matchedNumberInfoFirst;
501         matchedNumberInfoFirst.SetBegin(0);
502         matchedNumberInfoFirst.SetEnd(slantIndex);
503         icu::UnicodeString contentFirst = numberFisrt.c_str();
504         matchedNumberInfoFirst.SetContent(contentFirst);
505         shortList.push_back(matchedNumberInfoFirst);
506     }
507     if (shortInfo->IsValidShortNumber(phoneNumberEnd)) {
508         MatchedNumberInfo matchedNumberInfoEnd;
509         matchedNumberInfoEnd.SetBegin(slantIndex + 1);
510         matchedNumberInfoEnd.SetEnd(testStr.length());
511         icu::UnicodeString contentEnd = numberEnd.c_str();
512         matchedNumberInfoEnd.SetContent(contentEnd);
513         shortList.push_back(matchedNumberInfoEnd);
514     }
515     delete shortInfo;
516     return shortList;
517 }
518 
HandlePossibleNumberWithPattern(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message,bool isStartsWithNumber)519 std::vector<MatchedNumberInfo> RegexRule::HandlePossibleNumberWithPattern(PhoneNumberMatch* possibleNumber,
520     icu::UnicodeString& message, bool isStartsWithNumber)
521 {
522     std::vector<MatchedNumberInfo> matchedList;
523     if (possibleNumber == nullptr) {
524         return matchedList;
525     }
526     icu::RegexPattern* pattern = GetPattern();
527     if (pattern == nullptr) {
528         HILOG_ERROR_I18N("RegexPattern is nullptr.");
529         return matchedList;
530     }
531     UErrorCode status = U_ZERO_ERROR;
532     icu::RegexMatcher* mat = pattern->matcher(message, status);
533     if (U_FAILURE(status) || mat == nullptr) {
534         HILOG_ERROR_I18N("RegexRule::HandlePossibleNumberWithPattern: Pattern match failed.");
535         return matchedList;
536     }
537     icu::UnicodeString possible = possibleNumber->raw_string().c_str();
538     while (mat->find(status)) {
539         int start = mat->start(status);
540         int end = mat->end(status);
541         icu::UnicodeString matched = message.tempSubString(start, end - start);
542         bool isMatch = isStartsWithNumber ? matched.startsWith(possible) : matched.endsWith(possible);
543         if (isMatch) {
544             MatchedNumberInfo info;
545             info.SetBegin(isStartsWithNumber ? start : end - possible.length());
546             info.SetEnd(isStartsWithNumber ? (start + possible.length()) : end);
547             info.SetContent(possible);
548             matchedList.push_back(info);
549         }
550     }
551     delete mat;
552     delete pattern;
553     return matchedList;
554 }
555 } // namespace I18n
556 } // namespace Global
557 } // namespace OHOS