• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "regex_rule.h"
16 #include "i18n_hilog.h"
17 #include "phonenumbers/phonenumberutil.h"
18 #include "phonenumbers/phonenumber.h"
19 #include "phonenumbers/shortnumberinfo.h"
20 
21 namespace OHOS {
22 namespace Global {
23 namespace I18n {
24 using i18n::phonenumbers::PhoneNumberMatch;
25 using i18n::phonenumbers::PhoneNumber;
26 using i18n::phonenumbers::PhoneNumberUtil;
27 using i18n::phonenumbers::ShortNumberInfo;
28 
RegexRule(icu::UnicodeString & regex,std::string & isValidType,std::string & handleType,std::string & insensitive,std::string & type)29 RegexRule::RegexRule(icu::UnicodeString& regex, std::string& isValidType, std::string& handleType,
30     std::string& insensitive, std::string& type)
31 {
32     this->regex = regex;
33     if (type == "CONTAIN") {
34         // 9 indicates a certain execution logic of the border rule.
35         this->type = 9;
36     } else if (type == "CONTAIN_OR_INTERSECT") {
37         // 8 indicates a certain execution logic of the border rule.
38         this->type = 8;
39     } else {
40         this->type = 0;
41     }
42     this->status = U_ZERO_ERROR;
43     this->isValidType = isValidType;
44     this->handleType = handleType;
45     this->insensitive = insensitive;
46     if (regex.length() == 0) {
47         return;
48     }
49 }
50 
~RegexRule()51 RegexRule::~RegexRule()
52 {
53 }
54 
CountDigits(icu::UnicodeString & str)55 int RegexRule::CountDigits(icu::UnicodeString& str)
56 {
57     int count = 0;
58     int len = str.length();
59     for (int i = 0; i < len; i++) {
60         if (u_isdigit(str[i])) {
61             count++;
62         }
63     }
64     return count;
65 }
66 
GetType()67 int RegexRule::GetType()
68 {
69     return type;
70 }
71 
GetPattern()72 icu::RegexPattern* RegexRule::GetPattern()
73 {
74     // Sets whether regular expression matching is case sensitive
75     if (insensitive == "True") {
76         return icu::RegexPattern::compile(this->regex, URegexpFlag::UREGEX_CASE_INSENSITIVE, this->status);
77     } else {
78         return icu::RegexPattern::compile(this->regex, 0, this->status);
79     }
80 }
81 
IsValid(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)82 PhoneNumberMatch* RegexRule::IsValid(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
83 {
84     if (isValidType == "PreSuf") {
85         return IsValidPreSuf(possibleNumber, message);
86     } else if (isValidType == "Code") {
87         return IsValidCode(possibleNumber, message);
88     } else if (isValidType == "Rawstr") {
89         return IsValidRawstr(possibleNumber, message);
90     }
91     return IsValidDefault(possibleNumber, message);
92 }
93 
94 // Check the preifx or suffix of possibleNumber
IsValidPreSuf(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)95 PhoneNumberMatch* RegexRule::IsValidPreSuf(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
96 {
97     if (possibleNumber != nullptr) {
98         if (possibleNumber->start() - 1 >= 0) {
99             return IsValidStart(possibleNumber, message);
100         }
101         if (possibleNumber->end() <= message.length() - 1) {
102             return IsValidEnd(possibleNumber, message);
103         }
104     }
105     return possibleNumber;
106 }
107 
108 // check the suffix of possibleNumber
IsValidEnd(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)109 PhoneNumberMatch* RegexRule::IsValidEnd(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
110 {
111     icu::UnicodeString after = message.tempSubString(possibleNumber->end());
112     bool isTwo = true;
113     int len = after.length();
114     // check the 1st and 2nd char of the suffix.
115     for (int i = 0; i < len; i++) {
116         UChar32 afterChar = after[i];
117         if (i == 0 && !u_isUUppercase(afterChar)) {
118             isTwo = false;
119             break;
120         }
121         // 2 is the third position in the string.
122         if (i < 2 && u_isUAlphabetic(afterChar)) {
123             if (u_isUUppercase(afterChar)) {
124                 continue;
125             } else {
126                 isTwo = false;
127                 break;
128             }
129         }
130         // 1 and 2 are the second and third position in the string, respectively.
131         if (i == 1 || i == 2) {
132             if (afterChar == '-' || afterChar == '\'') {
133                 isTwo = false;
134                 break;
135             } else if (u_isdigit(afterChar) || u_isspace(afterChar)) {
136                 break;
137             } else if (!u_isUAlphabetic(afterChar)) {
138                 break;
139             } else {
140                 isTwo = false;
141                 break;
142             }
143         }
144     }
145     return isTwo ? nullptr : possibleNumber;
146 }
147 
148 // check the prefix of possibleNumber
IsValidStart(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)149 PhoneNumberMatch* RegexRule::IsValidStart(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
150 {
151     icu::UnicodeString before = message.tempSubString(0, possibleNumber->start());
152     bool isTwo = true;
153     int len = before.length();
154     for (int i = 0; i < len; i++) {
155         char beforeChar = before[len - 1 - i];
156         if (i == 0 && !u_isUUppercase(beforeChar)) {
157             isTwo = false;
158             break;
159         }
160         // 2 is the third position in the string.
161         if (i < 2 && u_isUAlphabetic(beforeChar)) {
162             if (u_isUUppercase(beforeChar)) {
163                 continue;
164             } else {
165                 isTwo = false;
166                 break;
167             }
168         }
169         if (beforeChar == '-' || beforeChar == '\'') {
170             isTwo = false;
171             break;
172         } else if (u_isdigit(beforeChar) || u_isspace(beforeChar)) {
173             break;
174         } else if (!u_isUAlphabetic(beforeChar)) {
175             break;
176         } else {
177             isTwo = false;
178             break;
179         }
180     }
181     return isTwo ? nullptr : possibleNumber;
182 }
183 
IsValidDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)184 PhoneNumberMatch* RegexRule::IsValidDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
185 {
186     return possibleNumber;
187 }
188 
PrefixValid(icu::UnicodeString & number,int length)189 bool RegexRule::PrefixValid(icu::UnicodeString& number, int length)
190 {
191     icu::UnicodeString preNumber = number.tempSubString(0, length);
192     if (length == 1) {
193         if (number[0] == '0' || number[0] == '1' || number[0] == '+') {
194             return true;
195         }
196     // 3 indicates the first three digits of a phone number.
197     } else if (length == 3) {
198         if (preNumber == "400" || preNumber == "800") {
199             return true;
200         }
201     // 5 indicates the first five digits of a phone number.
202     } else if (length == 5) {
203         if (preNumber == "11808" || preNumber == "17909" || preNumber == "12593" ||
204             preNumber == "17951" || preNumber == "17911") {
205             return true;
206         }
207     }
208     return false;
209 }
210 
NumberValid(icu::UnicodeString & number)211 bool RegexRule::NumberValid(icu::UnicodeString& number)
212 {
213     int lengthOne = 1;
214     // 3 indicates the first three digits of a phone number.
215     int lengthThree = 3;
216     // 11 is the number of digits in the phone number.
217     if (number[0] == '1' && CountDigits(number) > 11) {
218         // 5 indicates the first five digits of a phone number.
219         int lengthFive = 5;
220         if (!PrefixValid(number, lengthFive)) {
221             return false;
222         }
223     // 12 is the number of digits, 0 and 1 indicate the first and second position, respectively.
224     } else if (number[0] == '0' && CountDigits(number) > 12 && number[1] != '0') {
225         return false;
226     // 10 is the number of digits in the phone number.
227     } else if (PrefixValid(number, lengthThree) && CountDigits(number) != 10) {
228         return false;
229     // 9 is the number of digits in the phone number.
230     } else if (!PrefixValid(number, lengthOne) && !PrefixValid(number, lengthThree) && CountDigits(number) >= 9) {
231         if (number.trim()[0] != '9' && number.trim()[0] != '1') {
232             return false;
233         }
234     // 4 is the number of digits in the phone number.
235     } else if (CountDigits(number) <= 4) {
236         return false;
237     }
238     return true;
239 }
240 
IsValidCode(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)241 PhoneNumberMatch* RegexRule::IsValidCode(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
242 {
243     bool isValid = true;
244     icu::UnicodeString number = possibleNumber->raw_string().c_str();
245     // Processes the ;ext= extention number format
246     int32_t ind = number.trim().indexOf(";ext=");
247     if (ind != -1) {
248         number = number.trim().tempSubString(0, ind);
249     }
250     if (number[0] == '(' || number[0] == '[') {
251         StartWithBrackets(number);
252     }
253     isValid = NumberValid(number);
254     if (isValid) {
255         return possibleNumber;
256     } else {
257         return nullptr;
258     }
259 }
260 
IsValidRawstr(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)261 PhoneNumberMatch* RegexRule::IsValidRawstr(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
262 {
263     bool isValid = true;
264     icu::UnicodeString number = possibleNumber->raw_string().c_str();
265     // Processes the ;ext= extention number format
266     int32_t ind = number.trim().indexOf(";ext=");
267     if (ind != -1) {
268         number = number.trim().tempSubString(0, ind);
269     }
270     if (number[0] == '(' || number[0] == '[') {
271         number = number.tempSubString(1);
272     }
273     // 8 is the number of digits in the phone number.
274     if (number[0] != '0' && CountDigits(number) == 8) {
275         isValid = false;
276     }
277     // 4 is the number of digits in the phone number.
278     if (CountDigits(number) <= 4) {
279         isValid = false;
280     }
281     if (isValid) {
282         return possibleNumber;
283     } else {
284         return nullptr;
285     }
286 }
287 
Handle(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)288 std::vector<MatchedNumberInfo> RegexRule::Handle(PhoneNumberMatch *possibleNumber, icu::UnicodeString& message)
289 {
290     if (handleType == "Operator") {
291         return HandleOperator(possibleNumber, message);
292     } else if (handleType == "Blank") {
293         return HandleBlank(possibleNumber, message);
294     } else if (handleType == "Slant") {
295         return HandleSlant(possibleNumber, message);
296     } else if (handleType == "StartWithMobile") {
297         return HandleStartWithMobile(possibleNumber, message);
298     } else if (handleType == "EndWithMobile") {
299         return HandleEndWithMobile(possibleNumber, message);
300     }
301     return HandleDefault(possibleNumber, message);
302 }
303 
HandleDefault(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)304 std::vector<MatchedNumberInfo> RegexRule::HandleDefault(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
305 {
306     MatchedNumberInfo matcher;
307     matcher.SetBegin(0);
308     matcher.SetEnd(1);
309     icu::UnicodeString content = "";
310     matcher.SetContent(content);
311     std::vector<MatchedNumberInfo> matchedNumberInfoList;
312     matchedNumberInfoList.push_back(matcher);
313     return matchedNumberInfoList;
314 }
315 
HandleOperator(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)316 std::vector<MatchedNumberInfo> RegexRule::HandleOperator(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
317 {
318     MatchedNumberInfo matcher;
319     if (possibleNumber->raw_string()[0] == '(' || possibleNumber->raw_string()[0] == '[') {
320         matcher.SetBegin(possibleNumber->start() + 1);
321     } else {
322         matcher.SetBegin(possibleNumber->start());
323     }
324     matcher.SetEnd(possibleNumber->end());
325     matcher.SetContent(message);
326     std::vector<MatchedNumberInfo> matchedNumberInfoList;
327     matchedNumberInfoList.push_back(matcher);
328     return matchedNumberInfoList;
329 }
330 
HandleBlank(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)331 std::vector<MatchedNumberInfo> RegexRule::HandleBlank(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
332 {
333     // exclude phone number 5201314
334     icu::UnicodeString speString = "5201314";
335     std::vector<MatchedNumberInfo> matchedNumberInfoList;
336     MatchedNumberInfo matchedNumberInfo;
337     icu::UnicodeString number = possibleNumber->raw_string().c_str();
338     icu::RegexPattern* pattern = GetPattern();
339     if (pattern == nullptr) {
340         return matchedNumberInfoList;
341     }
342     UErrorCode status;
343     icu::RegexMatcher* matcher = pattern->matcher(number, status);
344     UErrorCode negativeStatus = U_ZERO_ERROR;
345     // exclude phone number 2333333
346     icu::UnicodeString negativeRegex = "(?<![-\\d])(23{6,7})(?![-\\d])";
347     icu::RegexMatcher negativePattern(negativeRegex, 0, negativeStatus);
348     negativePattern.reset(number);
349     if (matcher != nullptr && matcher->find()) {
350         if (negativePattern.find() || number == speString) {
351             delete matcher;
352             delete pattern;
353             return matchedNumberInfoList;
354         }
355         if (possibleNumber->raw_string()[0] != '(' && possibleNumber->raw_string()[0] != '[') {
356             matchedNumberInfo.SetBegin(matcher->start(status) + possibleNumber->start());
357         } else {
358             matchedNumberInfo.SetBegin(possibleNumber->start());
359         }
360         matchedNumberInfo.SetEnd(matcher->end(status) + possibleNumber->start());
361         matchedNumberInfo.SetContent(number);
362         matchedNumberInfoList.push_back(matchedNumberInfo);
363     }
364     delete matcher;
365     delete pattern;
366     return matchedNumberInfoList;
367 }
368 
HandleSlant(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)369 std::vector<MatchedNumberInfo> RegexRule::HandleSlant(PhoneNumberMatch* possibleNumber, icu::UnicodeString& message)
370 {
371     MatchedNumberInfo matchedNumberInfo;
372     MatchedNumberInfo numberInfo;
373     std::vector<MatchedNumberInfo> matchedNumberInfoList;
374     icu::UnicodeString number = possibleNumber->raw_string().c_str();
375     icu::RegexPattern* pattern = GetPattern();
376     if (pattern == nullptr) {
377         return matchedNumberInfoList;
378     }
379     UErrorCode status;
380     icu::RegexMatcher* matcher = pattern->matcher(number, status);
381     if (matcher != nullptr && matcher->find()) {
382         int start = matcher->start(status);
383         std::vector<MatchedNumberInfo> tempList = GetNumbersWithSlant(number);
384         // 2 is the size of tempList.
385         if (tempList.size() == 2 && start == 1) {
386             start = 0;
387         }
388         if (tempList.size() > 0) {
389             matchedNumberInfo.SetBegin(tempList[0].GetBegin() + start + possibleNumber->start());
390             matchedNumberInfo.SetEnd(tempList[0].GetEnd() + possibleNumber->start());
391             icu::UnicodeString contentFirst = tempList[0].GetContent();
392             matchedNumberInfo.SetContent(contentFirst);
393             matchedNumberInfoList.push_back(matchedNumberInfo);
394             // 2 is the size of tempList.
395             if (tempList.size() == 2) {
396                 numberInfo.SetBegin(tempList[1].GetBegin() + start + possibleNumber->start());
397                 numberInfo.SetEnd(tempList[1].GetEnd() + possibleNumber->start());
398                 icu::UnicodeString contentSecond = tempList[1].GetContent();
399                 numberInfo.SetContent(contentSecond);
400                 matchedNumberInfoList.push_back(numberInfo);
401             }
402         }
403     }
404     delete matcher;
405     delete pattern;
406     return matchedNumberInfoList;
407 }
408 
HandleStartWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)409 std::vector<MatchedNumberInfo> RegexRule::HandleStartWithMobile(PhoneNumberMatch* possibleNumber,
410     icu::UnicodeString& message)
411 {
412     return HandlePossibleNumberWithPattern(possibleNumber, message, false);
413 }
414 
HandleEndWithMobile(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message)415 std::vector<MatchedNumberInfo> RegexRule::HandleEndWithMobile(PhoneNumberMatch* possibleNumber,
416     icu::UnicodeString& message)
417 {
418     return HandlePossibleNumberWithPattern(possibleNumber, message, true);
419 }
420 
421 // Handle phone number starting with '(' or '['
StartWithBrackets(icu::UnicodeString & number)422 void RegexRule::StartWithBrackets(icu::UnicodeString& number)
423 {
424     icu::UnicodeString right = "";
425     if (number[0] == '(') {
426         right = ')';
427     }
428     if (number[0] == '[') {
429         right = ']';
430     }
431     int neind = number.indexOf(right);
432     if (neind != -1) {
433         icu::UnicodeString phoneStr = number.tempSubString(0, neind);
434         int phoneLength = CountDigits(phoneStr);
435         icu::UnicodeString extraStr = number.tempSubString(neind);
436         int extra = CountDigits(extraStr);
437         // 4 is the number of numbers in parentheses, 1 and 2 are the number of numbers outside parentheses.
438         if ((phoneLength > 4) && (extra == 1 || extra == 2)) {
439             number = number.tempSubString(1, neind - 1);
440         } else {
441             number = number.tempSubString(1);
442         }
443     } else {
444         number = number.tempSubString(1);
445     }
446 }
447 
448 // identify short number separated by '/'
GetNumbersWithSlant(icu::UnicodeString & testStr)449 std::vector<MatchedNumberInfo> RegexRule::GetNumbersWithSlant(icu::UnicodeString& testStr)
450 {
451     std::vector<MatchedNumberInfo> shortList;
452     PhoneNumberUtil* pnu = PhoneNumberUtil::GetInstance();
453     ShortNumberInfo* shortInfo = new (std::nothrow) ShortNumberInfo();
454     if (shortInfo == nullptr) {
455         HILOG_ERROR_I18N("ShortNumberInfo construct failed.");
456         return shortList;
457     }
458     std::string numberFisrt = "";
459     std::string numberEnd = "";
460     int slantIndex = 0;
461     for (int i = 0; i < testStr.length(); i++) {
462         if (testStr[i] == '/' || testStr[i] == '|') {
463             slantIndex = i;
464             testStr.tempSubString(0, i).toUTF8String(numberFisrt);
465             testStr.tempSubString(i + 1).toUTF8String(numberEnd);
466         }
467     }
468     PhoneNumber phoneNumberFirst;
469     PhoneNumber phoneNumberEnd;
470     pnu->Parse(numberFisrt, "CN", &phoneNumberFirst);
471     pnu->Parse(numberEnd, "CN", &phoneNumberEnd);
472     if (shortInfo->IsValidShortNumber(phoneNumberFirst)) {
473         MatchedNumberInfo matchedNumberInfoFirst;
474         matchedNumberInfoFirst.SetBegin(0);
475         matchedNumberInfoFirst.SetEnd(slantIndex);
476         icu::UnicodeString contentFirst = numberFisrt.c_str();
477         matchedNumberInfoFirst.SetContent(contentFirst);
478         shortList.push_back(matchedNumberInfoFirst);
479     }
480     if (shortInfo->IsValidShortNumber(phoneNumberEnd)) {
481         MatchedNumberInfo matchedNumberInfoEnd;
482         matchedNumberInfoEnd.SetBegin(slantIndex + 1);
483         matchedNumberInfoEnd.SetEnd(testStr.length());
484         icu::UnicodeString contentEnd = numberEnd.c_str();
485         matchedNumberInfoEnd.SetContent(contentEnd);
486         shortList.push_back(matchedNumberInfoEnd);
487     }
488     delete shortInfo;
489     return shortList;
490 }
491 
HandlePossibleNumberWithPattern(PhoneNumberMatch * possibleNumber,icu::UnicodeString & message,bool isStartsWithNumber)492 std::vector<MatchedNumberInfo> RegexRule::HandlePossibleNumberWithPattern(PhoneNumberMatch* possibleNumber,
493     icu::UnicodeString& message, bool isStartsWithNumber)
494 {
495     UErrorCode status = U_ZERO_ERROR;
496     std::vector<MatchedNumberInfo> matchedList;
497     icu::UnicodeString possible = possibleNumber->raw_string().c_str();
498     icu::RegexPattern* pattern = GetPattern();
499     if (pattern == nullptr) {
500         HILOG_ERROR_I18N("RegexPattern is nullptr.");
501         return matchedList;
502     }
503     icu::RegexMatcher* mat = pattern->matcher(message, status);
504     while (mat != nullptr && mat->find(status)) {
505         int start = mat->start(status);
506         int end = mat->end(status);
507         icu::UnicodeString matched = message.tempSubString(start, end - start);
508         bool isMatch = isStartsWithNumber ? matched.startsWith(possible) : matched.endsWith(possible);
509         if (isMatch) {
510             MatchedNumberInfo info;
511             info.SetBegin(isStartsWithNumber ? start : end - possible.length());
512             info.SetEnd(isStartsWithNumber ? (start + possible.length()) : end);
513             info.SetContent(possible);
514             matchedList.push_back(info);
515         }
516     }
517     delete mat;
518     delete pattern;
519     return matchedList;
520 }
521 } // namespace I18n
522 } // namespace Global
523 } // namespace OHOS