• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22 
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27 
28 const int PhoneNumberMatched::CONTAIN = 9; // not referenced in the visible part of this file; meaning to be confirmed against the header
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8; // not referenced in the visible part of this file; meaning to be confirmed against the header
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A'; // filler that ReplaceSpecifiedPos() writes over each position of the range it is given
31 
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34     phoneNumberRule = new PhoneNumberRule(country);
35     phoneNumberUtil = PhoneNumberUtil::GetInstance();
36     shortNumberInfo = new ShortNumberInfo();
37     this->country = country;
38     if (phoneNumberRule != nullptr) {
39         phoneNumberRule->Init();
40     }
41 }
42 
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45     delete phoneNumberRule;
46     delete shortNumberInfo;
47 }
48 
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51     icu::UnicodeString messageStr = message;
52     if (phoneNumberRule != nullptr && !phoneNumberRule->isFixed) {
53         return DealWithoutFixed(messageStr, country);
54     }
55     icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56     std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57         messageStr, filteredString);
58     std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59     if (shortList.size() != 0) {
60         matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61     }
62     matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63     for (auto& matchedNumberInfo : matchedNumberInfoList) {
64         DealNumberWithOneBracket(matchedNumberInfo);
65     }
66     return DealResult(matchedNumberInfoList);
67 }
68 
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72     std::vector<MatchedNumberInfo> result;
73     for (auto& match : matchList) {
74         if (match == nullptr) continue;
75         MatchedNumberInfo info;
76         icu::UnicodeString content = match->raw_string().c_str();
77         PhoneNumber phoneNumber = match->number();
78         if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
79             info.SetBegin(match->start());
80             info.SetEnd(match->end());
81             info.SetContent(content);
82             result.push_back(info);
83         }
84         delete match;
85     }
86     std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
87     // Merge result
88     if (shortResult.size() != 0) {
89         result.insert(result.end(), shortResult.begin(), shortResult.end());
90     }
91     result = DeleteRepeatedInfo(result);
92     for (auto& res: result) {
93         DealNumberWithOneBracket(res);
94     }
95     return DealResult(result);
96 }
97 
98 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)99 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
100 {
101     std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
102     icu::UnicodeString ret = src;
103     for (NegativeRule* rule : rules) {
104         if (rule == nullptr) {
105             continue;
106         }
107         ret = rule->Handle(ret);
108     }
109     return ret;
110 }
111 
112 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)113 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
114 {
115     if (start < end) {
116         int len = chs.length();
117         for (int i = 0; i < len; i++) {
118             if (i >= start && i < end) {
119                 chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
120             }
121         }
122     }
123 }
124 
125 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)126 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
127     icu::UnicodeString& message)
128 {
129     std::vector<MatchedNumberInfo> matchedNumberInfoList;
130     UErrorCode status = U_ZERO_ERROR;
131     size_t pos = GetFindRulesInside().size();
132     if (pos == 0) {
133         HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
134         return matchedNumberInfoList;
135     }
136     // 1 indicates the last position.
137     FindRule* shortRegexRule = GetFindRulesInside()[pos - 1];
138     icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
139     if (shortPattern == nullptr) {
140         HILOG_ERROR_I18N("shortPattern getPattern failed.");
141         return matchedNumberInfoList;
142     }
143     icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
144     if (shortMatch == nullptr) {
145         delete shortPattern;
146         HILOG_ERROR_I18N("shortPattern matcher failed.");
147         return matchedNumberInfoList;
148     }
149     while (shortMatch->find(status)) {
150         icu::UnicodeString numberToParse = shortMatch->group(status);
151         std::string stringParse;
152         numberToParse.toUTF8String(stringParse);
153         PhoneNumber phoneNumber;
154         PhoneNumberUtil::ErrorType errorType =
155             phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
156         if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
157             HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
158             continue;
159         }
160         // Add the valid short number to the result
161         if (shortNumberInfo != nullptr &&
162                 shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
163             MatchedNumberInfo matcher;
164             matcher.SetBegin(shortMatch->start(status));
165             matcher.SetEnd(shortMatch->end(status));
166             icu::UnicodeString stringShort = shortMatch->group(status);
167             matcher.SetContent(stringShort);
168             matchedNumberInfoList.push_back(matcher);
169         }
170     }
171     delete shortMatch;
172     delete shortPattern;
173     return matchedNumberInfoList;
174 }
175 
GetFindRulesInside()176 std::vector<FindRule*> PhoneNumberMatched::GetFindRulesInside()
177 {
178     if (phoneNumberRule != nullptr) {
179         return phoneNumberRule->GetFindRules();
180     }
181     return {};
182 }
183 
GetBorderRulesInside()184 std::vector<BorderRule*> PhoneNumberMatched::GetBorderRulesInside()
185 {
186     if (phoneNumberRule != nullptr) {
187         return phoneNumberRule->GetBorderRules();
188     }
189     return {};
190 }
191 
GetCodesRulesInside()192 std::vector<CodeRule*> PhoneNumberMatched::GetCodesRulesInside()
193 {
194     if (phoneNumberRule != nullptr) {
195         return phoneNumberRule->GetCodesRules();
196     }
197     return {};
198 }
199 
GetPositiveRulesInside()200 std::vector<PositiveRule*> PhoneNumberMatched::GetPositiveRulesInside()
201 {
202     if (phoneNumberRule != nullptr) {
203         return phoneNumberRule->GetPositiveRules();
204     }
205     return {};
206 }
207 
208 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)209 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
210     std::string& country)
211 {
212     PhoneNumber phoneNumber;
213     int lenNumber = 5;
214     icu::UnicodeString uNumber = number.c_str();
215     if (RegexRule::CountDigits(uNumber) < lenNumber) {
216         return false;
217     }
218     PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
219     if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
220         return false;
221     }
222     UChar32 space = ' ';
223     UChar32 slash = '/';
224     // Add to matchList if phone number is not delimited, or valid
225     if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
226         phoneNumberUtil->IsValidNumber(phoneNumber)) {
227         PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
228         matchList.push_back(match);
229         return true;
230     }
231     return false;
232 }
233 
234 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)235 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
236     icu::UnicodeString& filteredString)
237 {
238     std::vector<PhoneNumberMatch*> matchList;
239     size_t pos = GetFindRulesInside().size();
240     if (pos < 2) { // 2 indicates the penultimate position.
241         HILOG_ERROR_I18N("numberRegexRule size less then 2.");
242         return matchList;
243     }
244     // 2 indicates the penultimate position.
245     FindRule* numberRegexRule = GetFindRulesInside()[pos - 2];
246     icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
247     if (numberPattern == nullptr) {
248         HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
249         return matchList;
250     }
251     UErrorCode status = U_ZERO_ERROR;
252     icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
253     if (numberMatcher == nullptr) {
254         HILOG_ERROR_I18N("numberPattern matcher failed.");
255         delete numberPattern;
256         return matchList;
257     }
258     FindNumbersFromInputString(matchList, numberMatcher, country, filteredString);
259     delete numberMatcher;
260     delete numberPattern;
261     return matchList;
262 }
263 
FindNumbersFromInputString(std::vector<PhoneNumberMatch * > & matchList,icu::RegexMatcher * numberMatcher,std::string & country,icu::UnicodeString & filteredString)264 void PhoneNumberMatched::FindNumbersFromInputString(std::vector<PhoneNumberMatch*> &matchList,
265     icu::RegexMatcher* numberMatcher, std::string& country, icu::UnicodeString& filteredString)
266 {
267     UErrorCode status = U_ZERO_ERROR;
268     while (numberMatcher->find(status)) {
269         int32_t start = numberMatcher->start(status);
270         int32_t end = numberMatcher->end(status);
271         icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
272         std::string number;
273         uNumber.toUTF8String(number);
274         // if the entire phone number is invalid, identify each segment that is separated
275         if (!AddPhoneNumber(number, start, matchList, country)) {
276             int searchStart = 0;
277             UChar32 space = ' ';
278             UChar32 slash = '/';
279             if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
280                 continue;
281             }
282             while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
283                 int phoneStart = searchStart;
284                 int indexSpace = uNumber.indexOf(space, searchStart);
285                 int indexSlash = uNumber.indexOf(slash, searchStart);
286                 int phoneEnd =
287                     (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
288                 searchStart = phoneEnd + 1;
289                 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
290                 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
291             }
292             // identify the last segment
293             std::string lastStr = number.substr(searchStart);
294             AddPhoneNumber(lastStr, searchStart + start, matchList, country);
295         }
296     }
297 }
298 
299 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)300 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
301     MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
302 {
303     bool flag = false;
304     // 3 is the length of findRules.
305     size_t length = 3;
306     if (GetFindRulesInside().size() == length) {
307         UErrorCode status = U_ZERO_ERROR;
308         FindRule* regexRule = GetFindRulesInside()[0];
309         icu::RegexPattern* pattern = regexRule->GetPattern();
310         if (pattern == nullptr) {
311             HILOG_ERROR_I18N("regexRule getPattern failed.");
312             return flag;
313         }
314         icu::UnicodeString str = "";
315         if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
316             str = info.GetContent().tempSubString(1);
317         } else {
318             str = info.GetContent();
319         }
320         icu::RegexMatcher* matcher = pattern->matcher(str, status);
321         if (matcher == nullptr) {
322             delete pattern;
323             HILOG_ERROR_I18N("pattern matcher failed.");
324             return flag;
325         }
326         if (!matcher->find(status)) {
327             result.push_back(info);
328             ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
329             flag = true;
330         }
331         delete matcher;
332         delete pattern;
333     } else {
334         result.push_back(info);
335         ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
336     }
337     return flag;
338 }
339 
340 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)341 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
342     icu::UnicodeString& src, icu::UnicodeString& filteredString)
343 {
344     std::vector<MatchedNumberInfo> result;
345     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
346     for (auto match : matchList) {
347         if (!HandleBorderRule(match, filteredString)) {
348             continue;
349         }
350         PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
351         if (delMatch == nullptr) {
352             continue;
353         }
354         icu::UnicodeString content = delMatch->raw_string().c_str();
355         int contentStart = delMatch->start();
356         int contentEnd = delMatch->end();
357         std::pair<int, int> pos{contentStart, contentEnd};
358         if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
359             MatchedNumberInfo info;
360             info.SetBegin(contentStart);
361             info.SetEnd(contentEnd);
362             info.SetContent(content);
363             bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
364             if (flag) {
365                 continue;
366             }
367         }
368         std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
369         if (posList.size() != 0) {
370             for (auto& matchInfo : posList) {
371                 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
372             }
373             result.insert(result.end(), posList.begin(), posList.end());
374         }
375         delete match;
376     }
377     return result;
378 }
379 
380 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)381 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
382 {
383     std::set<MatchedNumberInfo> set;
384     std::vector<MatchedNumberInfo> ret;
385     for (auto info : list) {
386         if (set.find(info) == set.end()) {
387             ret.push_back(info);
388         }
389         set.insert(info);
390     }
391     return ret;
392 }
393 
394 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)395 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
396 {
397     icu::UnicodeString message = info.GetContent();
398     if (IsNumberWithOneBracket(message)) {
399         info.SetBegin(info.GetBegin() + 1);
400         icu::UnicodeString content = info.GetContent().tempSubString(1);
401         info.SetContent(content);
402     }
403 }
404 
405 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)406 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
407 {
408     if (message != "") {
409         int numLeft = 0;
410         int numRight = 0;
411         int len = message.length();
412         for (int i = 0; i < len; i++) {
413             if (message[i] == '(' || message[i] == '[') {
414                 numLeft++;
415             }
416             if (message[i] == ')' || message[i] == ']') {
417                 numRight++;
418             }
419         }
420         if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
421             return true;
422         }
423     }
424     return false;
425 }
426 
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)427 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
428 {
429     std::vector<int> result;
430     size_t length = matchedNumberInfoList.size();
431     if (length == 0) {
432         result.push_back(0);
433     } else {
434         size_t posNumber = 2;
435         size_t posStart = 1;
436         size_t posEnd = 2;
437         result.resize(posNumber * length + 1);
438         result[0] = static_cast<int>(length);
439         for (size_t i = 0; i < length; i++) {
440             result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
441             result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
442         }
443     }
444     return result;
445 }
446 
447 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)448 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
449 {
450     if (match == nullptr) {
451         return false;
452     }
453     std::vector<BorderRule*> rules = GetBorderRulesInside();
454     if (rules.size() == 0) {
455         return true;
456     }
457     for (BorderRule* rule : rules) {
458         if (!rule->Handle(match, message)) {
459             return false;
460         }
461     }
462     return true;
463 }
464 
465 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)466 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
467 {
468     PhoneNumberMatch* match = phoneNumberMatch;
469     std::vector<CodeRule*> rules = GetCodesRulesInside();
470     if (rules.size() == 0) {
471         return nullptr;
472     }
473     for (CodeRule* rule : rules) {
474         match = rule->Handle(match, message);
475     }
476     return match;
477 }
478 
479 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)480 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
481     icu::UnicodeString& message)
482 {
483     std::vector<MatchedNumberInfo> infoList;
484     std::vector<PositiveRule*> rules = GetPositiveRulesInside();
485     for (PositiveRule* rule : rules) {
486         infoList = rule->Handle(match, message);
487         if (infoList.size() != 0) {
488             break;
489         }
490     }
491     return infoList;
492 }
493 
DealStringWithOneBracket(icu::UnicodeString & message)494 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
495 {
496     if (IsNumberWithOneBracket(message)) {
497         return message.tempSubString(1);
498     }
499     return message;
500 }
501 } // namespace I18n
502 } // namespace Global
503 } // namespace OHOS