• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22 
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27 
28 const int PhoneNumberMatched::CONTAIN = 9;
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8;
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A';
31 
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34     phoneNumberRule = new PhoneNumberRule(country);
35     phoneNumberUtil = PhoneNumberUtil::GetInstance();
36     shortNumberInfo = new ShortNumberInfo();
37     this->country = country;
38     if (phoneNumberRule != nullptr) {
39         phoneNumberRule->Init();
40     }
41 }
42 
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45     delete phoneNumberRule;
46     delete shortNumberInfo;
47 }
48 
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51     icu::UnicodeString messageStr = message;
52     if (phoneNumberRule != nullptr && !phoneNumberRule->isFixed) {
53         return DealWithoutFixed(messageStr, country);
54     }
55     icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56     std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57         messageStr, filteredString);
58     std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59     if (shortList.size() != 0) {
60         matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61     }
62     matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63     for (auto& matchedNumberInfo : matchedNumberInfoList) {
64         DealNumberWithOneBracket(matchedNumberInfo);
65     }
66     return DealResult(matchedNumberInfoList);
67 }
68 
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72     std::vector<MatchedNumberInfo> result;
73     for (auto& match : matchList) {
74         if (match == nullptr) {
75             continue;
76         }
77         MatchedNumberInfo info;
78         icu::UnicodeString content = match->raw_string().c_str();
79         PhoneNumber phoneNumber = match->number();
80         if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
81             info.SetBegin(match->start());
82             info.SetEnd(match->end());
83             info.SetContent(content);
84             result.push_back(info);
85         }
86         delete match;
87     }
88     std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
89     // Merge result
90     if (shortResult.size() != 0) {
91         result.insert(result.end(), shortResult.begin(), shortResult.end());
92     }
93     result = DeleteRepeatedInfo(result);
94     for (auto& res: result) {
95         DealNumberWithOneBracket(res);
96     }
97     return DealResult(result);
98 }
99 
100 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)101 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
102 {
103     std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
104     icu::UnicodeString ret = src;
105     for (NegativeRule* rule : rules) {
106         if (rule == nullptr) {
107             continue;
108         }
109         ret = rule->Handle(ret);
110     }
111     return ret;
112 }
113 
114 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)115 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
116 {
117     if (start >= end) {
118         return;
119     }
120     int len = chs.length();
121     for (int i = 0; i < len; i++) {
122         if (i >= start && i < end) {
123             chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
124         }
125     }
126 }
127 
128 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)129 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
130     icu::UnicodeString& message)
131 {
132     std::vector<MatchedNumberInfo> matchedNumberInfoList;
133     UErrorCode status = U_ZERO_ERROR;
134     size_t pos = GetFindRulesInside().size();
135     if (pos == 0) {
136         HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
137         return matchedNumberInfoList;
138     }
139     // 1 indicates the last position.
140     FindRule* shortRegexRule = GetFindRulesInside()[pos - 1];
141     icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
142     if (shortPattern == nullptr) {
143         HILOG_ERROR_I18N("shortPattern getPattern failed.");
144         return matchedNumberInfoList;
145     }
146     icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
147     if (U_FAILURE(status) || shortMatch == nullptr) {
148         delete shortPattern;
149         HILOG_ERROR_I18N("shortPattern matcher failed.");
150         return matchedNumberInfoList;
151     }
152     while (shortMatch->find(status)) {
153         icu::UnicodeString numberToParse = shortMatch->group(status);
154         std::string stringParse;
155         numberToParse.toUTF8String(stringParse);
156         PhoneNumber phoneNumber;
157         PhoneNumberUtil::ErrorType errorType =
158             phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
159         if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
160             HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
161             continue;
162         }
163         // Add the valid short number to the result
164         if (shortNumberInfo != nullptr &&
165                 shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
166             MatchedNumberInfo matcher;
167             matcher.SetBegin(shortMatch->start(status));
168             matcher.SetEnd(shortMatch->end(status));
169             icu::UnicodeString stringShort = shortMatch->group(status);
170             matcher.SetContent(stringShort);
171             matchedNumberInfoList.push_back(matcher);
172         }
173     }
174     delete shortMatch;
175     delete shortPattern;
176     return matchedNumberInfoList;
177 }
178 
GetFindRulesInside()179 std::vector<FindRule*> PhoneNumberMatched::GetFindRulesInside()
180 {
181     if (phoneNumberRule != nullptr) {
182         return phoneNumberRule->GetFindRules();
183     }
184     return {};
185 }
186 
GetBorderRulesInside()187 std::vector<BorderRule*> PhoneNumberMatched::GetBorderRulesInside()
188 {
189     if (phoneNumberRule != nullptr) {
190         return phoneNumberRule->GetBorderRules();
191     }
192     return {};
193 }
194 
GetCodesRulesInside()195 std::vector<CodeRule*> PhoneNumberMatched::GetCodesRulesInside()
196 {
197     if (phoneNumberRule != nullptr) {
198         return phoneNumberRule->GetCodesRules();
199     }
200     return {};
201 }
202 
GetPositiveRulesInside()203 std::vector<PositiveRule*> PhoneNumberMatched::GetPositiveRulesInside()
204 {
205     if (phoneNumberRule != nullptr) {
206         return phoneNumberRule->GetPositiveRules();
207     }
208     return {};
209 }
210 
211 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)212 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
213     std::string& country)
214 {
215     PhoneNumber phoneNumber;
216     int lenNumber = 5;
217     icu::UnicodeString uNumber = number.c_str();
218     if (RegexRule::CountDigits(uNumber) < lenNumber) {
219         return false;
220     }
221     PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
222     if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
223         return false;
224     }
225     UChar32 space = ' ';
226     UChar32 slash = '/';
227     // Add to matchList if phone number is not delimited, or valid
228     if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
229         phoneNumberUtil->IsValidNumber(phoneNumber)) {
230         PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
231         matchList.push_back(match);
232         return true;
233     }
234     return false;
235 }
236 
237 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)238 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
239     icu::UnicodeString& filteredString)
240 {
241     std::vector<PhoneNumberMatch*> matchList;
242     size_t pos = GetFindRulesInside().size();
243     if (pos < 2) { // 2 indicates the penultimate position.
244         HILOG_ERROR_I18N("numberRegexRule size less then 2.");
245         return matchList;
246     }
247     // 2 indicates the penultimate position.
248     FindRule* numberRegexRule = GetFindRulesInside()[pos - 2];
249     icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
250     if (numberPattern == nullptr) {
251         HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
252         return matchList;
253     }
254     UErrorCode status = U_ZERO_ERROR;
255     icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
256     if (numberMatcher == nullptr) {
257         HILOG_ERROR_I18N("numberPattern matcher failed.");
258         delete numberPattern;
259         return matchList;
260     }
261     FindNumbersFromInputString(matchList, numberMatcher, country, filteredString);
262     delete numberMatcher;
263     delete numberPattern;
264     return matchList;
265 }
266 
FindNumbersFromInputString(std::vector<PhoneNumberMatch * > & matchList,icu::RegexMatcher * numberMatcher,std::string & country,icu::UnicodeString & filteredString)267 void PhoneNumberMatched::FindNumbersFromInputString(std::vector<PhoneNumberMatch*> &matchList,
268     icu::RegexMatcher* numberMatcher, std::string& country, icu::UnicodeString& filteredString)
269 {
270     UErrorCode status = U_ZERO_ERROR;
271     while (numberMatcher->find(status)) {
272         int32_t start = numberMatcher->start(status);
273         int32_t end = numberMatcher->end(status);
274         icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
275         std::string number;
276         uNumber.toUTF8String(number);
277         // if the entire phone number is invalid, identify each segment that is separated
278         if (!AddPhoneNumber(number, start, matchList, country)) {
279             int searchStart = 0;
280             UChar32 space = ' ';
281             UChar32 slash = '/';
282             if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
283                 continue;
284             }
285             while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
286                 int phoneStart = searchStart;
287                 int indexSpace = uNumber.indexOf(space, searchStart);
288                 int indexSlash = uNumber.indexOf(slash, searchStart);
289                 int phoneEnd =
290                     (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
291                 searchStart = phoneEnd + 1;
292                 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
293                 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
294             }
295             // identify the last segment
296             std::string lastStr = number.substr(searchStart);
297             AddPhoneNumber(lastStr, searchStart + start, matchList, country);
298         }
299     }
300 }
301 
302 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)303 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
304     MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
305 {
306     bool flag = false;
307     // 3 is the length of findRules.
308     size_t length = 3;
309     if (GetFindRulesInside().size() == length) {
310         FindRule* regexRule = GetFindRulesInside()[0];
311         if (regexRule == nullptr) {
312             HILOG_ERROR_I18N("PhoneNumberMatched::HandleWithShortAndShort: Get FindRule failed.");
313             return false;
314         }
315         icu::RegexPattern* pattern = regexRule->GetPattern();
316         if (pattern == nullptr) {
317             HILOG_ERROR_I18N("regexRule getPattern failed.");
318             return flag;
319         }
320         icu::UnicodeString str = "";
321         if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
322             str = info.GetContent().tempSubString(1);
323         } else {
324             str = info.GetContent();
325         }
326         UErrorCode status = U_ZERO_ERROR;
327         icu::RegexMatcher* matcher = pattern->matcher(str, status);
328         if (U_FAILURE(status) || matcher == nullptr) {
329             delete pattern;
330             HILOG_ERROR_I18N("pattern matcher failed.");
331             return flag;
332         }
333         if (!matcher->find(status) && U_SUCCESS(status)) {
334             result.push_back(info);
335             ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
336             flag = true;
337         }
338         delete matcher;
339         delete pattern;
340     } else {
341         result.push_back(info);
342         ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
343     }
344     return flag;
345 }
346 
347 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)348 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
349     icu::UnicodeString& src, icu::UnicodeString& filteredString)
350 {
351     std::vector<MatchedNumberInfo> result;
352     std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
353     for (auto match : matchList) {
354         if (!HandleBorderRule(match, filteredString)) {
355             continue;
356         }
357         PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
358         if (delMatch == nullptr) {
359             continue;
360         }
361         icu::UnicodeString content = delMatch->raw_string().c_str();
362         int contentStart = delMatch->start();
363         int contentEnd = delMatch->end();
364         std::pair<int, int> pos{contentStart, contentEnd};
365         if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
366             MatchedNumberInfo info;
367             info.SetBegin(contentStart);
368             info.SetEnd(contentEnd);
369             info.SetContent(content);
370             bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
371             if (flag) {
372                 continue;
373             }
374         }
375         std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
376         if (posList.size() != 0) {
377             for (auto& matchInfo : posList) {
378                 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
379             }
380             result.insert(result.end(), posList.begin(), posList.end());
381         }
382         delete match;
383     }
384     return result;
385 }
386 
387 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)388 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
389 {
390     std::set<MatchedNumberInfo> set;
391     std::vector<MatchedNumberInfo> ret;
392     for (auto info : list) {
393         if (set.find(info) == set.end()) {
394             ret.push_back(info);
395         }
396         set.insert(info);
397     }
398     return ret;
399 }
400 
401 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)402 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
403 {
404     icu::UnicodeString message = info.GetContent();
405     if (IsNumberWithOneBracket(message)) {
406         info.SetBegin(info.GetBegin() + 1);
407         icu::UnicodeString content = info.GetContent().tempSubString(1);
408         info.SetContent(content);
409     }
410 }
411 
412 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)413 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
414 {
415     if (message != "") {
416         int numLeft = 0;
417         int numRight = 0;
418         int len = message.length();
419         for (int i = 0; i < len; i++) {
420             if (message[i] == '(' || message[i] == '[') {
421                 numLeft++;
422             }
423             if (message[i] == ')' || message[i] == ']') {
424                 numRight++;
425             }
426         }
427         if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
428             return true;
429         }
430     }
431     return false;
432 }
433 
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)434 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
435 {
436     std::vector<int> result;
437     size_t length = matchedNumberInfoList.size();
438     if (length == 0) {
439         result.push_back(0);
440     } else {
441         size_t posNumber = 2;
442         size_t posStart = 1;
443         size_t posEnd = 2;
444         result.resize(posNumber * length + 1);
445         result[0] = static_cast<int>(length);
446         for (size_t i = 0; i < length; i++) {
447             result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
448             result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
449         }
450     }
451     return result;
452 }
453 
454 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)455 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
456 {
457     if (match == nullptr) {
458         return false;
459     }
460     std::vector<BorderRule*> rules = GetBorderRulesInside();
461     if (rules.size() == 0) {
462         return true;
463     }
464     for (BorderRule* rule : rules) {
465         if (rule == nullptr) {
466             return false;
467         }
468         if (!rule->Handle(match, message)) {
469             return false;
470         }
471     }
472     return true;
473 }
474 
475 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)476 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
477 {
478     PhoneNumberMatch* match = phoneNumberMatch;
479     std::vector<CodeRule*> rules = GetCodesRulesInside();
480     if (rules.size() == 0) {
481         return nullptr;
482     }
483     for (CodeRule* rule : rules) {
484         if (rule == nullptr) {
485             continue;
486         }
487         match = rule->Handle(match, message);
488     }
489     return match;
490 }
491 
492 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)493 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
494     icu::UnicodeString& message)
495 {
496     std::vector<MatchedNumberInfo> infoList;
497     std::vector<PositiveRule*> rules = GetPositiveRulesInside();
498     for (PositiveRule* rule : rules) {
499         if (rule == nullptr) {
500             continue;
501         }
502         infoList = rule->Handle(match, message);
503         if (infoList.size() != 0) {
504             break;
505         }
506     }
507     return infoList;
508 }
509 
DealStringWithOneBracket(icu::UnicodeString & message)510 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
511 {
512     if (IsNumberWithOneBracket(message)) {
513         return message.tempSubString(1);
514     }
515     return message;
516 }
517 } // namespace I18n
518 } // namespace Global
519 } // namespace OHOS