1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27
28 const int PhoneNumberMatched::CONTAIN = 9;
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8;
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A';
31
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34 phoneNumberRule = new PhoneNumberRule(country);
35 phoneNumberUtil = PhoneNumberUtil::GetInstance();
36 shortNumberInfo = new ShortNumberInfo();
37 this->country = country;
38 if (phoneNumberRule != nullptr) {
39 phoneNumberRule->Init();
40 }
41 }
42
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45 delete phoneNumberRule;
46 delete shortNumberInfo;
47 }
48
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51 icu::UnicodeString messageStr = message;
52 if (phoneNumberRule != nullptr && !phoneNumberRule->isFixed) {
53 return DealWithoutFixed(messageStr, country);
54 }
55 icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56 std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57 messageStr, filteredString);
58 std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59 if (shortList.size() != 0) {
60 matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61 }
62 matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63 for (auto& matchedNumberInfo : matchedNumberInfoList) {
64 DealNumberWithOneBracket(matchedNumberInfo);
65 }
66 return DealResult(matchedNumberInfoList);
67 }
68
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72 std::vector<MatchedNumberInfo> result;
73 for (auto& match : matchList) {
74 if (match == nullptr) {
75 continue;
76 }
77 MatchedNumberInfo info;
78 icu::UnicodeString content = match->raw_string().c_str();
79 PhoneNumber phoneNumber = match->number();
80 if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
81 info.SetBegin(match->start());
82 info.SetEnd(match->end());
83 info.SetContent(content);
84 result.push_back(info);
85 }
86 delete match;
87 }
88 std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
89 // Merge result
90 if (shortResult.size() != 0) {
91 result.insert(result.end(), shortResult.begin(), shortResult.end());
92 }
93 result = DeleteRepeatedInfo(result);
94 for (auto& res: result) {
95 DealNumberWithOneBracket(res);
96 }
97 return DealResult(result);
98 }
99
100 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)101 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
102 {
103 std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
104 icu::UnicodeString ret = src;
105 for (NegativeRule* rule : rules) {
106 if (rule == nullptr) {
107 continue;
108 }
109 ret = rule->Handle(ret);
110 }
111 return ret;
112 }
113
114 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)115 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
116 {
117 if (start >= end) {
118 return;
119 }
120 int len = chs.length();
121 for (int i = 0; i < len; i++) {
122 if (i >= start && i < end) {
123 chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
124 }
125 }
126 }
127
128 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)129 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
130 icu::UnicodeString& message)
131 {
132 std::vector<MatchedNumberInfo> matchedNumberInfoList;
133 UErrorCode status = U_ZERO_ERROR;
134 size_t pos = GetFindRulesInside().size();
135 if (pos == 0) {
136 HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
137 return matchedNumberInfoList;
138 }
139 // 1 indicates the last position.
140 FindRule* shortRegexRule = GetFindRulesInside()[pos - 1];
141 icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
142 if (shortPattern == nullptr) {
143 HILOG_ERROR_I18N("shortPattern getPattern failed.");
144 return matchedNumberInfoList;
145 }
146 icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
147 if (U_FAILURE(status) || shortMatch == nullptr) {
148 delete shortPattern;
149 HILOG_ERROR_I18N("shortPattern matcher failed.");
150 return matchedNumberInfoList;
151 }
152 while (shortMatch->find(status)) {
153 icu::UnicodeString numberToParse = shortMatch->group(status);
154 std::string stringParse;
155 numberToParse.toUTF8String(stringParse);
156 PhoneNumber phoneNumber;
157 PhoneNumberUtil::ErrorType errorType =
158 phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
159 if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
160 HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
161 continue;
162 }
163 // Add the valid short number to the result
164 if (shortNumberInfo != nullptr &&
165 shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
166 MatchedNumberInfo matcher;
167 matcher.SetBegin(shortMatch->start(status));
168 matcher.SetEnd(shortMatch->end(status));
169 icu::UnicodeString stringShort = shortMatch->group(status);
170 matcher.SetContent(stringShort);
171 matchedNumberInfoList.push_back(matcher);
172 }
173 }
174 delete shortMatch;
175 delete shortPattern;
176 return matchedNumberInfoList;
177 }
178
GetFindRulesInside()179 std::vector<FindRule*> PhoneNumberMatched::GetFindRulesInside()
180 {
181 if (phoneNumberRule != nullptr) {
182 return phoneNumberRule->GetFindRules();
183 }
184 return {};
185 }
186
GetBorderRulesInside()187 std::vector<BorderRule*> PhoneNumberMatched::GetBorderRulesInside()
188 {
189 if (phoneNumberRule != nullptr) {
190 return phoneNumberRule->GetBorderRules();
191 }
192 return {};
193 }
194
GetCodesRulesInside()195 std::vector<CodeRule*> PhoneNumberMatched::GetCodesRulesInside()
196 {
197 if (phoneNumberRule != nullptr) {
198 return phoneNumberRule->GetCodesRules();
199 }
200 return {};
201 }
202
GetPositiveRulesInside()203 std::vector<PositiveRule*> PhoneNumberMatched::GetPositiveRulesInside()
204 {
205 if (phoneNumberRule != nullptr) {
206 return phoneNumberRule->GetPositiveRules();
207 }
208 return {};
209 }
210
211 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)212 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
213 std::string& country)
214 {
215 PhoneNumber phoneNumber;
216 int lenNumber = 5;
217 icu::UnicodeString uNumber = number.c_str();
218 if (RegexRule::CountDigits(uNumber) < lenNumber) {
219 return false;
220 }
221 PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
222 if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
223 return false;
224 }
225 UChar32 space = ' ';
226 UChar32 slash = '/';
227 // Add to matchList if phone number is not delimited, or valid
228 if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
229 phoneNumberUtil->IsValidNumber(phoneNumber)) {
230 PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
231 matchList.push_back(match);
232 return true;
233 }
234 return false;
235 }
236
237 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)238 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
239 icu::UnicodeString& filteredString)
240 {
241 std::vector<PhoneNumberMatch*> matchList;
242 size_t pos = GetFindRulesInside().size();
243 if (pos < 2) { // 2 indicates the penultimate position.
244 HILOG_ERROR_I18N("numberRegexRule size less then 2.");
245 return matchList;
246 }
247 // 2 indicates the penultimate position.
248 FindRule* numberRegexRule = GetFindRulesInside()[pos - 2];
249 icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
250 if (numberPattern == nullptr) {
251 HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
252 return matchList;
253 }
254 UErrorCode status = U_ZERO_ERROR;
255 icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
256 if (numberMatcher == nullptr) {
257 HILOG_ERROR_I18N("numberPattern matcher failed.");
258 delete numberPattern;
259 return matchList;
260 }
261 FindNumbersFromInputString(matchList, numberMatcher, country, filteredString);
262 delete numberMatcher;
263 delete numberPattern;
264 return matchList;
265 }
266
FindNumbersFromInputString(std::vector<PhoneNumberMatch * > & matchList,icu::RegexMatcher * numberMatcher,std::string & country,icu::UnicodeString & filteredString)267 void PhoneNumberMatched::FindNumbersFromInputString(std::vector<PhoneNumberMatch*> &matchList,
268 icu::RegexMatcher* numberMatcher, std::string& country, icu::UnicodeString& filteredString)
269 {
270 UErrorCode status = U_ZERO_ERROR;
271 while (numberMatcher->find(status)) {
272 int32_t start = numberMatcher->start(status);
273 int32_t end = numberMatcher->end(status);
274 icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
275 std::string number;
276 uNumber.toUTF8String(number);
277 // if the entire phone number is invalid, identify each segment that is separated
278 if (!AddPhoneNumber(number, start, matchList, country)) {
279 int searchStart = 0;
280 UChar32 space = ' ';
281 UChar32 slash = '/';
282 if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
283 continue;
284 }
285 while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
286 int phoneStart = searchStart;
287 int indexSpace = uNumber.indexOf(space, searchStart);
288 int indexSlash = uNumber.indexOf(slash, searchStart);
289 int phoneEnd =
290 (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
291 searchStart = phoneEnd + 1;
292 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
293 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
294 }
295 // identify the last segment
296 std::string lastStr = number.substr(searchStart);
297 AddPhoneNumber(lastStr, searchStart + start, matchList, country);
298 }
299 }
300 }
301
302 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)303 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
304 MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
305 {
306 bool flag = false;
307 // 3 is the length of findRules.
308 size_t length = 3;
309 if (GetFindRulesInside().size() == length) {
310 FindRule* regexRule = GetFindRulesInside()[0];
311 if (regexRule == nullptr) {
312 HILOG_ERROR_I18N("PhoneNumberMatched::HandleWithShortAndShort: Get FindRule failed.");
313 return false;
314 }
315 icu::RegexPattern* pattern = regexRule->GetPattern();
316 if (pattern == nullptr) {
317 HILOG_ERROR_I18N("regexRule getPattern failed.");
318 return flag;
319 }
320 icu::UnicodeString str = "";
321 if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
322 str = info.GetContent().tempSubString(1);
323 } else {
324 str = info.GetContent();
325 }
326 UErrorCode status = U_ZERO_ERROR;
327 icu::RegexMatcher* matcher = pattern->matcher(str, status);
328 if (U_FAILURE(status) || matcher == nullptr) {
329 delete pattern;
330 HILOG_ERROR_I18N("pattern matcher failed.");
331 return flag;
332 }
333 if (!matcher->find(status) && U_SUCCESS(status)) {
334 result.push_back(info);
335 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
336 flag = true;
337 }
338 delete matcher;
339 delete pattern;
340 } else {
341 result.push_back(info);
342 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
343 }
344 return flag;
345 }
346
347 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)348 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
349 icu::UnicodeString& src, icu::UnicodeString& filteredString)
350 {
351 std::vector<MatchedNumberInfo> result;
352 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
353 for (auto match : matchList) {
354 if (!HandleBorderRule(match, filteredString)) {
355 continue;
356 }
357 PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
358 if (delMatch == nullptr) {
359 continue;
360 }
361 icu::UnicodeString content = delMatch->raw_string().c_str();
362 int contentStart = delMatch->start();
363 int contentEnd = delMatch->end();
364 std::pair<int, int> pos{contentStart, contentEnd};
365 if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
366 MatchedNumberInfo info;
367 info.SetBegin(contentStart);
368 info.SetEnd(contentEnd);
369 info.SetContent(content);
370 bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
371 if (flag) {
372 continue;
373 }
374 }
375 std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
376 if (posList.size() != 0) {
377 for (auto& matchInfo : posList) {
378 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
379 }
380 result.insert(result.end(), posList.begin(), posList.end());
381 }
382 delete match;
383 }
384 return result;
385 }
386
387 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)388 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
389 {
390 std::set<MatchedNumberInfo> set;
391 std::vector<MatchedNumberInfo> ret;
392 for (auto info : list) {
393 if (set.find(info) == set.end()) {
394 ret.push_back(info);
395 }
396 set.insert(info);
397 }
398 return ret;
399 }
400
401 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)402 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
403 {
404 icu::UnicodeString message = info.GetContent();
405 if (IsNumberWithOneBracket(message)) {
406 info.SetBegin(info.GetBegin() + 1);
407 icu::UnicodeString content = info.GetContent().tempSubString(1);
408 info.SetContent(content);
409 }
410 }
411
412 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)413 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
414 {
415 if (message != "") {
416 int numLeft = 0;
417 int numRight = 0;
418 int len = message.length();
419 for (int i = 0; i < len; i++) {
420 if (message[i] == '(' || message[i] == '[') {
421 numLeft++;
422 }
423 if (message[i] == ')' || message[i] == ']') {
424 numRight++;
425 }
426 }
427 if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
428 return true;
429 }
430 }
431 return false;
432 }
433
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)434 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
435 {
436 std::vector<int> result;
437 size_t length = matchedNumberInfoList.size();
438 if (length == 0) {
439 result.push_back(0);
440 } else {
441 size_t posNumber = 2;
442 size_t posStart = 1;
443 size_t posEnd = 2;
444 result.resize(posNumber * length + 1);
445 result[0] = static_cast<int>(length);
446 for (size_t i = 0; i < length; i++) {
447 result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
448 result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
449 }
450 }
451 return result;
452 }
453
454 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)455 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
456 {
457 if (match == nullptr) {
458 return false;
459 }
460 std::vector<BorderRule*> rules = GetBorderRulesInside();
461 if (rules.size() == 0) {
462 return true;
463 }
464 for (BorderRule* rule : rules) {
465 if (rule == nullptr) {
466 return false;
467 }
468 if (!rule->Handle(match, message)) {
469 return false;
470 }
471 }
472 return true;
473 }
474
475 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)476 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
477 {
478 PhoneNumberMatch* match = phoneNumberMatch;
479 std::vector<CodeRule*> rules = GetCodesRulesInside();
480 if (rules.size() == 0) {
481 return nullptr;
482 }
483 for (CodeRule* rule : rules) {
484 if (rule == nullptr) {
485 continue;
486 }
487 match = rule->Handle(match, message);
488 }
489 return match;
490 }
491
492 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)493 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
494 icu::UnicodeString& message)
495 {
496 std::vector<MatchedNumberInfo> infoList;
497 std::vector<PositiveRule*> rules = GetPositiveRulesInside();
498 for (PositiveRule* rule : rules) {
499 if (rule == nullptr) {
500 continue;
501 }
502 infoList = rule->Handle(match, message);
503 if (infoList.size() != 0) {
504 break;
505 }
506 }
507 return infoList;
508 }
509
DealStringWithOneBracket(icu::UnicodeString & message)510 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
511 {
512 if (IsNumberWithOneBracket(message)) {
513 return message.tempSubString(1);
514 }
515 return message;
516 }
517 } // namespace I18n
518 } // namespace Global
519 } // namespace OHOS