1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <climits>
17 #include <set>
18 #include "i18n_hilog.h"
19 #include "regex_rule.h"
20 #include "phone_number_matched.h"
21 #include "utils.h"
22
23 namespace OHOS {
24 namespace Global {
25 namespace I18n {
26 using i18n::phonenumbers::PhoneNumber;
27
28 const int PhoneNumberMatched::CONTAIN = 9;
29 const int PhoneNumberMatched::CONTAIN_OR_INTERSECT = 8;
30 const UChar32 PhoneNumberMatched::REPLACE_CHAR = 'A';
31
PhoneNumberMatched(std::string & country)32 PhoneNumberMatched::PhoneNumberMatched(std::string& country)
33 {
34 phoneNumberRule = new PhoneNumberRule(country);
35 phoneNumberUtil = PhoneNumberUtil::GetInstance();
36 shortNumberInfo = new ShortNumberInfo();
37 this->country = country;
38 if (phoneNumberRule != nullptr) {
39 phoneNumberRule->Init();
40 }
41 }
42
~PhoneNumberMatched()43 PhoneNumberMatched::~PhoneNumberMatched()
44 {
45 delete phoneNumberRule;
46 delete shortNumberInfo;
47 }
48
GetMatchedPhoneNumber(icu::UnicodeString & message)49 std::vector<int> PhoneNumberMatched::GetMatchedPhoneNumber(icu::UnicodeString& message)
50 {
51 icu::UnicodeString messageStr = message;
52 if (phoneNumberRule != nullptr && !phoneNumberRule->isFixed) {
53 return DealWithoutFixed(messageStr, country);
54 }
55 icu::UnicodeString filteredString = HandleNegativeRule(messageStr);
56 std::vector<MatchedNumberInfo> matchedNumberInfoList = GetPossibleNumberInfos(country,
57 messageStr, filteredString);
58 std::vector<MatchedNumberInfo> shortList = FindShortNumbers(country, filteredString);
59 if (shortList.size() != 0) {
60 matchedNumberInfoList.insert(matchedNumberInfoList.end(), shortList.begin(), shortList.end());
61 }
62 matchedNumberInfoList = DeleteRepeatedInfo(matchedNumberInfoList);
63 for (auto& matchedNumberInfo : matchedNumberInfoList) {
64 DealNumberWithOneBracket(matchedNumberInfo);
65 }
66 return DealResult(matchedNumberInfoList);
67 }
68
DealWithoutFixed(icu::UnicodeString & message,std::string & country)69 std::vector<int> PhoneNumberMatched::DealWithoutFixed(icu::UnicodeString& message, std::string& country)
70 {
71 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, message);
72 std::vector<MatchedNumberInfo> result;
73 for (auto& match : matchList) {
74 if (match == nullptr) continue;
75 MatchedNumberInfo info;
76 icu::UnicodeString content = match->raw_string().c_str();
77 PhoneNumber phoneNumber = match->number();
78 if (phoneNumberUtil->IsValidNumber(phoneNumber)) {
79 info.SetBegin(match->start());
80 info.SetEnd(match->end());
81 info.SetContent(content);
82 result.push_back(info);
83 }
84 delete match;
85 }
86 std::vector<MatchedNumberInfo> shortResult = FindShortNumbers(country, message);
87 // Merge result
88 if (shortResult.size() != 0) {
89 result.insert(result.end(), shortResult.begin(), shortResult.end());
90 }
91 result = DeleteRepeatedInfo(result);
92 for (auto& res: result) {
93 DealNumberWithOneBracket(res);
94 }
95 return DealResult(result);
96 }
97
98 // Filtering text using negative rules
HandleNegativeRule(icu::UnicodeString & src)99 icu::UnicodeString PhoneNumberMatched::HandleNegativeRule(icu::UnicodeString& src)
100 {
101 std::vector<NegativeRule*> rules = phoneNumberRule->GetNegativeRules();
102 icu::UnicodeString ret = src;
103 for (NegativeRule* rule : rules) {
104 if (rule == nullptr) {
105 continue;
106 }
107 ret = rule->Handle(ret);
108 }
109 return ret;
110 }
111
112 // Replace the characters in the specified area with REPLACE_CHAR
ReplaceSpecifiedPos(icu::UnicodeString & chs,int start,int end)113 void PhoneNumberMatched::ReplaceSpecifiedPos(icu::UnicodeString& chs, int start, int end)
114 {
115 if (start < end) {
116 int len = chs.length();
117 for (int i = 0; i < len; i++) {
118 if (i >= start && i < end) {
119 chs.replace(i, 1, PhoneNumberMatched::REPLACE_CHAR);
120 }
121 }
122 }
123 }
124
125 // Short numbers identification
FindShortNumbers(std::string & country,icu::UnicodeString & message)126 std::vector<MatchedNumberInfo> PhoneNumberMatched::FindShortNumbers(std::string& country,
127 icu::UnicodeString& message)
128 {
129 std::vector<MatchedNumberInfo> matchedNumberInfoList;
130 UErrorCode status = U_ZERO_ERROR;
131 size_t pos = GetFindRulesInside().size();
132 if (pos == 0) {
133 HILOG_ERROR_I18N("PhoneNumberRule.findRules is empty.");
134 return matchedNumberInfoList;
135 }
136 // 1 indicates the last position.
137 FindRule* shortRegexRule = GetFindRulesInside()[pos - 1];
138 icu::RegexPattern* shortPattern = shortRegexRule->GetPattern();
139 if (shortPattern == nullptr) {
140 HILOG_ERROR_I18N("shortPattern getPattern failed.");
141 return matchedNumberInfoList;
142 }
143 icu::RegexMatcher* shortMatch = shortPattern->matcher(message, status);
144 if (shortMatch == nullptr) {
145 delete shortPattern;
146 HILOG_ERROR_I18N("shortPattern matcher failed.");
147 return matchedNumberInfoList;
148 }
149 while (shortMatch->find(status)) {
150 icu::UnicodeString numberToParse = shortMatch->group(status);
151 std::string stringParse;
152 numberToParse.toUTF8String(stringParse);
153 PhoneNumber phoneNumber;
154 PhoneNumberUtil::ErrorType errorType =
155 phoneNumberUtil->ParseAndKeepRawInput(stringParse, country, &phoneNumber);
156 if (errorType != PhoneNumberUtil::NO_PARSING_ERROR) {
157 HILOG_ERROR_I18N("PhoneNumberRule: failed to call the ParseAndKeepRawInput.");
158 continue;
159 }
160 // Add the valid short number to the result
161 if (shortNumberInfo != nullptr &&
162 shortNumberInfo->IsPossibleShortNumberForRegion(phoneNumber, country)) {
163 MatchedNumberInfo matcher;
164 matcher.SetBegin(shortMatch->start(status));
165 matcher.SetEnd(shortMatch->end(status));
166 icu::UnicodeString stringShort = shortMatch->group(status);
167 matcher.SetContent(stringShort);
168 matchedNumberInfoList.push_back(matcher);
169 }
170 }
171 delete shortMatch;
172 delete shortPattern;
173 return matchedNumberInfoList;
174 }
175
GetFindRulesInside()176 std::vector<FindRule*> PhoneNumberMatched::GetFindRulesInside()
177 {
178 if (phoneNumberRule != nullptr) {
179 return phoneNumberRule->GetFindRules();
180 }
181 return {};
182 }
183
GetBorderRulesInside()184 std::vector<BorderRule*> PhoneNumberMatched::GetBorderRulesInside()
185 {
186 if (phoneNumberRule != nullptr) {
187 return phoneNumberRule->GetBorderRules();
188 }
189 return {};
190 }
191
GetCodesRulesInside()192 std::vector<CodeRule*> PhoneNumberMatched::GetCodesRulesInside()
193 {
194 if (phoneNumberRule != nullptr) {
195 return phoneNumberRule->GetCodesRules();
196 }
197 return {};
198 }
199
GetPositiveRulesInside()200 std::vector<PositiveRule*> PhoneNumberMatched::GetPositiveRulesInside()
201 {
202 if (phoneNumberRule != nullptr) {
203 return phoneNumberRule->GetPositiveRules();
204 }
205 return {};
206 }
207
208 // Add the phone number that may be correct, and return true if successful
AddPhoneNumber(std::string & number,int start,std::vector<PhoneNumberMatch * > & matchList,std::string & country)209 bool PhoneNumberMatched::AddPhoneNumber(std::string& number, int start, std::vector<PhoneNumberMatch*>& matchList,
210 std::string& country)
211 {
212 PhoneNumber phoneNumber;
213 int lenNumber = 5;
214 icu::UnicodeString uNumber = number.c_str();
215 if (RegexRule::CountDigits(uNumber) < lenNumber) {
216 return false;
217 }
218 PhoneNumberUtil::ErrorType parseStatus = phoneNumberUtil->Parse(number, country, &phoneNumber);
219 if (parseStatus != PhoneNumberUtil::NO_PARSING_ERROR) {
220 return false;
221 }
222 UChar32 space = ' ';
223 UChar32 slash = '/';
224 // Add to matchList if phone number is not delimited, or valid
225 if ((uNumber.indexOf(space) == -1 && uNumber.indexOf(slash) == -1) ||
226 phoneNumberUtil->IsValidNumber(phoneNumber)) {
227 PhoneNumberMatch* match = new PhoneNumberMatch(start, number, phoneNumber);
228 matchList.push_back(match);
229 return true;
230 }
231 return false;
232 }
233
234 // Add the valid phone number
FindNumbers(std::string & country,icu::UnicodeString & filteredString)235 std::vector<PhoneNumberMatch*> PhoneNumberMatched::FindNumbers(std::string& country,
236 icu::UnicodeString& filteredString)
237 {
238 std::vector<PhoneNumberMatch*> matchList;
239 size_t pos = GetFindRulesInside().size();
240 if (pos < 2) { // 2 indicates the penultimate position.
241 HILOG_ERROR_I18N("numberRegexRule size less then 2.");
242 return matchList;
243 }
244 // 2 indicates the penultimate position.
245 FindRule* numberRegexRule = GetFindRulesInside()[pos - 2];
246 icu::RegexPattern* numberPattern = numberRegexRule->GetPattern();
247 if (numberPattern == nullptr) {
248 HILOG_ERROR_I18N("numberRegexRule getPattern failed.");
249 return matchList;
250 }
251 UErrorCode status = U_ZERO_ERROR;
252 icu::RegexMatcher* numberMatcher = numberPattern->matcher(filteredString, status);
253 if (numberMatcher == nullptr) {
254 HILOG_ERROR_I18N("numberPattern matcher failed.");
255 delete numberPattern;
256 return matchList;
257 }
258 FindNumbersFromInputString(matchList, numberMatcher, country, filteredString);
259 delete numberMatcher;
260 delete numberPattern;
261 return matchList;
262 }
263
FindNumbersFromInputString(std::vector<PhoneNumberMatch * > & matchList,icu::RegexMatcher * numberMatcher,std::string & country,icu::UnicodeString & filteredString)264 void PhoneNumberMatched::FindNumbersFromInputString(std::vector<PhoneNumberMatch*> &matchList,
265 icu::RegexMatcher* numberMatcher, std::string& country, icu::UnicodeString& filteredString)
266 {
267 UErrorCode status = U_ZERO_ERROR;
268 while (numberMatcher->find(status)) {
269 int32_t start = numberMatcher->start(status);
270 int32_t end = numberMatcher->end(status);
271 icu::UnicodeString uNumber = filteredString.tempSubString(start, end - start);
272 std::string number;
273 uNumber.toUTF8String(number);
274 // if the entire phone number is invalid, identify each segment that is separated
275 if (!AddPhoneNumber(number, start, matchList, country)) {
276 int searchStart = 0;
277 UChar32 space = ' ';
278 UChar32 slash = '/';
279 if (uNumber.indexOf(space, searchStart) == -1 && uNumber.indexOf(slash, searchStart) == -1) {
280 continue;
281 }
282 while (uNumber.indexOf(space, searchStart) != -1 || uNumber.indexOf(slash, searchStart) != -1) {
283 int phoneStart = searchStart;
284 int indexSpace = uNumber.indexOf(space, searchStart);
285 int indexSlash = uNumber.indexOf(slash, searchStart);
286 int phoneEnd =
287 (indexSpace == -1 || (indexSlash != -1 && indexSlash < indexSpace)) ? indexSlash : indexSpace;
288 searchStart = phoneEnd + 1;
289 std::string tempNumber = number.substr(phoneStart, phoneEnd - phoneStart);
290 AddPhoneNumber(tempNumber, phoneStart + start, matchList, country);
291 }
292 // identify the last segment
293 std::string lastStr = number.substr(searchStart);
294 AddPhoneNumber(lastStr, searchStart + start, matchList, country);
295 }
296 }
297 }
298
299 // Handing the situation of shortnumber/shortnumber.
HandleWithShortAndShort(std::vector<MatchedNumberInfo> & result,std::string & country,MatchedNumberInfo & info,std::pair<int,int> & pos,icu::UnicodeString & filteredString)300 bool PhoneNumberMatched::HandleWithShortAndShort(std::vector<MatchedNumberInfo>& result, std::string& country,
301 MatchedNumberInfo& info, std::pair<int, int>& pos, icu::UnicodeString& filteredString)
302 {
303 bool flag = false;
304 // 3 is the length of findRules.
305 size_t length = 3;
306 if (GetFindRulesInside().size() == length) {
307 UErrorCode status = U_ZERO_ERROR;
308 FindRule* regexRule = GetFindRulesInside()[0];
309 icu::RegexPattern* pattern = regexRule->GetPattern();
310 if (pattern == nullptr) {
311 HILOG_ERROR_I18N("regexRule getPattern failed.");
312 return flag;
313 }
314 icu::UnicodeString str = "";
315 if (info.GetContent()[0] == '(' || info.GetContent()[0] == '[') {
316 str = info.GetContent().tempSubString(1);
317 } else {
318 str = info.GetContent();
319 }
320 icu::RegexMatcher* matcher = pattern->matcher(str, status);
321 if (matcher == nullptr) {
322 delete pattern;
323 HILOG_ERROR_I18N("pattern matcher failed.");
324 return flag;
325 }
326 if (!matcher->find(status)) {
327 result.push_back(info);
328 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
329 flag = true;
330 }
331 delete matcher;
332 delete pattern;
333 } else {
334 result.push_back(info);
335 ReplaceSpecifiedPos(filteredString, pos.first, pos.second);
336 }
337 return flag;
338 }
339
340 // Get possible phone number
GetPossibleNumberInfos(std::string & country,icu::UnicodeString & src,icu::UnicodeString & filteredString)341 std::vector<MatchedNumberInfo> PhoneNumberMatched::GetPossibleNumberInfos(std::string& country,
342 icu::UnicodeString& src, icu::UnicodeString& filteredString)
343 {
344 std::vector<MatchedNumberInfo> result;
345 std::vector<PhoneNumberMatch*> matchList = FindNumbers(country, filteredString);
346 for (auto match : matchList) {
347 if (!HandleBorderRule(match, filteredString)) {
348 continue;
349 }
350 PhoneNumberMatch* delMatch = HandleCodesRule(match, src);
351 if (delMatch == nullptr) {
352 continue;
353 }
354 icu::UnicodeString content = delMatch->raw_string().c_str();
355 int contentStart = delMatch->start();
356 int contentEnd = delMatch->end();
357 std::pair<int, int> pos{contentStart, contentEnd};
358 if (phoneNumberUtil->IsValidNumber(delMatch->number())) {
359 MatchedNumberInfo info;
360 info.SetBegin(contentStart);
361 info.SetEnd(contentEnd);
362 info.SetContent(content);
363 bool flag = HandleWithShortAndShort(result, country, info, pos, filteredString);
364 if (flag) {
365 continue;
366 }
367 }
368 std::vector<MatchedNumberInfo> posList = HandlePositiveRule(delMatch, filteredString);
369 if (posList.size() != 0) {
370 for (auto& matchInfo : posList) {
371 ReplaceSpecifiedPos(filteredString, matchInfo.GetBegin(), matchInfo.GetEnd());
372 }
373 result.insert(result.end(), posList.begin(), posList.end());
374 }
375 delete match;
376 }
377 return result;
378 }
379
380 // Remove duplicate results
DeleteRepeatedInfo(std::vector<MatchedNumberInfo> & list)381 std::vector<MatchedNumberInfo> PhoneNumberMatched::DeleteRepeatedInfo(std::vector<MatchedNumberInfo>& list)
382 {
383 std::set<MatchedNumberInfo> set;
384 std::vector<MatchedNumberInfo> ret;
385 for (auto info : list) {
386 if (set.find(info) == set.end()) {
387 ret.push_back(info);
388 }
389 set.insert(info);
390 }
391 return ret;
392 }
393
394 // Process the case where the phone number starts with one bracket
DealNumberWithOneBracket(MatchedNumberInfo & info)395 void PhoneNumberMatched::DealNumberWithOneBracket(MatchedNumberInfo& info)
396 {
397 icu::UnicodeString message = info.GetContent();
398 if (IsNumberWithOneBracket(message)) {
399 info.SetBegin(info.GetBegin() + 1);
400 icu::UnicodeString content = info.GetContent().tempSubString(1);
401 info.SetContent(content);
402 }
403 }
404
405 // check whether the bracket at the start position are redundant
IsNumberWithOneBracket(icu::UnicodeString & message)406 bool PhoneNumberMatched::IsNumberWithOneBracket(icu::UnicodeString& message)
407 {
408 if (message != "") {
409 int numLeft = 0;
410 int numRight = 0;
411 int len = message.length();
412 for (int i = 0; i < len; i++) {
413 if (message[i] == '(' || message[i] == '[') {
414 numLeft++;
415 }
416 if (message[i] == ')' || message[i] == ']') {
417 numRight++;
418 }
419 }
420 if (numLeft > numRight && (message[0] == '(' || message[0] == '[')) {
421 return true;
422 }
423 }
424 return false;
425 }
426
DealResult(std::vector<MatchedNumberInfo> & matchedNumberInfoList)427 std::vector<int> PhoneNumberMatched::DealResult(std::vector<MatchedNumberInfo>& matchedNumberInfoList)
428 {
429 std::vector<int> result;
430 size_t length = matchedNumberInfoList.size();
431 if (length == 0) {
432 result.push_back(0);
433 } else {
434 size_t posNumber = 2;
435 size_t posStart = 1;
436 size_t posEnd = 2;
437 result.resize(posNumber * length + 1);
438 result[0] = static_cast<int>(length);
439 for (size_t i = 0; i < length; i++) {
440 result[posNumber * i + posStart] = matchedNumberInfoList[i].GetBegin();
441 result[posNumber * i + posEnd] = matchedNumberInfoList[i].GetEnd();
442 }
443 }
444 return result;
445 }
446
447 // Filter result based on Border rule
HandleBorderRule(PhoneNumberMatch * match,icu::UnicodeString & message)448 bool PhoneNumberMatched::HandleBorderRule(PhoneNumberMatch* match, icu::UnicodeString& message)
449 {
450 if (match == nullptr) {
451 return false;
452 }
453 std::vector<BorderRule*> rules = GetBorderRulesInside();
454 if (rules.size() == 0) {
455 return true;
456 }
457 for (BorderRule* rule : rules) {
458 if (!rule->Handle(match, message)) {
459 return false;
460 }
461 }
462 return true;
463 }
464
465 // Filter result based on Codes rule
HandleCodesRule(PhoneNumberMatch * phoneNumberMatch,icu::UnicodeString & message)466 PhoneNumberMatch* PhoneNumberMatched::HandleCodesRule(PhoneNumberMatch* phoneNumberMatch, icu::UnicodeString& message)
467 {
468 PhoneNumberMatch* match = phoneNumberMatch;
469 std::vector<CodeRule*> rules = GetCodesRulesInside();
470 if (rules.size() == 0) {
471 return nullptr;
472 }
473 for (CodeRule* rule : rules) {
474 match = rule->Handle(match, message);
475 }
476 return match;
477 }
478
479 // Add phone numbers that meet the positive rule to the result
HandlePositiveRule(PhoneNumberMatch * match,icu::UnicodeString & message)480 std::vector<MatchedNumberInfo> PhoneNumberMatched::HandlePositiveRule(PhoneNumberMatch* match,
481 icu::UnicodeString& message)
482 {
483 std::vector<MatchedNumberInfo> infoList;
484 std::vector<PositiveRule*> rules = GetPositiveRulesInside();
485 for (PositiveRule* rule : rules) {
486 infoList = rule->Handle(match, message);
487 if (infoList.size() != 0) {
488 break;
489 }
490 }
491 return infoList;
492 }
493
DealStringWithOneBracket(icu::UnicodeString & message)494 icu::UnicodeString PhoneNumberMatched::DealStringWithOneBracket(icu::UnicodeString& message)
495 {
496 if (IsNumberWithOneBracket(message)) {
497 return message.tempSubString(1);
498 }
499 return message;
500 }
501 } // namespace I18n
502 } // namespace Global
503 } // namespace OHOS