• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
18 #define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
19 
20 #include "utils/base/integral_types.h"
21 #include "utils/utf8/unicodetext.h"
22 
23 namespace libtextclassifier3 {
24 
// Single-codepoint classification predicates. Each takes one char32 codepoint
// and returns a bool. Only the declarations are visible in this header.
// NOTE(review): the exact set of codepoints each predicate accepts is decided
// by the implementation file; the names suggest the intent, but confirm there
// before relying on any edge case.
25 bool IsOpeningBracket(char32 codepoint);
26 bool IsClosingBracket(char32 codepoint);
27 bool IsWhitespace(char32 codepoint);
28 bool IsBidirectional(char32 codepoint);
29 bool IsDigit(char32 codepoint);
30 bool IsLower(char32 codepoint);
31 bool IsUpper(char32 codepoint);
32 bool IsPunctuation(char32 codepoint);
33 bool IsPercentage(char32 codepoint);
34 bool IsSlash(char32 codepoint);
35 bool IsMinus(char32 codepoint);
36 bool IsNumberSign(char32 codepoint);
37 bool IsDot(char32 codepoint);
38 bool IsApostrophe(char32 codepoint);
39 bool IsQuotation(char32 codepoint);
40 bool IsAmpersand(char32 codepoint);
41 
// Script / letter predicates. Each takes one char32 codepoint and returns a
// bool. Only the declarations are visible in this header.
// NOTE(review): the codepoint ranges behind each script check live in the
// implementation file. IsCJTletter presumably stands for
// Chinese/Japanese/Thai -- confirm against the definition.
42 bool IsLatinLetter(char32 codepoint);
43 bool IsArabicLetter(char32 codepoint);
44 bool IsCyrillicLetter(char32 codepoint);
45 bool IsChineseLetter(char32 codepoint);
46 bool IsJapaneseLetter(char32 codepoint);
47 bool IsKoreanLetter(char32 codepoint);
48 bool IsThaiLetter(char32 codepoint);
49 bool IsLetter(char32 codepoint);
50 bool IsCJTletter(char32 codepoint);
51 
// Codepoint-to-codepoint mappings. Each takes one char32 and returns a char32.
// NOTE(review): what is returned for a codepoint that has no case mapping or
// no paired bracket is not visible from this header -- confirm against the
// implementation.
52 char32 ToLower(char32 codepoint);
53 char32 ToUpper(char32 codepoint);
54 char32 GetPairedBracket(char32 codepoint);
55 
56 // Checks if the text format is not likely to be a number. Used to avoid most of
57 // the java exceptions thrown when fail to parse.
58 template <class T>
PassesIntPreChesks(const UnicodeText & text,const T result)59 bool PassesIntPreChesks(const UnicodeText& text, const T result) {
60   if (text.empty() ||
61       (std::is_same<T, int32>::value && text.size_codepoints() > 10) ||
62       (std::is_same<T, int64>::value && text.size_codepoints() > 19)) {
63     return false;
64   }
65   for (auto it = text.begin(); it != text.end(); ++it) {
66     if (!IsDigit(*it)) {
67       return false;
68     }
69   }
70   return true;
71 }
72 
73 }  // namespace libtextclassifier3
74 
75 #endif  // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
76