1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_ 18 #define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_ 19 20 #include "utils/base/integral_types.h" 21 #include "utils/utf8/unicodetext.h" 22 23 namespace libtextclassifier3 { 24 25 bool IsOpeningBracket(char32 codepoint); 26 bool IsClosingBracket(char32 codepoint); 27 bool IsWhitespace(char32 codepoint); 28 bool IsBidirectional(char32 codepoint); 29 bool IsDigit(char32 codepoint); 30 bool IsLower(char32 codepoint); 31 bool IsUpper(char32 codepoint); 32 bool IsPunctuation(char32 codepoint); 33 bool IsPercentage(char32 codepoint); 34 bool IsSlash(char32 codepoint); 35 bool IsMinus(char32 codepoint); 36 bool IsNumberSign(char32 codepoint); 37 bool IsDot(char32 codepoint); 38 bool IsApostrophe(char32 codepoint); 39 bool IsQuotation(char32 codepoint); 40 bool IsAmpersand(char32 codepoint); 41 42 bool IsLatinLetter(char32 codepoint); 43 bool IsArabicLetter(char32 codepoint); 44 bool IsCyrillicLetter(char32 codepoint); 45 bool IsChineseLetter(char32 codepoint); 46 bool IsJapaneseLetter(char32 codepoint); 47 bool IsKoreanLetter(char32 codepoint); 48 bool IsThaiLetter(char32 codepoint); 49 bool IsLetter(char32 codepoint); 50 bool IsCJTletter(char32 codepoint); 51 52 char32 ToLower(char32 codepoint); 53 char32 ToUpper(char32 codepoint); 54 char32 GetPairedBracket(char32 codepoint); 55 56 // Checks if the text format is not likely to be a number. Used to avoid most of 57 // the java exceptions thrown when fail to parse. 58 template <class T> PassesIntPreChesks(const UnicodeText & text,const T result)59bool PassesIntPreChesks(const UnicodeText& text, const T result) { 60 if (text.empty() || 61 (std::is_same<T, int32>::value && text.size_codepoints() > 10) || 62 (std::is_same<T, int64>::value && text.size_codepoints() > 19)) { 63 return false; 64 } 65 for (auto it = text.begin(); it != text.end(); ++it) { 66 if (!IsDigit(*it)) { 67 return false; 68 } 69 } 70 return true; 71 } 72 73 } // namespace libtextclassifier3 74 75 #endif // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_ 76