1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_ 18 #define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_ 19 20 #include "utils/base/integral_types.h" 21 #include "utils/utf8/unicodetext.h" 22 #include "utils/utf8/unilib-common.h" 23 24 #if defined TC3_UNILIB_ICU 25 #include "utils/utf8/unilib-icu.h" 26 #define INIT_UNILIB_FOR_TESTING(VAR) VAR() 27 #elif defined TC3_UNILIB_JAVAICU 28 #include "utils/utf8/unilib-javaicu.h" 29 #define INIT_UNILIB_FOR_TESTING(VAR) VAR(nullptr) 30 #elif defined TC3_UNILIB_APPLE 31 #include "utils/utf8/unilib-apple.h" 32 #define INIT_UNILIB_FOR_TESTING(VAR) VAR() 33 #elif defined TC3_UNILIB_DUMMY 34 #include "utils/utf8/unilib-dummy.h" 35 #define INIT_UNILIB_FOR_TESTING(VAR) VAR() 36 #else 37 #error No TC3_UNILIB implementation specified. 38 #endif 39 40 namespace libtextclassifier3 { 41 42 class UniLib : public UniLibBase { 43 public: 44 using UniLibBase::UniLibBase; 45 46 // Lowercase a unicode string. ToLowerText(const UnicodeText & text)47 UnicodeText ToLowerText(const UnicodeText& text) const { 48 UnicodeText result; 49 for (const char32 codepoint : text) { 50 result.push_back(ToLower(codepoint)); 51 } 52 return result; 53 } 54 55 // Uppercase a unicode string. ToUpperText(const UnicodeText & text)56 UnicodeText ToUpperText(const UnicodeText& text) const { 57 UnicodeText result; 58 for (const char32 codepoint : text) { 59 result.push_back(UniLibBase::ToUpper(codepoint)); 60 } 61 return result; 62 } 63 IsLowerText(const UnicodeText & text)64 bool IsLowerText(const UnicodeText& text) const { 65 for (const char32 codepoint : text) { 66 if (!IsLower(codepoint)) { 67 return false; 68 } 69 } 70 return true; 71 } 72 IsUpperText(const UnicodeText & text)73 bool IsUpperText(const UnicodeText& text) const { 74 for (const char32 codepoint : text) { 75 if (!IsUpper(codepoint)) { 76 return false; 77 } 78 } 79 return true; 80 } 81 IsDigits(const UnicodeText & text)82 bool IsDigits(const UnicodeText& text) const { 83 for (const char32 codepoint : text) { 84 if (!IsDigit(codepoint)) { 85 return false; 86 } 87 } 88 return true; 89 } 90 IsPercentage(char32 codepoint)91 bool IsPercentage(char32 codepoint) const { 92 return libtextclassifier3::IsPercentage(codepoint); 93 } 94 IsSlash(char32 codepoint)95 bool IsSlash(char32 codepoint) const { 96 return libtextclassifier3::IsSlash(codepoint); 97 } 98 IsMinus(char32 codepoint)99 bool IsMinus(char32 codepoint) const { 100 return libtextclassifier3::IsMinus(codepoint); 101 } 102 IsNumberSign(char32 codepoint)103 bool IsNumberSign(char32 codepoint) const { 104 return libtextclassifier3::IsNumberSign(codepoint); 105 } 106 IsDot(char32 codepoint)107 bool IsDot(char32 codepoint) const { 108 return libtextclassifier3::IsDot(codepoint); 109 } 110 IsLatinLetter(char32 codepoint)111 bool IsLatinLetter(char32 codepoint) const { 112 return libtextclassifier3::IsLatinLetter(codepoint); 113 } 114 IsArabicLetter(char32 codepoint)115 bool IsArabicLetter(char32 codepoint) const { 116 return libtextclassifier3::IsArabicLetter(codepoint); 117 } 118 IsCyrillicLetter(char32 codepoint)119 bool IsCyrillicLetter(char32 codepoint) const { 120 return libtextclassifier3::IsCyrillicLetter(codepoint); 121 } 122 IsChineseLetter(char32 codepoint)123 bool IsChineseLetter(char32 codepoint) const { 124 return libtextclassifier3::IsChineseLetter(codepoint); 125 } 126 IsJapaneseLetter(char32 codepoint)127 bool IsJapaneseLetter(char32 codepoint) const { 128 return libtextclassifier3::IsJapaneseLetter(codepoint); 129 } 130 IsKoreanLetter(char32 codepoint)131 bool IsKoreanLetter(char32 codepoint) const { 132 return libtextclassifier3::IsKoreanLetter(codepoint); 133 } 134 IsThaiLetter(char32 codepoint)135 bool IsThaiLetter(char32 codepoint) const { 136 return libtextclassifier3::IsThaiLetter(codepoint); 137 } 138 IsCJTletter(char32 codepoint)139 bool IsCJTletter(char32 codepoint) const { 140 return libtextclassifier3::IsCJTletter(codepoint); 141 } 142 IsLetter(char32 codepoint)143 bool IsLetter(char32 codepoint) const { 144 return libtextclassifier3::IsLetter(codepoint); 145 } 146 }; 147 148 } // namespace libtextclassifier3 149 #endif // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_ 150