1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "utils/i18n/locale.h"
18
19 #include <string>
20
21 #include "utils/strings/split.h"
22
23 namespace libtextclassifier3 {
24
25 namespace {
// Wildcard subtag: a supported-locale component equal to this value matches
// any value of the corresponding component.
constexpr const char* const kAnyMatch = "*";

// BCP 47 language code for "Undetermined Language".
constexpr const char* const kUnknownLanguageCode = "und";
30
CheckLanguage(StringPiece language)31 bool CheckLanguage(StringPiece language) {
32 if (language.size() == 1 && language.data()[0] == '*') {
33 return true;
34 }
35
36 if (language.size() != 2 && language.size() != 3) {
37 return false;
38 }
39
40 // Needs to be all lowercase.
41 for (int i = 0; i < language.size(); ++i) {
42 if (!std::islower(language[i])) {
43 return false;
44 }
45 }
46
47 return true;
48 }
49
CheckScript(StringPiece script)50 bool CheckScript(StringPiece script) {
51 if (script.size() != 4) {
52 return false;
53 }
54
55 if (!std::isupper(script[0])) {
56 return false;
57 }
58
59 // Needs to be all lowercase.
60 for (int i = 1; i < script.size(); ++i) {
61 if (!std::islower(script[i])) {
62 return false;
63 }
64 }
65
66 return true;
67 }
68
CheckRegion(StringPiece region)69 bool CheckRegion(StringPiece region) {
70 if (region.size() == 2) {
71 return std::isupper(region[0]) && std::isupper(region[1]);
72 } else if (region.size() == 3) {
73 return std::isdigit(region[0]) && std::isdigit(region[1]) &&
74 std::isdigit(region[2]);
75 } else {
76 return false;
77 }
78 }
79
80 } // namespace
81
FromBCP47(const std::string & locale_tag)82 Locale Locale::FromBCP47(const std::string& locale_tag) {
83 std::vector<StringPiece> parts = strings::Split(locale_tag, '-');
84 if (parts.empty()) {
85 return Locale::Invalid();
86 }
87
88 auto parts_it = parts.begin();
89 StringPiece language = *parts_it;
90 if (!CheckLanguage(language)) {
91 return Locale::Invalid();
92 }
93 ++parts_it;
94
95 StringPiece script;
96 if (parts_it != parts.end()) {
97 script = *parts_it;
98 if (!CheckScript(script)) {
99 script = "";
100 } else {
101 ++parts_it;
102 }
103 }
104
105 StringPiece region;
106 if (parts_it != parts.end()) {
107 region = *parts_it;
108 if (!CheckRegion(region)) {
109 region = "";
110 } else {
111 ++parts_it;
112 }
113 }
114
115 // NOTE: We don't parse the rest of the BCP47 tag here even if specified.
116
117 return Locale(language.ToString(), script.ToString(), region.ToString());
118 }
119
FromLanguageTag(const LanguageTag * language_tag)120 Locale Locale::FromLanguageTag(const LanguageTag* language_tag) {
121 if (language_tag == nullptr || language_tag->language() == nullptr) {
122 return Locale::Invalid();
123 }
124
125 StringPiece language = language_tag->language()->c_str();
126 if (!CheckLanguage(language)) {
127 return Locale::Invalid();
128 }
129
130 StringPiece script;
131 if (language_tag->script() != nullptr) {
132 script = language_tag->script()->c_str();
133 if (!CheckScript(script)) {
134 script = "";
135 }
136 }
137
138 StringPiece region;
139 if (language_tag->region() != nullptr) {
140 region = language_tag->region()->c_str();
141 if (!CheckRegion(region)) {
142 region = "";
143 }
144 }
145 return Locale(language.ToString(), script.ToString(), region.ToString());
146 }
147
IsUnknown() const148 bool Locale::IsUnknown() const {
149 return is_valid_ && language_ == kUnknownLanguageCode;
150 }
151
IsLocaleSupported(const Locale & locale,const std::vector<Locale> & supported_locales,bool default_value)152 bool Locale::IsLocaleSupported(const Locale& locale,
153 const std::vector<Locale>& supported_locales,
154 bool default_value) {
155 if (!locale.IsValid()) {
156 return false;
157 }
158 if (locale.IsUnknown()) {
159 return default_value;
160 }
161 for (const Locale& supported_locale : supported_locales) {
162 if (!supported_locale.IsValid()) {
163 continue;
164 }
165 const bool language_matches =
166 supported_locale.Language().empty() ||
167 supported_locale.Language() == kAnyMatch ||
168 supported_locale.Language() == locale.Language();
169 const bool script_matches = supported_locale.Script().empty() ||
170 supported_locale.Script() == kAnyMatch ||
171 locale.Script().empty() ||
172 supported_locale.Script() == locale.Script();
173 const bool region_matches = supported_locale.Region().empty() ||
174 supported_locale.Region() == kAnyMatch ||
175 locale.Region().empty() ||
176 supported_locale.Region() == locale.Region();
177 if (language_matches && script_matches && region_matches) {
178 return true;
179 }
180 }
181 return false;
182 }
183
IsAnyLocaleSupported(const std::vector<Locale> & locales,const std::vector<Locale> & supported_locales,bool default_value)184 bool Locale::IsAnyLocaleSupported(const std::vector<Locale>& locales,
185 const std::vector<Locale>& supported_locales,
186 bool default_value) {
187 if (locales.empty()) {
188 return default_value;
189 }
190 if (supported_locales.empty()) {
191 return default_value;
192 }
193 for (const Locale& locale : locales) {
194 if (IsLocaleSupported(locale, supported_locales, default_value)) {
195 return true;
196 }
197 }
198 return false;
199 }
200
operator ==(const Locale & locale) const201 bool Locale::operator==(const Locale& locale) const {
202 return language_ == locale.language_ && region_ == locale.region_ &&
203 script_ == locale.script_;
204 }
205
operator <(const Locale & locale) const206 bool Locale::operator<(const Locale& locale) const {
207 return std::tie(language_, region_, script_) <
208 std::tie(locale.language_, locale.region_, locale.script_);
209 }
210
operator !=(const Locale & locale) const211 bool Locale::operator!=(const Locale& locale) const {
212 return !(*this == locale);
213 }
214
operator <<(logging::LoggingStringStream & stream,const Locale & locale)215 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
216 const Locale& locale) {
217 return stream << "Locale(language=" << locale.Language()
218 << ", script=" << locale.Script()
219 << ", region=" << locale.Region()
220 << ", is_valid=" << locale.IsValid()
221 << ", is_unknown=" << locale.IsUnknown() << ")";
222 }
223
ParseLocales(StringPiece locales_list,std::vector<Locale> * locales)224 bool ParseLocales(StringPiece locales_list, std::vector<Locale>* locales) {
225 for (const auto& locale_str : strings::Split(locales_list, ',')) {
226 const Locale locale = Locale::FromBCP47(locale_str.ToString());
227 if (!locale.IsValid()) {
228 TC3_LOG(ERROR) << "Invalid locale " << locale_str.ToString();
229 return false;
230 }
231 locales->push_back(locale);
232 }
233 return true;
234 }
235
236 } // namespace libtextclassifier3
237