1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "utils/i18n/locale.h"
18
19 #include "utils/strings/split.h"
20
21 namespace libtextclassifier3 {
22
23 namespace {
// Wildcard value: a supported-locale component equal to this matches anything.
constexpr char kAnyMatch[] = "*";

// Language subtag that BCP 47 assigns to "Undetermined Language".
constexpr char kUnknownLanguageCode[] = "und";
28
CheckLanguage(StringPiece language)29 bool CheckLanguage(StringPiece language) {
30 if (language.size() == 1 && language.data()[0] == '*') {
31 return true;
32 }
33
34 if (language.size() != 2 && language.size() != 3) {
35 return false;
36 }
37
38 // Needs to be all lowercase.
39 for (int i = 0; i < language.size(); ++i) {
40 if (!std::islower(language[i])) {
41 return false;
42 }
43 }
44
45 return true;
46 }
47
CheckScript(StringPiece script)48 bool CheckScript(StringPiece script) {
49 if (script.size() != 4) {
50 return false;
51 }
52
53 if (!std::isupper(script[0])) {
54 return false;
55 }
56
57 // Needs to be all lowercase.
58 for (int i = 1; i < script.size(); ++i) {
59 if (!std::islower(script[i])) {
60 return false;
61 }
62 }
63
64 return true;
65 }
66
CheckRegion(StringPiece region)67 bool CheckRegion(StringPiece region) {
68 if (region.size() == 2) {
69 return std::isupper(region[0]) && std::isupper(region[1]);
70 } else if (region.size() == 3) {
71 return std::isdigit(region[0]) && std::isdigit(region[1]) &&
72 std::isdigit(region[2]);
73 } else {
74 return false;
75 }
76 }
77
78 } // namespace
79
FromBCP47(const std::string & locale_tag)80 Locale Locale::FromBCP47(const std::string& locale_tag) {
81 std::vector<StringPiece> parts = strings::Split(locale_tag, '-');
82 if (parts.empty()) {
83 return Locale::Invalid();
84 }
85
86 auto parts_it = parts.begin();
87 StringPiece language = *parts_it;
88 if (!CheckLanguage(language)) {
89 return Locale::Invalid();
90 }
91 ++parts_it;
92
93 StringPiece script;
94 if (parts_it != parts.end()) {
95 script = *parts_it;
96 if (!CheckScript(script)) {
97 script = "";
98 } else {
99 ++parts_it;
100 }
101 }
102
103 StringPiece region;
104 if (parts_it != parts.end()) {
105 region = *parts_it;
106 if (!CheckRegion(region)) {
107 region = "";
108 } else {
109 ++parts_it;
110 }
111 }
112
113 // NOTE: We don't parse the rest of the BCP47 tag here even if specified.
114
115 return Locale(language.ToString(), script.ToString(), region.ToString());
116 }
117
FromLanguageTag(const LanguageTag * language_tag)118 Locale Locale::FromLanguageTag(const LanguageTag* language_tag) {
119 if (language_tag == nullptr || language_tag->language() == nullptr) {
120 return Locale::Invalid();
121 }
122
123 StringPiece language = language_tag->language()->c_str();
124 if (!CheckLanguage(language)) {
125 return Locale::Invalid();
126 }
127
128 StringPiece script;
129 if (language_tag->script() != nullptr) {
130 script = language_tag->script()->c_str();
131 if (!CheckScript(script)) {
132 script = "";
133 }
134 }
135
136 StringPiece region;
137 if (language_tag->region() != nullptr) {
138 region = language_tag->region()->c_str();
139 if (!CheckRegion(region)) {
140 region = "";
141 }
142 }
143 return Locale(language.ToString(), script.ToString(), region.ToString());
144 }
145
IsUnknown() const146 bool Locale::IsUnknown() const {
147 return is_valid_ && language_ == kUnknownLanguageCode;
148 }
149
IsLocaleSupported(const Locale & locale,const std::vector<Locale> & supported_locales,bool default_value)150 bool Locale::IsLocaleSupported(const Locale& locale,
151 const std::vector<Locale>& supported_locales,
152 bool default_value) {
153 if (!locale.IsValid()) {
154 return false;
155 }
156 if (locale.IsUnknown()) {
157 return default_value;
158 }
159 for (const Locale& supported_locale : supported_locales) {
160 if (!supported_locale.IsValid()) {
161 continue;
162 }
163 const bool language_matches =
164 supported_locale.Language().empty() ||
165 supported_locale.Language() == kAnyMatch ||
166 supported_locale.Language() == locale.Language();
167 const bool script_matches = supported_locale.Script().empty() ||
168 supported_locale.Script() == kAnyMatch ||
169 locale.Script().empty() ||
170 supported_locale.Script() == locale.Script();
171 const bool region_matches = supported_locale.Region().empty() ||
172 supported_locale.Region() == kAnyMatch ||
173 locale.Region().empty() ||
174 supported_locale.Region() == locale.Region();
175 if (language_matches && script_matches && region_matches) {
176 return true;
177 }
178 }
179 return false;
180 }
181
IsAnyLocaleSupported(const std::vector<Locale> & locales,const std::vector<Locale> & supported_locales,bool default_value)182 bool Locale::IsAnyLocaleSupported(const std::vector<Locale>& locales,
183 const std::vector<Locale>& supported_locales,
184 bool default_value) {
185 if (locales.empty()) {
186 return default_value;
187 }
188 if (supported_locales.empty()) {
189 return default_value;
190 }
191 for (const Locale& locale : locales) {
192 if (IsLocaleSupported(locale, supported_locales, default_value)) {
193 return true;
194 }
195 }
196 return false;
197 }
198
operator <<(logging::LoggingStringStream & stream,const Locale & locale)199 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
200 const Locale& locale) {
201 return stream << "Locale(language=" << locale.Language()
202 << ", script=" << locale.Script()
203 << ", region=" << locale.Region()
204 << ", is_valid=" << locale.IsValid()
205 << ", is_unknown=" << locale.IsUnknown() << ")";
206 }
207
ParseLocales(StringPiece locales_list,std::vector<Locale> * locales)208 bool ParseLocales(StringPiece locales_list, std::vector<Locale>* locales) {
209 for (const auto& locale_str : strings::Split(locales_list, ',')) {
210 const Locale locale = Locale::FromBCP47(locale_str.ToString());
211 if (!locale.IsValid()) {
212 TC3_LOG(ERROR) << "Invalid locale " << locale_str.ToString();
213 return false;
214 }
215 locales->push_back(locale);
216 }
217 return true;
218 }
219
220 } // namespace libtextclassifier3
221