• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "utils/i18n/locale.h"
18 
19 #include "utils/strings/split.h"
20 
21 namespace libtextclassifier3 {
22 
23 namespace {
// Wildcard value; a locale component equal to this string is treated as
// matching any value when locales are compared in this file.
constexpr char kAnyMatch[] = "*";

// BCP 47 code for "Undetermined Language".
constexpr char kUnknownLanguageCode[] = "und";
28 
CheckLanguage(StringPiece language)29 bool CheckLanguage(StringPiece language) {
30   if (language.size() == 1 && language.data()[0] == '*') {
31     return true;
32   }
33 
34   if (language.size() != 2 && language.size() != 3) {
35     return false;
36   }
37 
38   // Needs to be all lowercase.
39   for (int i = 0; i < language.size(); ++i) {
40     if (!std::islower(language[i])) {
41       return false;
42     }
43   }
44 
45   return true;
46 }
47 
CheckScript(StringPiece script)48 bool CheckScript(StringPiece script) {
49   if (script.size() != 4) {
50     return false;
51   }
52 
53   if (!std::isupper(script[0])) {
54     return false;
55   }
56 
57   // Needs to be all lowercase.
58   for (int i = 1; i < script.size(); ++i) {
59     if (!std::islower(script[i])) {
60       return false;
61     }
62   }
63 
64   return true;
65 }
66 
CheckRegion(StringPiece region)67 bool CheckRegion(StringPiece region) {
68   if (region.size() == 2) {
69     return std::isupper(region[0]) && std::isupper(region[1]);
70   } else if (region.size() == 3) {
71     return std::isdigit(region[0]) && std::isdigit(region[1]) &&
72            std::isdigit(region[2]);
73   } else {
74     return false;
75   }
76 }
77 
78 }  // namespace
79 
FromBCP47(const std::string & locale_tag)80 Locale Locale::FromBCP47(const std::string& locale_tag) {
81   std::vector<StringPiece> parts = strings::Split(locale_tag, '-');
82   if (parts.empty()) {
83     return Locale::Invalid();
84   }
85 
86   auto parts_it = parts.begin();
87   StringPiece language = *parts_it;
88   if (!CheckLanguage(language)) {
89     return Locale::Invalid();
90   }
91   ++parts_it;
92 
93   StringPiece script;
94   if (parts_it != parts.end()) {
95     script = *parts_it;
96     if (!CheckScript(script)) {
97       script = "";
98     } else {
99       ++parts_it;
100     }
101   }
102 
103   StringPiece region;
104   if (parts_it != parts.end()) {
105     region = *parts_it;
106     if (!CheckRegion(region)) {
107       region = "";
108     } else {
109       ++parts_it;
110     }
111   }
112 
113   // NOTE: We don't parse the rest of the BCP47 tag here even if specified.
114 
115   return Locale(language.ToString(), script.ToString(), region.ToString());
116 }
117 
FromLanguageTag(const LanguageTag * language_tag)118 Locale Locale::FromLanguageTag(const LanguageTag* language_tag) {
119   if (language_tag == nullptr || language_tag->language() == nullptr) {
120     return Locale::Invalid();
121   }
122 
123   StringPiece language = language_tag->language()->c_str();
124   if (!CheckLanguage(language)) {
125     return Locale::Invalid();
126   }
127 
128   StringPiece script;
129   if (language_tag->script() != nullptr) {
130     script = language_tag->script()->c_str();
131     if (!CheckScript(script)) {
132       script = "";
133     }
134   }
135 
136   StringPiece region;
137   if (language_tag->region() != nullptr) {
138     region = language_tag->region()->c_str();
139     if (!CheckRegion(region)) {
140       region = "";
141     }
142   }
143   return Locale(language.ToString(), script.ToString(), region.ToString());
144 }
145 
IsUnknown() const146 bool Locale::IsUnknown() const {
147   return is_valid_ && language_ == kUnknownLanguageCode;
148 }
149 
IsLocaleSupported(const Locale & locale,const std::vector<Locale> & supported_locales,bool default_value)150 bool Locale::IsLocaleSupported(const Locale& locale,
151                                const std::vector<Locale>& supported_locales,
152                                bool default_value) {
153   if (!locale.IsValid()) {
154     return false;
155   }
156   if (locale.IsUnknown()) {
157     return default_value;
158   }
159   for (const Locale& supported_locale : supported_locales) {
160     if (!supported_locale.IsValid()) {
161       continue;
162     }
163     const bool language_matches =
164         supported_locale.Language().empty() ||
165         supported_locale.Language() == kAnyMatch ||
166         supported_locale.Language() == locale.Language();
167     const bool script_matches = supported_locale.Script().empty() ||
168                                 supported_locale.Script() == kAnyMatch ||
169                                 locale.Script().empty() ||
170                                 supported_locale.Script() == locale.Script();
171     const bool region_matches = supported_locale.Region().empty() ||
172                                 supported_locale.Region() == kAnyMatch ||
173                                 locale.Region().empty() ||
174                                 supported_locale.Region() == locale.Region();
175     if (language_matches && script_matches && region_matches) {
176       return true;
177     }
178   }
179   return false;
180 }
181 
IsAnyLocaleSupported(const std::vector<Locale> & locales,const std::vector<Locale> & supported_locales,bool default_value)182 bool Locale::IsAnyLocaleSupported(const std::vector<Locale>& locales,
183                                   const std::vector<Locale>& supported_locales,
184                                   bool default_value) {
185   if (locales.empty()) {
186     return default_value;
187   }
188   if (supported_locales.empty()) {
189     return default_value;
190   }
191   for (const Locale& locale : locales) {
192     if (IsLocaleSupported(locale, supported_locales, default_value)) {
193       return true;
194     }
195   }
196   return false;
197 }
198 
operator <<(logging::LoggingStringStream & stream,const Locale & locale)199 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
200                                          const Locale& locale) {
201   return stream << "Locale(language=" << locale.Language()
202                 << ", script=" << locale.Script()
203                 << ", region=" << locale.Region()
204                 << ", is_valid=" << locale.IsValid()
205                 << ", is_unknown=" << locale.IsUnknown() << ")";
206 }
207 
ParseLocales(StringPiece locales_list,std::vector<Locale> * locales)208 bool ParseLocales(StringPiece locales_list, std::vector<Locale>* locales) {
209   for (const auto& locale_str : strings::Split(locales_list, ',')) {
210     const Locale locale = Locale::FromBCP47(locale_str.ToString());
211     if (!locale.IsValid()) {
212       TC3_LOG(ERROR) << "Invalid locale " << locale_str.ToString();
213       return false;
214     }
215     locales->push_back(locale);
216   }
217   return true;
218 }
219 
220 }  // namespace libtextclassifier3
221