• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/base/l10n/l10n_util.h"
6 
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <string>
11 
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/file_util.h"
15 #include "base/i18n/file_util_icu.h"
16 #include "base/i18n/rtl.h"
17 #include "base/i18n/string_compare.h"
18 #include "base/lazy_instance.h"
19 #include "base/memory/scoped_ptr.h"
20 #include "base/path_service.h"
21 #include "base/strings/string_number_conversions.h"
22 #include "base/strings/string_split.h"
23 #include "base/strings/string_util.h"
24 #include "base/strings/stringprintf.h"
25 #include "base/strings/sys_string_conversions.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "build/build_config.h"
28 #include "third_party/icu/source/common/unicode/rbbi.h"
29 #include "third_party/icu/source/common/unicode/uloc.h"
30 #include "ui/base/l10n/l10n_util_collator.h"
31 #include "ui/base/l10n/l10n_util_plurals.h"
32 #include "ui/base/resource/resource_bundle.h"
33 #include "ui/base/ui_base_paths.h"
34 
35 #if defined(OS_ANDROID)
36 #include "ui/base/l10n/l10n_util_android.h"
37 #endif
38 
39 #if defined(USE_GLIB)
40 #include <glib.h>
41 #endif
42 
43 #if defined(OS_WIN)
44 #include "ui/base/l10n/l10n_util_win.h"
45 #endif  // OS_WIN
46 
47 namespace {
48 
// Language and locale codes, kept in alphabetical order by code. Each entry
// is annotated with the English name of the language it stands for.
const char* const kAcceptLanguageList[] = {
  "af",      // Afrikaans
  "am",      // Amharic
  "ar",      // Arabic
  "az",      // Azerbaijani
  "be",      // Belarusian
  "bg",      // Bulgarian
  "bh",      // Bihari
  "bn",      // Bengali
  "br",      // Breton
  "bs",      // Bosnian
  "ca",      // Catalan
  "co",      // Corsican
  "cs",      // Czech
  "cy",      // Welsh
  "da",      // Danish
  "de",      // German
  "de-AT",   // German (Austria)
  "de-CH",   // German (Switzerland)
  "de-DE",   // German (Germany)
  "el",      // Greek
  "en",      // English
  "en-AU",   // English (Australia)
  "en-CA",   // English (Canada)
  "en-GB",   // English (UK)
  "en-NZ",   // English (New Zealand)
  "en-US",   // English (US)
  "en-ZA",   // English (South Africa)
  "eo",      // Esperanto
  // TODO(jungshik): Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",      // Spanish
  "es-419",  // Spanish (Latin America)
  "et",      // Estonian
  "eu",      // Basque
  "fa",      // Persian
  "fi",      // Finnish
  "fil",     // Filipino
  "fo",      // Faroese
  "fr",      // French
  "fr-CA",   // French (Canada)
  "fr-CH",   // French (Switzerland)
  "fr-FR",   // French (France)
  "fy",      // Frisian
  "ga",      // Irish
  "gd",      // Scots Gaelic
  "gl",      // Galician
  "gn",      // Guarani
  "gu",      // Gujarati
  "ha",      // Hausa
  "haw",     // Hawaiian
  "he",      // Hebrew
  "hi",      // Hindi
  "hr",      // Croatian
  "hu",      // Hungarian
  "hy",      // Armenian
  "ia",      // Interlingua
  "id",      // Indonesian
  "is",      // Icelandic
  "it",      // Italian
  "it-CH",   // Italian (Switzerland)
  "it-IT",   // Italian (Italy)
  "ja",      // Japanese
  "jw",      // Javanese
  "ka",      // Georgian
  "kk",      // Kazakh
  "km",      // Cambodian
  "kn",      // Kannada
  "ko",      // Korean
  "ku",      // Kurdish
  "ky",      // Kyrgyz
  "la",      // Latin
  "ln",      // Lingala
  "lo",      // Laothian
  "lt",      // Lithuanian
  "lv",      // Latvian
  "mk",      // Macedonian
  "ml",      // Malayalam
  "mn",      // Mongolian
  "mo",      // Moldavian
  "mr",      // Marathi
  "ms",      // Malay
  "mt",      // Maltese
  "nb",      // Norwegian (Bokmal)
  "ne",      // Nepali
  "nl",      // Dutch
  "nn",      // Norwegian (Nynorsk)
  "no",      // Norwegian
  "oc",      // Occitan
  "om",      // Oromo
  "or",      // Oriya
  "pa",      // Punjabi
  "pl",      // Polish
  "ps",      // Pashto
  "pt",      // Portuguese
  "pt-BR",   // Portuguese (Brazil)
  "pt-PT",   // Portuguese (Portugal)
  "qu",      // Quechua
  "rm",      // Romansh
  "ro",      // Romanian
  "ru",      // Russian
  "sd",      // Sindhi
  "sh",      // Serbo-Croatian
  "si",      // Sinhalese
  "sk",      // Slovak
  "sl",      // Slovenian
  "sn",      // Shona
  "so",      // Somali
  "sq",      // Albanian
  "sr",      // Serbian
  "st",      // Sesotho
  "su",      // Sundanese
  "sv",      // Swedish
  "sw",      // Swahili
  "ta",      // Tamil
  "te",      // Telugu
  "tg",      // Tajik
  "th",      // Thai
  "ti",      // Tigrinya
  "tk",      // Turkmen
  "to",      // Tonga
  "tr",      // Turkish
  "tt",      // Tatar
  "tw",      // Twi
  "ug",      // Uighur
  "uk",      // Ukrainian
  "ur",      // Urdu
  "uz",      // Uzbek
  "vi",      // Vietnamese
  "xh",      // Xhosa
  "yi",      // Yiddish
  "yo",      // Yoruba
  "zh",      // Chinese
  "zh-CN",   // Chinese (Simplified)
  "zh-TW",   // Chinese (Traditional)
  "zu",      // Zulu
};
186 
187 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)188 bool IsDuplicateName(const std::string& locale_name) {
189   static const char* const kDuplicateNames[] = {
190     "en",
191     "pt",
192     "zh",
193     "zh_hans_cn",
194     "zh_hant_hk",
195     "zh_hant_mo",
196     "zh_hans_sg",
197     "zh_hant_tw"
198   };
199 
200   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
201   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
202   // has to be added manually in GetAvailableLocales().
203   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
204     return true;
205   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
206     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
207       return true;
208   }
209   return false;
210 }
211 
212 // We added 30+ minimally populated locales with only a few entries
213 // (exemplar character set, script, writing direction and its own
214 // lanaguage name). These locales have to be distinguished from the
215 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)216 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
217   // For partially populated locales, even the translation for "English"
218   // is not available. A more robust/elegant way to check is to add a special
219   // field (say, 'isPartial' to our version of ICU locale files) and
220   // check its value, but this hack seems to work well.
221   return !l10n_util::IsLocaleNameTranslated("en", locale_name);
222 }
223 
224 #if !defined(OS_MACOSX)
IsLocaleAvailable(const std::string & locale)225 bool IsLocaleAvailable(const std::string& locale) {
226   // If locale has any illegal characters in it, we don't want to try to
227   // load it because it may be pointing outside the locale data file directory.
228   if (!file_util::IsFilenameLegal(base::ASCIIToUTF16(locale)))
229     return false;
230 
231   // IsLocalePartiallyPopulated() can be called here for an early return w/o
232   // checking the resource availability below. It'd help when Chrome is run
233   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
234   // but it'd slow down the start up time a little bit for locales Chrome is
235   // localized to. So, we don't call it here.
236   if (!l10n_util::IsLocaleSupportedByOS(locale))
237     return false;
238 
239   // If the ResourceBundle is not yet initialized, return false to avoid the
240   // CHECK failure in ResourceBundle::GetSharedInstance().
241   if (!ResourceBundle::HasSharedInstance())
242     return false;
243 
244   // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
245   // so that this can be invoked without initializing the global instance.
246   // See crbug.com/230432: CHECK failure in GetUserDataDir().
247   return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
248 }
249 #endif
250 
251 // On Linux, the text layout engine Pango determines paragraph directionality
252 // by looking at the first strongly-directional character in the text. This
253 // means text such as "Google Chrome foo bar..." will be layed out LTR even
254 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
255 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)256 void AdjustParagraphDirectionality(base::string16* paragraph) {
257 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
258   if (base::i18n::IsRTL() &&
259       base::i18n::StringContainsStrongRTLChars(*paragraph)) {
260     paragraph->insert(0, 1,
261                       static_cast<base::char16>(base::i18n::kRightToLeftMark));
262   }
263 #endif
264 }
265 
266 struct AvailableLocalesTraits
267     : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
New__anon5da2993c0111::AvailableLocalesTraits268   static std::vector<std::string>* New(void* instance) {
269     std::vector<std::string>* locales =
270         base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
271             instance);
272     int num_locales = uloc_countAvailable();
273     for (int i = 0; i < num_locales; ++i) {
274       std::string locale_name = uloc_getAvailable(i);
275       // Filter out the names that have aliases.
276       if (IsDuplicateName(locale_name))
277         continue;
278       // Filter out locales for which we have only partially populated data
279       // and to which Chrome is not localized.
280       if (IsLocalePartiallyPopulated(locale_name))
281         continue;
282       if (!l10n_util::IsLocaleSupportedByOS(locale_name))
283         continue;
284       // Normalize underscores to hyphens because that's what our locale files
285       // use.
286       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
287 
288       // Map the Chinese locale names over to zh-CN and zh-TW.
289       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
290         locale_name = "zh-CN";
291       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
292         locale_name = "zh-TW";
293       }
294       locales->push_back(locale_name);
295     }
296 
297     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
298     locales->push_back("es-419");
299     return locales;
300   }
301 };
302 
303 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
304     g_available_locales = LAZY_INSTANCE_INITIALIZER;
305 
306 }  // namespace
307 
308 namespace l10n_util {
309 
GetCanonicalLocale(const std::string & locale)310 std::string GetCanonicalLocale(const std::string& locale) {
311   return base::i18n::GetCanonicalLocale(locale.c_str());
312 }
313 
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)314 bool CheckAndResolveLocale(const std::string& locale,
315                            std::string* resolved_locale) {
316 #if defined(OS_MACOSX)
317   NOTIMPLEMENTED();
318   return false;
319 #else
320   if (IsLocaleAvailable(locale)) {
321     *resolved_locale = locale;
322     return true;
323   }
324 
325   // If there's a variant, skip over it so we can try without the region
326   // code.  For example, ca_ES@valencia should cause us to try ca@valencia
327   // before ca.
328   std::string::size_type variant_pos = locale.find('@');
329   if (variant_pos != std::string::npos)
330     return false;
331 
332   // If the locale matches language but not country, use that instead.
333   // TODO(jungshik) : Nothing is done about languages that Chrome
334   // does not support but available on Windows. We fall
335   // back to en-US in GetApplicationLocale so that it's a not critical,
336   // but we can do better.
337   std::string::size_type hyphen_pos = locale.find('-');
338   std::string lang(locale, 0, hyphen_pos);
339   if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
340     std::string region(locale, hyphen_pos + 1);
341     std::string tmp_locale(lang);
342     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
343     // Spanish locale).
344     if (LowerCaseEqualsASCII(lang, "es") &&
345         !LowerCaseEqualsASCII(region, "es")) {
346       tmp_locale.append("-419");
347     } else if (LowerCaseEqualsASCII(lang, "zh")) {
348       // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
349       if (LowerCaseEqualsASCII(region, "hk") ||
350           LowerCaseEqualsASCII(region, "mo")) { // Macao
351         tmp_locale.append("-TW");
352       } else {
353         tmp_locale.append("-CN");
354       }
355     } else if (LowerCaseEqualsASCII(lang, "en")) {
356       // Map Australian, Canadian, New Zealand and South African English
357       // to British English for now.
358       // TODO(jungshik): en-CA may have to change sides once
359       // we have OS locale separate from app locale (Chrome's UI language).
360       if (LowerCaseEqualsASCII(region, "au") ||
361           LowerCaseEqualsASCII(region, "ca") ||
362           LowerCaseEqualsASCII(region, "nz") ||
363           LowerCaseEqualsASCII(region, "za")) {
364         tmp_locale.append("-GB");
365       } else {
366         tmp_locale.append("-US");
367       }
368     }
369     if (IsLocaleAvailable(tmp_locale)) {
370       resolved_locale->swap(tmp_locale);
371       return true;
372     }
373   }
374 
375   // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
376   struct {
377     const char* source;
378     const char* dest;
379   } alias_map[] = {
380       {"no", "nb"},
381       {"tl", "fil"},
382       {"iw", "he"},
383       {"en", "en-US"},
384   };
385 
386   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
387     if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
388       std::string tmp_locale(alias_map[i].dest);
389       if (IsLocaleAvailable(tmp_locale)) {
390         resolved_locale->swap(tmp_locale);
391         return true;
392       }
393     }
394   }
395 
396   return false;
397 #endif
398 }
399 
GetApplicationLocale(const std::string & pref_locale)400 std::string GetApplicationLocale(const std::string& pref_locale) {
401 #if defined(OS_MACOSX)
402 
403   // Use any override (Cocoa for the browser), otherwise use the preference
404   // passed to the function.
405   std::string app_locale = l10n_util::GetLocaleOverride();
406   if (app_locale.empty())
407     app_locale = pref_locale;
408 
409   // The above should handle all of the cases Chrome normally hits, but for some
410   // unit tests, we need something to fall back too.
411   if (app_locale.empty())
412     app_locale = "en-US";
413 
414   // Windows/Linux call SetICUDefaultLocale after determining the actual locale
415   // with CheckAndResolveLocal to make ICU APIs work in that locale.
416   // Mac doesn't use a locale directory tree of resources (it uses Mac style
417   // resources), so mirror the Windows/Linux behavior of calling
418   // SetICUDefaultLocale.
419   base::i18n::SetICUDefaultLocale(app_locale);
420   return app_locale;
421 
422 #else
423 
424   std::string resolved_locale;
425   std::vector<std::string> candidates;
426 
427   // We only use --lang and the app pref on Windows.  On Linux, we only
428   // look at the LC_*/LANG environment variables.  We do, however, pass --lang
429   // to renderer and plugin processes so they know what language the parent
430   // process decided to use.
431 
432 #if defined(OS_WIN)
433 
434   // First, try the preference value.
435   if (!pref_locale.empty())
436     candidates.push_back(GetCanonicalLocale(pref_locale));
437 
438   // Next, try the overridden locale.
439   const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
440   if (!languages.empty()) {
441     candidates.reserve(candidates.size() + languages.size());
442     std::transform(languages.begin(), languages.end(),
443                    std::back_inserter(candidates), &GetCanonicalLocale);
444   } else {
445     // If no override was set, defer to ICU
446     candidates.push_back(base::i18n::GetConfiguredLocale());
447   }
448 
449 #elif defined(OS_ANDROID)
450 
451   // On Android, query java.util.Locale for the default locale.
452   candidates.push_back(GetDefaultLocale());
453 
454 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
455 
456   // GLib implements correct environment variable parsing with
457   // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
458   // We used to use our custom parsing code along with ICU for this purpose.
459   // If we have a port that does not depend on GTK, we have to
460   // restore our custom code for that port.
461   const char* const* languages = g_get_language_names();
462   DCHECK(languages);  // A valid pointer is guaranteed.
463   DCHECK(*languages);  // At least one entry, "C", is guaranteed.
464 
465   for (; *languages != NULL; ++languages) {
466     candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
467   }
468 
469 #else
470 
471   // By default, use the application locale preference. This applies to ChromeOS
472   // and linux systems without glib.
473   if (!pref_locale.empty())
474     candidates.push_back(pref_locale);
475 
476 #endif
477 
478   std::vector<std::string>::const_iterator i = candidates.begin();
479   for (; i != candidates.end(); ++i) {
480     if (CheckAndResolveLocale(*i, &resolved_locale)) {
481       base::i18n::SetICUDefaultLocale(resolved_locale);
482       return resolved_locale;
483     }
484   }
485 
486   // Fallback on en-US.
487   const std::string fallback_locale("en-US");
488   if (IsLocaleAvailable(fallback_locale)) {
489     base::i18n::SetICUDefaultLocale(fallback_locale);
490     return fallback_locale;
491   }
492 
493   return std::string();
494 
495 #endif
496 }
497 
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)498 bool IsLocaleNameTranslated(const char* locale,
499                             const std::string& display_locale) {
500   base::string16 display_name =
501       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
502   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
503   // uloc_getDisplayName returns the actual translation or the default
504   // value (locale code), we have to rely on this hack to tell whether
505   // the translation is available or not.  If ICU doesn't have a translated
506   // name for this locale, GetDisplayNameForLocale will just return the
507   // locale code.
508   return !base::IsStringASCII(display_name) ||
509       base::UTF16ToASCII(display_name) != locale;
510 }
511 
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui)512 base::string16 GetDisplayNameForLocale(const std::string& locale,
513                                        const std::string& display_locale,
514                                        bool is_for_ui) {
515   std::string locale_code = locale;
516   // Internally, we use the language code of zh-CN and zh-TW, but we want the
517   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
518   // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
519   // and zh-Hant to ICU. Even with this mapping, we'd get
520   // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
521   // even longer results in other languages. Arguably, they're better than
522   // the current results : Chinese (China) / Chinese (Taiwan).
523   // TODO(jungshik): Do one of the following:
524   // 1. Special-case Chinese by getting the custom-translation for them
525   // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
526   // 3. Get translations for two directly from the ICU resouce bundle
527   // because they're not accessible with other any API.
528   // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
529   // #1 and #2 wouldn't work if display_locale != current UI locale although
530   // we can think of additional hack to work around the problem.
531   // #3 can be potentially expensive.
532   if (locale_code == "zh-CN")
533     locale_code = "zh-Hans";
534   else if (locale_code == "zh-TW")
535     locale_code = "zh-Hant";
536 
537   base::string16 display_name;
538 #if defined(OS_ANDROID)
539   // Use Java API to get locale display name so that we can remove most of
540   // the lang data from icu data to reduce binary size, except for zh-Hans and
541   // zh-Hant because the current Android Java API doesn't support scripts.
542   // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
543   // Android Java API supports scripts.
544   if (!StartsWithASCII(locale_code, "zh-Han", true)) {
545     display_name = GetDisplayNameForLocale(locale_code, display_locale);
546   } else
547 #endif
548   {
549     UErrorCode error = U_ZERO_ERROR;
550     const int kBufferSize = 1024;
551 
552     int actual_size = uloc_getDisplayName(
553         locale_code.c_str(), display_locale.c_str(),
554         WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
555     DCHECK(U_SUCCESS(error));
556     display_name.resize(actual_size);
557   }
558 
559   // Add directional markup so parentheses are properly placed.
560   if (is_for_ui && base::i18n::IsRTL())
561     base::i18n::AdjustStringForLocaleDirection(&display_name);
562   return display_name;
563 }
564 
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)565 base::string16 GetDisplayNameForCountry(const std::string& country_code,
566                                         const std::string& display_locale) {
567   return GetDisplayNameForLocale("_" + country_code, display_locale, false);
568 }
569 
// Returns a copy of |locale| in which every '-' has been replaced by '_'.
std::string NormalizeLocale(const std::string& locale) {
  std::string result;
  result.reserve(locale.size());
  for (std::string::const_iterator it = locale.begin(); it != locale.end();
       ++it) {
    result.push_back(*it == '-' ? '_' : *it);
  }
  return result;
}
576 
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)577 void GetParentLocales(const std::string& current_locale,
578                       std::vector<std::string>* parent_locales) {
579   std::string locale(NormalizeLocale(current_locale));
580 
581   const int kNameCapacity = 256;
582   char parent[kNameCapacity];
583   base::strlcpy(parent, locale.c_str(), kNameCapacity);
584   parent_locales->push_back(parent);
585   UErrorCode err = U_ZERO_ERROR;
586   while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
587     if (U_FAILURE(err))
588       break;
589     parent_locales->push_back(parent);
590   }
591 }
592 
IsValidLocaleSyntax(const std::string & locale)593 bool IsValidLocaleSyntax(const std::string& locale) {
594   // Check that the length is plausible.
595   if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
596     return false;
597 
598   // Strip off the part after an '@' sign, which might contain keywords,
599   // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
600   // We don't validate that part much, just check that there's at least one
601   // equals sign in a plausible place. Normalize the prefix so that hyphens
602   // are changed to underscores.
603   std::string prefix = NormalizeLocale(locale);
604   size_t split_point = locale.find("@");
605   if (split_point != std::string::npos) {
606     std::string keywords = locale.substr(split_point + 1);
607     prefix = locale.substr(0, split_point);
608 
609     size_t equals_loc = keywords.find("=");
610     if (equals_loc == std::string::npos ||
611         equals_loc < 1 || equals_loc > keywords.size() - 2)
612       return false;
613   }
614 
615   // Check that all characters before the at-sign are alphanumeric or
616   // underscore.
617   for (size_t i = 0; i < prefix.size(); i++) {
618     char ch = prefix[i];
619     if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
620       return false;
621   }
622 
623   // Check that the initial token (before the first hyphen/underscore)
624   // is 1 - 3 alphabetical characters (a language tag).
625   for (size_t i = 0; i < prefix.size(); i++) {
626     char ch = prefix[i];
627     if (ch == '_') {
628       if (i < 1 || i > 3)
629         return false;
630       break;
631     }
632     if (!IsAsciiAlpha(ch))
633       return false;
634   }
635 
636   // Check that the all tokens after the initial token are 1 - 8 characters.
637   // (Tokenize/StringTokenizer don't work here, they collapse multiple
638   // delimiters into one.)
639   int token_len = 0;
640   int token_index = 0;
641   for (size_t i = 0; i < prefix.size(); i++) {
642     if (prefix[i] != '_') {
643       token_len++;
644       continue;
645     }
646 
647     if (token_index > 0 && (token_len < 1 || token_len > 8)) {
648       return false;
649     }
650     token_index++;
651     token_len = 0;
652   }
653   if (token_index == 0 && (token_len < 1 || token_len > 3)) {
654     return false;
655   } else if (token_len < 1 || token_len > 8) {
656     return false;
657   }
658 
659   return true;
660 }
661 
GetStringUTF8(int message_id)662 std::string GetStringUTF8(int message_id) {
663   return base::UTF16ToUTF8(GetStringUTF16(message_id));
664 }
665 
GetStringUTF16(int message_id)666 base::string16 GetStringUTF16(int message_id) {
667   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
668   base::string16 str = rb.GetLocalizedString(message_id);
669   AdjustParagraphDirectionality(&str);
670 
671   return str;
672 }
673 
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)674 base::string16 GetStringFUTF16(int message_id,
675                                const std::vector<base::string16>& replacements,
676                                std::vector<size_t>* offsets) {
677   // TODO(tc): We could save a string copy if we got the raw string as
678   // a StringPiece and were able to call ReplaceStringPlaceholders with
679   // a StringPiece format string and base::string16 substitution strings.  In
680   // practice, the strings should be relatively short.
681   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
682   const base::string16& format_string = rb.GetLocalizedString(message_id);
683 
684 #ifndef NDEBUG
685   // Make sure every replacement string is being used, so we don't just
686   // silently fail to insert one. If |offsets| is non-NULL, then don't do this
687   // check as the code may simply want to find the placeholders rather than
688   // actually replacing them.
689   if (!offsets) {
690     std::string utf8_string = base::UTF16ToUTF8(format_string);
691 
692     // $9 is the highest allowed placeholder.
693     for (size_t i = 0; i < 9; ++i) {
694       bool placeholder_should_exist = replacements.size() > i;
695 
696       std::string placeholder =
697           base::StringPrintf("$%d", static_cast<int>(i + 1));
698       size_t pos = utf8_string.find(placeholder.c_str());
699       if (placeholder_should_exist) {
700         DCHECK_NE(std::string::npos, pos) <<
701             " Didn't find a " << placeholder << " placeholder in " <<
702             utf8_string;
703       } else {
704         DCHECK_EQ(std::string::npos, pos) <<
705             " Unexpectedly found a " << placeholder << " placeholder in " <<
706             utf8_string;
707       }
708     }
709   }
710 #endif
711 
712   base::string16 formatted = ReplaceStringPlaceholders(
713       format_string, replacements, offsets);
714   AdjustParagraphDirectionality(&formatted);
715 
716   return formatted;
717 }
718 
GetStringFUTF8(int message_id,const base::string16 & a)719 std::string GetStringFUTF8(int message_id,
720                            const base::string16& a) {
721   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
722 }
723 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)724 std::string GetStringFUTF8(int message_id,
725                            const base::string16& a,
726                            const base::string16& b) {
727   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
728 }
729 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)730 std::string GetStringFUTF8(int message_id,
731                            const base::string16& a,
732                            const base::string16& b,
733                            const base::string16& c) {
734   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
735 }
736 
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)737 std::string GetStringFUTF8(int message_id,
738                            const base::string16& a,
739                            const base::string16& b,
740                            const base::string16& c,
741                            const base::string16& d) {
742   return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
743 }
744 
// Formats the string for |message_id| with the single replacement |a|; no
// offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a) {
  const std::vector<base::string16> replacements(1, a);
  return GetStringFUTF16(message_id, replacements, NULL);
}
751 
// Formats the string for |message_id| with the replacements |a| and |b| by
// delegating to the (a, b, offsets) overload with a null offsets pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  std::vector<size_t>* const no_offsets = NULL;
  return GetStringFUTF16(message_id, a, b, no_offsets);
}
757 
// Formats the string for |message_id| with the replacements |a|, |b| and
// |c|, in that order; no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(3);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
768 
// Formats the string for |message_id| with the replacements |a|, |b|, |c|
// and |d|, in that order; no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(4);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  substitutions.push_back(d);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
781 
// Formats the string for |message_id| with the replacements |a| through |e|,
// in that order; no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(5);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  substitutions.push_back(d);
  substitutions.push_back(e);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
796 
// Single-replacement overload that also reports an offset through |offset|,
// which must be non-null. The value written is the only entry the
// vector-based overload stores in its offsets output.
//
// NOTE(review): the size check below is a DCHECK only, so nothing guards
// against an empty offsets vector when DCHECKs are compiled out. Presumably
// the message is expected to contain exactly one placeholder - confirm.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  const std::vector<base::string16> single_replacement(1, a);
  std::vector<size_t> offsets;
  base::string16 formatted =
      GetStringFUTF16(message_id, single_replacement, &offsets);
  DCHECK(offsets.size() == 1);
  *offset = offsets.front();
  return formatted;
}
809 
// Two-replacement overload that passes |offsets| straight through to the
// vector-based overload. |a| and |b| are supplied as the replacements, in
// that order.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(2);
  substitutions.push_back(a);
  substitutions.push_back(b);
  return GetStringFUTF16(message_id, substitutions, offsets);
}
819 
GetStringFUTF16Int(int message_id,int a)820 base::string16 GetStringFUTF16Int(int message_id, int a) {
821   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
822 }
823 
GetStringFUTF16Int(int message_id,int64 a)824 base::string16 GetStringFUTF16Int(int message_id, int64 a) {
825   return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
826 }
827 
828 // Specialization of operator() method for base::string16 version.
829 template <>
operator ()(const base::string16 & lhs,const base::string16 & rhs)830 bool StringComparator<base::string16>::operator()(const base::string16& lhs,
831                                                   const base::string16& rhs) {
832   // If we can not get collator instance for specified locale, just do simple
833   // string compare.
834   if (!collator_)
835     return lhs < rhs;
836   return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
837       UCOL_LESS;
838 };
839 
GetPluralStringFUTF16(const std::vector<int> & message_ids,int number)840 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids,
841                                int number) {
842   scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids);
843   DCHECK(format);
844 
845   UErrorCode err = U_ZERO_ERROR;
846   icu::UnicodeString result_files_string = format->format(number, err);
847   int capacity = result_files_string.length() + 1;
848   DCHECK_GT(capacity, 1);
849   base::string16 result;
850   result_files_string.extract(
851       static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err);
852   DCHECK(U_SUCCESS(err));
853   return result;
854 }
855 
GetPluralStringFUTF8(const std::vector<int> & message_ids,int number)856 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids,
857                                  int number) {
858   return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number));
859 }
860 
SortStrings16(const std::string & locale,std::vector<base::string16> * strings)861 void SortStrings16(const std::string& locale,
862                    std::vector<base::string16>* strings) {
863   SortVectorWithStringKey(locale, strings, false);
864 }
865 
GetAvailableLocales()866 const std::vector<std::string>& GetAvailableLocales() {
867   return g_available_locales.Get();
868 }
869 
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)870 void GetAcceptLanguagesForLocale(const std::string& display_locale,
871                                  std::vector<std::string>* locale_codes) {
872   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
873     if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
874                                            display_locale))
875       // TODO(jungshik) : Put them at the of the list with language codes
876       // enclosed by brackets instead of skipping.
877         continue;
878     locale_codes->push_back(kAcceptLanguageList[i]);
879   }
880 }
881 
GetLocalizedContentsWidthInPixels(int pixel_resource_id)882 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
883   int width = 0;
884   base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
885   DCHECK_GT(width, 0);
886   return width;
887 }
888 
889 }  // namespace l10n_util
890