1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/base/l10n/l10n_util.h"
6
7 #include <algorithm>
8 #include <cstdlib>
9 #include <iterator>
10 #include <string>
11
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/file_util.h"
15 #include "base/i18n/file_util_icu.h"
16 #include "base/i18n/rtl.h"
17 #include "base/i18n/string_compare.h"
18 #include "base/lazy_instance.h"
19 #include "base/memory/scoped_ptr.h"
20 #include "base/path_service.h"
21 #include "base/strings/string_number_conversions.h"
22 #include "base/strings/string_split.h"
23 #include "base/strings/string_util.h"
24 #include "base/strings/stringprintf.h"
25 #include "base/strings/sys_string_conversions.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "build/build_config.h"
28 #include "third_party/icu/source/common/unicode/rbbi.h"
29 #include "third_party/icu/source/common/unicode/uloc.h"
30 #include "ui/base/l10n/l10n_util_collator.h"
31 #include "ui/base/l10n/l10n_util_plurals.h"
32 #include "ui/base/resource/resource_bundle.h"
33 #include "ui/base/ui_base_paths.h"
34
35 #if defined(OS_ANDROID)
36 #include "ui/base/l10n/l10n_util_android.h"
37 #endif
38
39 #if defined(USE_GLIB)
40 #include <glib.h>
41 #endif
42
43 #if defined(OS_WIN)
44 #include "ui/base/l10n/l10n_util_win.h"
45 #endif // OS_WIN
46
47 namespace {
48
// Language codes that may be offered for the Accept-Language setting.
// GetAcceptLanguagesForLocale() walks this table and keeps the entries whose
// names are translated in the requested display locale.
static const char* const kAcceptLanguageList[] = {
  "af",      // Afrikaans
  "am",      // Amharic
  "ar",      // Arabic
  "az",      // Azerbaijani
  "be",      // Belarusian
  "bg",      // Bulgarian
  "bh",      // Bihari
  "bn",      // Bengali
  "br",      // Breton
  "bs",      // Bosnian
  "ca",      // Catalan
  "co",      // Corsican
  "cs",      // Czech
  "cy",      // Welsh
  "da",      // Danish
  "de",      // German
  "de-AT",   // German (Austria)
  "de-CH",   // German (Switzerland)
  "de-DE",   // German (Germany)
  "el",      // Greek
  "en",      // English
  "en-AU",   // English (Australia)
  "en-CA",   // English (Canada)
  "en-GB",   // English (UK)
  "en-NZ",   // English (New Zealand)
  "en-US",   // English (US)
  "en-ZA",   // English (South Africa)
  "eo",      // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",      // Spanish
  "es-419",  // Spanish (Latin America)
  "et",      // Estonian
  "eu",      // Basque
  "fa",      // Persian
  "fi",      // Finnish
  "fil",     // Filipino
  "fo",      // Faroese
  "fr",      // French
  "fr-CA",   // French (Canada)
  "fr-CH",   // French (Switzerland)
  "fr-FR",   // French (France)
  "fy",      // Frisian
  "ga",      // Irish
  "gd",      // Scots Gaelic
  "gl",      // Galician
  "gn",      // Guarani
  "gu",      // Gujarati
  "ha",      // Hausa
  "haw",     // Hawaiian
  "he",      // Hebrew
  "hi",      // Hindi
  "hr",      // Croatian
  "hu",      // Hungarian
  "hy",      // Armenian
  "ia",      // Interlingua
  "id",      // Indonesian
  "is",      // Icelandic
  "it",      // Italian
  "it-CH",   // Italian (Switzerland)
  "it-IT",   // Italian (Italy)
  "ja",      // Japanese
  "jw",      // Javanese
  "ka",      // Georgian
  "kk",      // Kazakh
  "km",      // Cambodian
  "kn",      // Kannada
  "ko",      // Korean
  "ku",      // Kurdish
  "ky",      // Kyrgyz
  "la",      // Latin
  "ln",      // Lingala
  "lo",      // Laothian
  "lt",      // Lithuanian
  "lv",      // Latvian
  "mk",      // Macedonian
  "ml",      // Malayalam
  "mn",      // Mongolian
  "mo",      // Moldavian
  "mr",      // Marathi
  "ms",      // Malay
  "mt",      // Maltese
  "nb",      // Norwegian (Bokmal)
  "ne",      // Nepali
  "nl",      // Dutch
  "nn",      // Norwegian (Nynorsk)
  "no",      // Norwegian
  "oc",      // Occitan
  "om",      // Oromo
  "or",      // Oriya
  "pa",      // Punjabi
  "pl",      // Polish
  "ps",      // Pashto
  "pt",      // Portuguese
  "pt-BR",   // Portuguese (Brazil)
  "pt-PT",   // Portuguese (Portugal)
  "qu",      // Quechua
  "rm",      // Romansh
  "ro",      // Romanian
  "ru",      // Russian
  "sd",      // Sindhi
  "sh",      // Serbo-Croatian
  "si",      // Sinhalese
  "sk",      // Slovak
  "sl",      // Slovenian
  "sn",      // Shona
  "so",      // Somali
  "sq",      // Albanian
  "sr",      // Serbian
  "st",      // Sesotho
  "su",      // Sundanese
  "sv",      // Swedish
  "sw",      // Swahili
  "ta",      // Tamil
  "te",      // Telugu
  "tg",      // Tajik
  "th",      // Thai
  "ti",      // Tigrinya
  "tk",      // Turkmen
  "to",      // Tonga
  "tr",      // Turkish
  "tt",      // Tatar
  "tw",      // Twi
  "ug",      // Uighur
  "uk",      // Ukrainian
  "ur",      // Urdu
  "uz",      // Uzbek
  "vi",      // Vietnamese
  "xh",      // Xhosa
  "yi",      // Yiddish
  "yo",      // Yoruba
  "zh",      // Chinese
  "zh-CN",   // Chinese (Simplified)
  "zh-TW",   // Chinese (Traditional)
  "zu",      // Zulu
};
186
187 // Returns true if |locale_name| has an alias in the ICU data file.
IsDuplicateName(const std::string & locale_name)188 bool IsDuplicateName(const std::string& locale_name) {
189 static const char* const kDuplicateNames[] = {
190 "en",
191 "pt",
192 "zh",
193 "zh_hans_cn",
194 "zh_hant_hk",
195 "zh_hant_mo",
196 "zh_hans_sg",
197 "zh_hant_tw"
198 };
199
200 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
201 // 'es-419' (Spanish in Latin America) is not available in ICU so that it
202 // has to be added manually in GetAvailableLocales().
203 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_"))
204 return true;
205 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
206 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
207 return true;
208 }
209 return false;
210 }
211
212 // We added 30+ minimally populated locales with only a few entries
213 // (exemplar character set, script, writing direction and its own
214 // lanaguage name). These locales have to be distinguished from the
215 // fully populated locales to which Chrome is localized.
IsLocalePartiallyPopulated(const std::string & locale_name)216 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
217 // For partially populated locales, even the translation for "English"
218 // is not available. A more robust/elegant way to check is to add a special
219 // field (say, 'isPartial' to our version of ICU locale files) and
220 // check its value, but this hack seems to work well.
221 return !l10n_util::IsLocaleNameTranslated("en", locale_name);
222 }
223
224 #if !defined(OS_MACOSX)
IsLocaleAvailable(const std::string & locale)225 bool IsLocaleAvailable(const std::string& locale) {
226 // If locale has any illegal characters in it, we don't want to try to
227 // load it because it may be pointing outside the locale data file directory.
228 if (!file_util::IsFilenameLegal(base::ASCIIToUTF16(locale)))
229 return false;
230
231 // IsLocalePartiallyPopulated() can be called here for an early return w/o
232 // checking the resource availability below. It'd help when Chrome is run
233 // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
234 // but it'd slow down the start up time a little bit for locales Chrome is
235 // localized to. So, we don't call it here.
236 if (!l10n_util::IsLocaleSupportedByOS(locale))
237 return false;
238
239 // If the ResourceBundle is not yet initialized, return false to avoid the
240 // CHECK failure in ResourceBundle::GetSharedInstance().
241 if (!ResourceBundle::HasSharedInstance())
242 return false;
243
244 // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
245 // so that this can be invoked without initializing the global instance.
246 // See crbug.com/230432: CHECK failure in GetUserDataDir().
247 return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
248 }
249 #endif
250
251 // On Linux, the text layout engine Pango determines paragraph directionality
252 // by looking at the first strongly-directional character in the text. This
253 // means text such as "Google Chrome foo bar..." will be layed out LTR even
254 // if "foo bar" is RTL. So this function prepends the necessary RLM in such
255 // cases.
AdjustParagraphDirectionality(base::string16 * paragraph)256 void AdjustParagraphDirectionality(base::string16* paragraph) {
257 #if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
258 if (base::i18n::IsRTL() &&
259 base::i18n::StringContainsStrongRTLChars(*paragraph)) {
260 paragraph->insert(0, 1,
261 static_cast<base::char16>(base::i18n::kRightToLeftMark));
262 }
263 #endif
264 }
265
266 struct AvailableLocalesTraits
267 : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
New__anon5da2993c0111::AvailableLocalesTraits268 static std::vector<std::string>* New(void* instance) {
269 std::vector<std::string>* locales =
270 base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
271 instance);
272 int num_locales = uloc_countAvailable();
273 for (int i = 0; i < num_locales; ++i) {
274 std::string locale_name = uloc_getAvailable(i);
275 // Filter out the names that have aliases.
276 if (IsDuplicateName(locale_name))
277 continue;
278 // Filter out locales for which we have only partially populated data
279 // and to which Chrome is not localized.
280 if (IsLocalePartiallyPopulated(locale_name))
281 continue;
282 if (!l10n_util::IsLocaleSupportedByOS(locale_name))
283 continue;
284 // Normalize underscores to hyphens because that's what our locale files
285 // use.
286 std::replace(locale_name.begin(), locale_name.end(), '_', '-');
287
288 // Map the Chinese locale names over to zh-CN and zh-TW.
289 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
290 locale_name = "zh-CN";
291 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
292 locale_name = "zh-TW";
293 }
294 locales->push_back(locale_name);
295 }
296
297 // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
298 locales->push_back("es-419");
299 return locales;
300 }
301 };
302
303 base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
304 g_available_locales = LAZY_INSTANCE_INITIALIZER;
305
306 } // namespace
307
308 namespace l10n_util {
309
GetCanonicalLocale(const std::string & locale)310 std::string GetCanonicalLocale(const std::string& locale) {
311 return base::i18n::GetCanonicalLocale(locale.c_str());
312 }
313
CheckAndResolveLocale(const std::string & locale,std::string * resolved_locale)314 bool CheckAndResolveLocale(const std::string& locale,
315 std::string* resolved_locale) {
316 #if defined(OS_MACOSX)
317 NOTIMPLEMENTED();
318 return false;
319 #else
320 if (IsLocaleAvailable(locale)) {
321 *resolved_locale = locale;
322 return true;
323 }
324
325 // If there's a variant, skip over it so we can try without the region
326 // code. For example, ca_ES@valencia should cause us to try ca@valencia
327 // before ca.
328 std::string::size_type variant_pos = locale.find('@');
329 if (variant_pos != std::string::npos)
330 return false;
331
332 // If the locale matches language but not country, use that instead.
333 // TODO(jungshik) : Nothing is done about languages that Chrome
334 // does not support but available on Windows. We fall
335 // back to en-US in GetApplicationLocale so that it's a not critical,
336 // but we can do better.
337 std::string::size_type hyphen_pos = locale.find('-');
338 std::string lang(locale, 0, hyphen_pos);
339 if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
340 std::string region(locale, hyphen_pos + 1);
341 std::string tmp_locale(lang);
342 // Map es-RR other than es-ES to es-419 (Chrome's Latin American
343 // Spanish locale).
344 if (LowerCaseEqualsASCII(lang, "es") &&
345 !LowerCaseEqualsASCII(region, "es")) {
346 tmp_locale.append("-419");
347 } else if (LowerCaseEqualsASCII(lang, "zh")) {
348 // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
349 if (LowerCaseEqualsASCII(region, "hk") ||
350 LowerCaseEqualsASCII(region, "mo")) { // Macao
351 tmp_locale.append("-TW");
352 } else {
353 tmp_locale.append("-CN");
354 }
355 } else if (LowerCaseEqualsASCII(lang, "en")) {
356 // Map Australian, Canadian, New Zealand and South African English
357 // to British English for now.
358 // TODO(jungshik): en-CA may have to change sides once
359 // we have OS locale separate from app locale (Chrome's UI language).
360 if (LowerCaseEqualsASCII(region, "au") ||
361 LowerCaseEqualsASCII(region, "ca") ||
362 LowerCaseEqualsASCII(region, "nz") ||
363 LowerCaseEqualsASCII(region, "za")) {
364 tmp_locale.append("-GB");
365 } else {
366 tmp_locale.append("-US");
367 }
368 }
369 if (IsLocaleAvailable(tmp_locale)) {
370 resolved_locale->swap(tmp_locale);
371 return true;
372 }
373 }
374
375 // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
376 struct {
377 const char* source;
378 const char* dest;
379 } alias_map[] = {
380 {"no", "nb"},
381 {"tl", "fil"},
382 {"iw", "he"},
383 {"en", "en-US"},
384 };
385
386 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
387 if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
388 std::string tmp_locale(alias_map[i].dest);
389 if (IsLocaleAvailable(tmp_locale)) {
390 resolved_locale->swap(tmp_locale);
391 return true;
392 }
393 }
394 }
395
396 return false;
397 #endif
398 }
399
GetApplicationLocale(const std::string & pref_locale)400 std::string GetApplicationLocale(const std::string& pref_locale) {
401 #if defined(OS_MACOSX)
402
403 // Use any override (Cocoa for the browser), otherwise use the preference
404 // passed to the function.
405 std::string app_locale = l10n_util::GetLocaleOverride();
406 if (app_locale.empty())
407 app_locale = pref_locale;
408
409 // The above should handle all of the cases Chrome normally hits, but for some
410 // unit tests, we need something to fall back too.
411 if (app_locale.empty())
412 app_locale = "en-US";
413
414 // Windows/Linux call SetICUDefaultLocale after determining the actual locale
415 // with CheckAndResolveLocal to make ICU APIs work in that locale.
416 // Mac doesn't use a locale directory tree of resources (it uses Mac style
417 // resources), so mirror the Windows/Linux behavior of calling
418 // SetICUDefaultLocale.
419 base::i18n::SetICUDefaultLocale(app_locale);
420 return app_locale;
421
422 #else
423
424 std::string resolved_locale;
425 std::vector<std::string> candidates;
426
427 // We only use --lang and the app pref on Windows. On Linux, we only
428 // look at the LC_*/LANG environment variables. We do, however, pass --lang
429 // to renderer and plugin processes so they know what language the parent
430 // process decided to use.
431
432 #if defined(OS_WIN)
433
434 // First, try the preference value.
435 if (!pref_locale.empty())
436 candidates.push_back(GetCanonicalLocale(pref_locale));
437
438 // Next, try the overridden locale.
439 const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
440 if (!languages.empty()) {
441 candidates.reserve(candidates.size() + languages.size());
442 std::transform(languages.begin(), languages.end(),
443 std::back_inserter(candidates), &GetCanonicalLocale);
444 } else {
445 // If no override was set, defer to ICU
446 candidates.push_back(base::i18n::GetConfiguredLocale());
447 }
448
449 #elif defined(OS_ANDROID)
450
451 // On Android, query java.util.Locale for the default locale.
452 candidates.push_back(GetDefaultLocale());
453
454 #elif defined(USE_GLIB) && !defined(OS_CHROMEOS)
455
456 // GLib implements correct environment variable parsing with
457 // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
458 // We used to use our custom parsing code along with ICU for this purpose.
459 // If we have a port that does not depend on GTK, we have to
460 // restore our custom code for that port.
461 const char* const* languages = g_get_language_names();
462 DCHECK(languages); // A valid pointer is guaranteed.
463 DCHECK(*languages); // At least one entry, "C", is guaranteed.
464
465 for (; *languages != NULL; ++languages) {
466 candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
467 }
468
469 #else
470
471 // By default, use the application locale preference. This applies to ChromeOS
472 // and linux systems without glib.
473 if (!pref_locale.empty())
474 candidates.push_back(pref_locale);
475
476 #endif
477
478 std::vector<std::string>::const_iterator i = candidates.begin();
479 for (; i != candidates.end(); ++i) {
480 if (CheckAndResolveLocale(*i, &resolved_locale)) {
481 base::i18n::SetICUDefaultLocale(resolved_locale);
482 return resolved_locale;
483 }
484 }
485
486 // Fallback on en-US.
487 const std::string fallback_locale("en-US");
488 if (IsLocaleAvailable(fallback_locale)) {
489 base::i18n::SetICUDefaultLocale(fallback_locale);
490 return fallback_locale;
491 }
492
493 return std::string();
494
495 #endif
496 }
497
IsLocaleNameTranslated(const char * locale,const std::string & display_locale)498 bool IsLocaleNameTranslated(const char* locale,
499 const std::string& display_locale) {
500 base::string16 display_name =
501 l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
502 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
503 // uloc_getDisplayName returns the actual translation or the default
504 // value (locale code), we have to rely on this hack to tell whether
505 // the translation is available or not. If ICU doesn't have a translated
506 // name for this locale, GetDisplayNameForLocale will just return the
507 // locale code.
508 return !base::IsStringASCII(display_name) ||
509 base::UTF16ToASCII(display_name) != locale;
510 }
511
GetDisplayNameForLocale(const std::string & locale,const std::string & display_locale,bool is_for_ui)512 base::string16 GetDisplayNameForLocale(const std::string& locale,
513 const std::string& display_locale,
514 bool is_for_ui) {
515 std::string locale_code = locale;
516 // Internally, we use the language code of zh-CN and zh-TW, but we want the
517 // display names to be Chinese (Simplified) and Chinese (Traditional) instead
518 // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans
519 // and zh-Hant to ICU. Even with this mapping, we'd get
520 // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
521 // even longer results in other languages. Arguably, they're better than
522 // the current results : Chinese (China) / Chinese (Taiwan).
523 // TODO(jungshik): Do one of the following:
524 // 1. Special-case Chinese by getting the custom-translation for them
525 // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
526 // 3. Get translations for two directly from the ICU resouce bundle
527 // because they're not accessible with other any API.
528 // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
529 // #1 and #2 wouldn't work if display_locale != current UI locale although
530 // we can think of additional hack to work around the problem.
531 // #3 can be potentially expensive.
532 if (locale_code == "zh-CN")
533 locale_code = "zh-Hans";
534 else if (locale_code == "zh-TW")
535 locale_code = "zh-Hant";
536
537 base::string16 display_name;
538 #if defined(OS_ANDROID)
539 // Use Java API to get locale display name so that we can remove most of
540 // the lang data from icu data to reduce binary size, except for zh-Hans and
541 // zh-Hant because the current Android Java API doesn't support scripts.
542 // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
543 // Android Java API supports scripts.
544 if (!StartsWithASCII(locale_code, "zh-Han", true)) {
545 display_name = GetDisplayNameForLocale(locale_code, display_locale);
546 } else
547 #endif
548 {
549 UErrorCode error = U_ZERO_ERROR;
550 const int kBufferSize = 1024;
551
552 int actual_size = uloc_getDisplayName(
553 locale_code.c_str(), display_locale.c_str(),
554 WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
555 DCHECK(U_SUCCESS(error));
556 display_name.resize(actual_size);
557 }
558
559 // Add directional markup so parentheses are properly placed.
560 if (is_for_ui && base::i18n::IsRTL())
561 base::i18n::AdjustStringForLocaleDirection(&display_name);
562 return display_name;
563 }
564
GetDisplayNameForCountry(const std::string & country_code,const std::string & display_locale)565 base::string16 GetDisplayNameForCountry(const std::string& country_code,
566 const std::string& display_locale) {
567 return GetDisplayNameForLocale("_" + country_code, display_locale, false);
568 }
569
// Returns a copy of |locale| in which every hyphen is replaced by an
// underscore; all other characters are kept as they are.
std::string NormalizeLocale(const std::string& locale) {
  std::string result;
  result.reserve(locale.size());
  for (std::string::const_iterator it = locale.begin(); it != locale.end();
       ++it) {
    result.push_back(*it == '-' ? '_' : *it);
  }
  return result;
}
576
GetParentLocales(const std::string & current_locale,std::vector<std::string> * parent_locales)577 void GetParentLocales(const std::string& current_locale,
578 std::vector<std::string>* parent_locales) {
579 std::string locale(NormalizeLocale(current_locale));
580
581 const int kNameCapacity = 256;
582 char parent[kNameCapacity];
583 base::strlcpy(parent, locale.c_str(), kNameCapacity);
584 parent_locales->push_back(parent);
585 UErrorCode err = U_ZERO_ERROR;
586 while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
587 if (U_FAILURE(err))
588 break;
589 parent_locales->push_back(parent);
590 }
591 }
592
IsValidLocaleSyntax(const std::string & locale)593 bool IsValidLocaleSyntax(const std::string& locale) {
594 // Check that the length is plausible.
595 if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
596 return false;
597
598 // Strip off the part after an '@' sign, which might contain keywords,
599 // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
600 // We don't validate that part much, just check that there's at least one
601 // equals sign in a plausible place. Normalize the prefix so that hyphens
602 // are changed to underscores.
603 std::string prefix = NormalizeLocale(locale);
604 size_t split_point = locale.find("@");
605 if (split_point != std::string::npos) {
606 std::string keywords = locale.substr(split_point + 1);
607 prefix = locale.substr(0, split_point);
608
609 size_t equals_loc = keywords.find("=");
610 if (equals_loc == std::string::npos ||
611 equals_loc < 1 || equals_loc > keywords.size() - 2)
612 return false;
613 }
614
615 // Check that all characters before the at-sign are alphanumeric or
616 // underscore.
617 for (size_t i = 0; i < prefix.size(); i++) {
618 char ch = prefix[i];
619 if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
620 return false;
621 }
622
623 // Check that the initial token (before the first hyphen/underscore)
624 // is 1 - 3 alphabetical characters (a language tag).
625 for (size_t i = 0; i < prefix.size(); i++) {
626 char ch = prefix[i];
627 if (ch == '_') {
628 if (i < 1 || i > 3)
629 return false;
630 break;
631 }
632 if (!IsAsciiAlpha(ch))
633 return false;
634 }
635
636 // Check that the all tokens after the initial token are 1 - 8 characters.
637 // (Tokenize/StringTokenizer don't work here, they collapse multiple
638 // delimiters into one.)
639 int token_len = 0;
640 int token_index = 0;
641 for (size_t i = 0; i < prefix.size(); i++) {
642 if (prefix[i] != '_') {
643 token_len++;
644 continue;
645 }
646
647 if (token_index > 0 && (token_len < 1 || token_len > 8)) {
648 return false;
649 }
650 token_index++;
651 token_len = 0;
652 }
653 if (token_index == 0 && (token_len < 1 || token_len > 3)) {
654 return false;
655 } else if (token_len < 1 || token_len > 8) {
656 return false;
657 }
658
659 return true;
660 }
661
GetStringUTF8(int message_id)662 std::string GetStringUTF8(int message_id) {
663 return base::UTF16ToUTF8(GetStringUTF16(message_id));
664 }
665
GetStringUTF16(int message_id)666 base::string16 GetStringUTF16(int message_id) {
667 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
668 base::string16 str = rb.GetLocalizedString(message_id);
669 AdjustParagraphDirectionality(&str);
670
671 return str;
672 }
673
GetStringFUTF16(int message_id,const std::vector<base::string16> & replacements,std::vector<size_t> * offsets)674 base::string16 GetStringFUTF16(int message_id,
675 const std::vector<base::string16>& replacements,
676 std::vector<size_t>* offsets) {
677 // TODO(tc): We could save a string copy if we got the raw string as
678 // a StringPiece and were able to call ReplaceStringPlaceholders with
679 // a StringPiece format string and base::string16 substitution strings. In
680 // practice, the strings should be relatively short.
681 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
682 const base::string16& format_string = rb.GetLocalizedString(message_id);
683
684 #ifndef NDEBUG
685 // Make sure every replacement string is being used, so we don't just
686 // silently fail to insert one. If |offsets| is non-NULL, then don't do this
687 // check as the code may simply want to find the placeholders rather than
688 // actually replacing them.
689 if (!offsets) {
690 std::string utf8_string = base::UTF16ToUTF8(format_string);
691
692 // $9 is the highest allowed placeholder.
693 for (size_t i = 0; i < 9; ++i) {
694 bool placeholder_should_exist = replacements.size() > i;
695
696 std::string placeholder =
697 base::StringPrintf("$%d", static_cast<int>(i + 1));
698 size_t pos = utf8_string.find(placeholder.c_str());
699 if (placeholder_should_exist) {
700 DCHECK_NE(std::string::npos, pos) <<
701 " Didn't find a " << placeholder << " placeholder in " <<
702 utf8_string;
703 } else {
704 DCHECK_EQ(std::string::npos, pos) <<
705 " Unexpectedly found a " << placeholder << " placeholder in " <<
706 utf8_string;
707 }
708 }
709 }
710 #endif
711
712 base::string16 formatted = ReplaceStringPlaceholders(
713 format_string, replacements, offsets);
714 AdjustParagraphDirectionality(&formatted);
715
716 return formatted;
717 }
718
GetStringFUTF8(int message_id,const base::string16 & a)719 std::string GetStringFUTF8(int message_id,
720 const base::string16& a) {
721 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
722 }
723
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b)724 std::string GetStringFUTF8(int message_id,
725 const base::string16& a,
726 const base::string16& b) {
727 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
728 }
729
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c)730 std::string GetStringFUTF8(int message_id,
731 const base::string16& a,
732 const base::string16& b,
733 const base::string16& c) {
734 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
735 }
736
GetStringFUTF8(int message_id,const base::string16 & a,const base::string16 & b,const base::string16 & c,const base::string16 & d)737 std::string GetStringFUTF8(int message_id,
738 const base::string16& a,
739 const base::string16& b,
740 const base::string16& c,
741 const base::string16& d) {
742 return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
743 }
744
// Formats |message_id| with the single replacement |a|; no offsets are
// requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a) {
  const std::vector<base::string16> substitutions(1, a);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
751
// Formats |message_id| with the replacements |a| and |b| by delegating to the
// offsets-taking overload with a null offsets pointer.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b) {
  std::vector<size_t>* const no_offsets = NULL;
  return GetStringFUTF16(message_id, a, b, no_offsets);
}
757
// Formats |message_id| with the replacements |a|, |b| and |c|, in that order;
// no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(3);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
768
// Formats |message_id| with the replacements |a|, |b|, |c| and |d|, in that
// order; no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(4);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  substitutions.push_back(d);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
781
// Formats |message_id| with the replacements |a|, |b|, |c|, |d| and |e|, in
// that order; no offsets are requested.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               const base::string16& c,
                               const base::string16& d,
                               const base::string16& e) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(5);
  substitutions.push_back(a);
  substitutions.push_back(b);
  substitutions.push_back(c);
  substitutions.push_back(d);
  substitutions.push_back(e);
  return GetStringFUTF16(message_id, substitutions, NULL);
}
796
// Formats |message_id| with the single replacement |a| and stores the one
// offset reported by the vector overload in |offset|, which must not be NULL.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               size_t* offset) {
  DCHECK(offset);
  const std::vector<base::string16> substitutions(1, a);
  std::vector<size_t> found_offsets;
  const base::string16 formatted =
      GetStringFUTF16(message_id, substitutions, &found_offsets);
  // Exactly one offset is expected for one replacement.
  DCHECK(found_offsets.size() == 1);
  *offset = found_offsets[0];
  return formatted;
}
809
// Formats |message_id| with the replacements |a| and |b| and forwards
// |offsets| to the vector overload.
base::string16 GetStringFUTF16(int message_id,
                               const base::string16& a,
                               const base::string16& b,
                               std::vector<size_t>* offsets) {
  std::vector<base::string16> substitutions;
  substitutions.reserve(2);
  substitutions.push_back(a);
  substitutions.push_back(b);
  return GetStringFUTF16(message_id, substitutions, offsets);
}
819
GetStringFUTF16Int(int message_id,int a)820 base::string16 GetStringFUTF16Int(int message_id, int a) {
821 return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
822 }
823
GetStringFUTF16Int(int message_id,int64 a)824 base::string16 GetStringFUTF16Int(int message_id, int64 a) {
825 return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
826 }
827
828 // Specialization of operator() method for base::string16 version.
829 template <>
operator ()(const base::string16 & lhs,const base::string16 & rhs)830 bool StringComparator<base::string16>::operator()(const base::string16& lhs,
831 const base::string16& rhs) {
832 // If we can not get collator instance for specified locale, just do simple
833 // string compare.
834 if (!collator_)
835 return lhs < rhs;
836 return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
837 UCOL_LESS;
838 };
839
GetPluralStringFUTF16(const std::vector<int> & message_ids,int number)840 base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids,
841 int number) {
842 scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids);
843 DCHECK(format);
844
845 UErrorCode err = U_ZERO_ERROR;
846 icu::UnicodeString result_files_string = format->format(number, err);
847 int capacity = result_files_string.length() + 1;
848 DCHECK_GT(capacity, 1);
849 base::string16 result;
850 result_files_string.extract(
851 static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err);
852 DCHECK(U_SUCCESS(err));
853 return result;
854 }
855
GetPluralStringFUTF8(const std::vector<int> & message_ids,int number)856 std::string GetPluralStringFUTF8(const std::vector<int>& message_ids,
857 int number) {
858 return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number));
859 }
860
SortStrings16(const std::string & locale,std::vector<base::string16> * strings)861 void SortStrings16(const std::string& locale,
862 std::vector<base::string16>* strings) {
863 SortVectorWithStringKey(locale, strings, false);
864 }
865
GetAvailableLocales()866 const std::vector<std::string>& GetAvailableLocales() {
867 return g_available_locales.Get();
868 }
869
GetAcceptLanguagesForLocale(const std::string & display_locale,std::vector<std::string> * locale_codes)870 void GetAcceptLanguagesForLocale(const std::string& display_locale,
871 std::vector<std::string>* locale_codes) {
872 for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
873 if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
874 display_locale))
875 // TODO(jungshik) : Put them at the of the list with language codes
876 // enclosed by brackets instead of skipping.
877 continue;
878 locale_codes->push_back(kAcceptLanguageList[i]);
879 }
880 }
881
GetLocalizedContentsWidthInPixels(int pixel_resource_id)882 int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
883 int width = 0;
884 base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
885 DCHECK_GT(width, 0);
886 return width;
887 }
888
889 } // namespace l10n_util
890