• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include "base/file_path.h"
8 #include "base/logging.h"
9 #include "base/string_util.h"
10 #include "base/utf_string_conversions.h"
11 #include "base/sys_string_conversions.h"
12 #include "unicode/coll.h"
13 #include "unicode/locid.h"
14 #include "unicode/uchar.h"
15 #include "unicode/uscript.h"
16 
17 #if defined(TOOLKIT_USES_GTK)
18 #include <gtk/gtk.h>
19 #endif
20 
21 namespace {
22 
23 // Extract language and country, ignore keywords, concatenate using dash.
GetLocaleString(const icu::Locale & locale)24 std::string GetLocaleString(const icu::Locale& locale) {
25   const char* language = locale.getLanguage();
26   const char* country = locale.getCountry();
27 
28   std::string result =
29       (language != NULL && *language != '\0') ? language : "und";
30 
31   if (country != NULL && *country != '\0') {
32     result += '-';
33     result += country;
34   }
35 
36   return result;
37 }
38 
39 }  // namespace
40 
41 namespace base {
42 namespace i18n {
43 
44 // Represents the locale-specific ICU text direction.
45 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46 
47 // Convert the ICU default locale to a string.
GetConfiguredLocale()48 std::string GetConfiguredLocale() {
49   return GetLocaleString(icu::Locale::getDefault());
50 }
51 
52 // Convert the ICU canonicalized locale to a string.
GetCanonicalLocale(const char * locale)53 std::string GetCanonicalLocale(const char* locale) {
54   return GetLocaleString(icu::Locale::createCanonical(locale));
55 }
56 
57 // Convert Chrome locale name to ICU locale name
ICULocaleName(const std::string & locale_string)58 std::string ICULocaleName(const std::string& locale_string) {
59   // If not Spanish, just return it.
60   if (locale_string.substr(0, 2) != "es")
61     return locale_string;
62   // Expand es to es-ES.
63   if (LowerCaseEqualsASCII(locale_string, "es"))
64     return "es-ES";
65   // Map es-419 (Latin American Spanish) to es-FOO depending on the system
66   // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
67   // to es-MX (the most populous in Spanish-speaking Latin America).
68   if (LowerCaseEqualsASCII(locale_string, "es-419")) {
69     const icu::Locale& locale = icu::Locale::getDefault();
70     std::string language = locale.getLanguage();
71     const char* country = locale.getCountry();
72     if (LowerCaseEqualsASCII(language, "es") &&
73       !LowerCaseEqualsASCII(country, "es")) {
74         language += '-';
75         language += country;
76         return language;
77     }
78     return "es-MX";
79   }
80   // Currently, Chrome has only "es" and "es-419", but later we may have
81   // more specific "es-RR".
82   return locale_string;
83 }
84 
SetICUDefaultLocale(const std::string & locale_string)85 void SetICUDefaultLocale(const std::string& locale_string) {
86   icu::Locale locale(ICULocaleName(locale_string).c_str());
87   UErrorCode error_code = U_ZERO_ERROR;
88   icu::Locale::setDefault(locale, error_code);
89   // This return value is actually bogus because Locale object is
90   // an ID and setDefault seems to always succeed (regardless of the
91   // presence of actual locale data). However,
92   // it does not hurt to have it as a sanity check.
93   DCHECK(U_SUCCESS(error_code));
94   g_icu_text_direction = UNKNOWN_DIRECTION;
95 
96   // If we use Views toolkit on top of GtkWidget, then we need to keep
97   // GtkWidget's default text direction consistent with ICU's text direction.
98   // Because in this case ICU's text direction will be used instead.
99   // See IsRTL() function below.
100 #if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
101   gtk_widget_set_default_direction(
102       ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
103 #endif
104 }
105 
IsRTL()106 bool IsRTL() {
107 #if defined(TOOLKIT_GTK)
108   GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
109   return (gtk_dir == GTK_TEXT_DIR_RTL);
110 #else
111   return ICUIsRTL();
112 #endif
113 }
114 
ICUIsRTL()115 bool ICUIsRTL() {
116   if (g_icu_text_direction == UNKNOWN_DIRECTION) {
117     const icu::Locale& locale = icu::Locale::getDefault();
118     g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
119   }
120   return g_icu_text_direction == RIGHT_TO_LEFT;
121 }
122 
GetTextDirectionForLocale(const char * locale_name)123 TextDirection GetTextDirectionForLocale(const char* locale_name) {
124   UErrorCode status = U_ZERO_ERROR;
125   ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
126   DCHECK(U_SUCCESS(status));
127   // Treat anything other than RTL as LTR.
128   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
129 }
130 
GetFirstStrongCharacterDirection(const string16 & text)131 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
132   const UChar* string = text.c_str();
133   size_t length = text.length();
134   size_t position = 0;
135   while (position < length) {
136     UChar32 character;
137     size_t next_position = position;
138     U16_NEXT(string, next_position, length, character);
139 
140     // Now that we have the character, we use ICU in order to query for the
141     // appropriate Unicode BiDi character type.
142     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
143     if ((property == U_RIGHT_TO_LEFT) ||
144         (property == U_RIGHT_TO_LEFT_ARABIC) ||
145         (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
146         (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
147       return RIGHT_TO_LEFT;
148     } else if ((property == U_LEFT_TO_RIGHT) ||
149                (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
150                (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
151       return LEFT_TO_RIGHT;
152     }
153 
154     position = next_position;
155   }
156 
157   return LEFT_TO_RIGHT;
158 }
159 
160 #if defined(WCHAR_T_IS_UTF32)
GetFirstStrongCharacterDirection(const std::wstring & text)161 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
162   return GetFirstStrongCharacterDirection(WideToUTF16(text));
163 }
164 #endif
165 
166 #if defined(OS_WIN)
AdjustStringForLocaleDirection(string16 * text)167 bool AdjustStringForLocaleDirection(string16* text) {
168   if (!IsRTL() || text->empty())
169     return false;
170 
171   // Marking the string as LTR if the locale is RTL and the string does not
172   // contain strong RTL characters. Otherwise, mark the string as RTL.
173   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
174   if (!has_rtl_chars)
175     WrapStringWithLTRFormatting(text);
176   else
177     WrapStringWithRTLFormatting(text);
178 
179   return true;
180 }
181 #else
AdjustStringForLocaleDirection(string16 * text)182 bool AdjustStringForLocaleDirection(string16* text) {
183   // On OS X & GTK the directionality of a label is determined by the first
184   // strongly directional character.
185   // However, we want to make sure that in an LTR-language-UI all strings are
186   // left aligned and vice versa.
187   // A problem can arise if we display a string which starts with user input.
188   // User input may be of the opposite directionality to the UI. So the whole
189   // string will be displayed in the opposite directionality, e.g. if we want to
190   // display in an LTR UI [such as US English]:
191   //
192   // EMAN_NOISNETXE is now installed.
193   //
194   // Since EXTENSION_NAME begins with a strong RTL char, the label's
195   // directionality will be set to RTL and the string will be displayed visually
196   // as:
197   //
198   // .is now installed EMAN_NOISNETXE
199   //
200   // In order to solve this issue, we prepend an LRM to the string. An LRM is a
201   // strongly directional LTR char.
202   // We also append an LRM at the end, which ensures that we're in an LTR
203   // context.
204 
205   // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
206   // box so there is no issue with displaying zero-width bidi control characters
207   // on any system.  Thus no need for the !IsRTL() check here.
208   if (text->empty())
209     return false;
210 
211   bool ui_direction_is_rtl = IsRTL();
212 
213   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
214   if (!ui_direction_is_rtl && has_rtl_chars) {
215     WrapStringWithRTLFormatting(text);
216     text->insert(0, 1, kLeftToRightMark);
217     text->push_back(kLeftToRightMark);
218   } else if (ui_direction_is_rtl && has_rtl_chars) {
219     WrapStringWithRTLFormatting(text);
220     text->insert(0, 1, kRightToLeftMark);
221     text->push_back(kRightToLeftMark);
222   } else if (ui_direction_is_rtl) {
223     WrapStringWithLTRFormatting(text);
224     text->insert(0, 1, kRightToLeftMark);
225     text->push_back(kRightToLeftMark);
226   }
227 
228   return true;
229 }
230 
231 #endif  // !OS_WIN
232 
233 #if defined(WCHAR_T_IS_UTF32)
AdjustStringForLocaleDirection(std::wstring * text)234 bool AdjustStringForLocaleDirection(std::wstring* text) {
235   string16 temp = WideToUTF16(*text);
236   if (AdjustStringForLocaleDirection(&temp)) {
237     // We should only touch the output on success.
238     *text = UTF16ToWide(temp);
239     return true;
240   }
241   return false;
242 }
243 #endif
244 
StringContainsStrongRTLChars(const string16 & text)245 bool StringContainsStrongRTLChars(const string16& text) {
246   const UChar* string = text.c_str();
247   size_t length = text.length();
248   size_t position = 0;
249   while (position < length) {
250     UChar32 character;
251     size_t next_position = position;
252     U16_NEXT(string, next_position, length, character);
253 
254     // Now that we have the character, we use ICU in order to query for the
255     // appropriate Unicode BiDi character type.
256     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
257     if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
258       return true;
259 
260     position = next_position;
261   }
262 
263   return false;
264 }
265 
266 #if defined(WCHAR_T_IS_UTF32)
StringContainsStrongRTLChars(const std::wstring & text)267 bool StringContainsStrongRTLChars(const std::wstring& text) {
268   return StringContainsStrongRTLChars(WideToUTF16(text));
269 }
270 #endif
271 
WrapStringWithLTRFormatting(string16 * text)272 void WrapStringWithLTRFormatting(string16* text) {
273   if (text->empty())
274     return;
275 
276   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
277   text->insert(0, 1, kLeftToRightEmbeddingMark);
278 
279   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
280   text->push_back(kPopDirectionalFormatting);
281 }
282 
283 #if defined(WCHAR_T_IS_UTF32)
WrapStringWithLTRFormatting(std::wstring * text)284 void WrapStringWithLTRFormatting(std::wstring* text) {
285   if (text->empty())
286     return;
287 
288   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
289   text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark));
290 
291   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
292   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
293 }
294 #endif
295 
WrapStringWithRTLFormatting(string16 * text)296 void WrapStringWithRTLFormatting(string16* text) {
297   if (text->empty())
298     return;
299 
300   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
301   text->insert(0, 1, kRightToLeftEmbeddingMark);
302 
303   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
304   text->push_back(kPopDirectionalFormatting);
305 }
306 
307 #if defined(WCHAR_T_IS_UTF32)
WrapStringWithRTLFormatting(std::wstring * text)308 void WrapStringWithRTLFormatting(std::wstring* text) {
309   if (text->empty())
310     return;
311 
312   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
313   text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark));
314 
315   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
316   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
317 }
318 #endif
319 
WrapPathWithLTRFormatting(const FilePath & path,string16 * rtl_safe_path)320 void WrapPathWithLTRFormatting(const FilePath& path,
321                                string16* rtl_safe_path) {
322   // Wrap the overall path with LRE-PDF pair which essentialy marks the
323   // string as a Left-To-Right string.
324   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
325   rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
326 #if defined(OS_MACOSX)
327     rtl_safe_path->append(UTF8ToUTF16(path.value()));
328 #elif defined(OS_WIN)
329     rtl_safe_path->append(path.value());
330 #else  // defined(OS_POSIX) && !defined(OS_MACOSX)
331     std::wstring wide_path = base::SysNativeMBToWide(path.value());
332     rtl_safe_path->append(WideToUTF16(wide_path));
333 #endif
334   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
335   rtl_safe_path->push_back(kPopDirectionalFormatting);
336 }
337 
// Returns a copy of |text| for display. When the UI is RTL the copy is
// wrapped with LTR formatting; otherwise it is returned unchanged.
string16 GetDisplayStringInLTRDirectionality(const string16& text) {
  string16 display_text(text);
  if (IsRTL())
    WrapStringWithLTRFormatting(&display_text);
  return display_text;
}
345 
// Returns |text| without its wrapping bidi control characters: a leading LRE,
// RLE, LRO or RLO is dropped, and so is a trailing PDF. The two ends are
// checked independently, so either one may be stripped without the other.
// Empty text is returned as is.
const string16 StripWrappingBidiControlCharacters(const string16& text) {
  if (text.empty())
    return text;

  const char16 leading = text[0];
  const bool starts_with_control =
      leading == kLeftToRightEmbeddingMark ||
      leading == kRightToLeftEmbeddingMark ||
      leading == kLeftToRightOverride ||
      leading == kRightToLeftOverride;
  const size_t first = starts_with_control ? 1 : 0;

  // |past_last| is one past the last character to keep. It can never drop
  // below |first|: a single-character string cannot be both a leading control
  // character and a PDF.
  size_t past_last = text.length();
  if (text[past_last - 1] == kPopDirectionalFormatting)
    --past_last;

  return text.substr(first, past_last - first);
}
361 
362 }  // namespace i18n
363 }  // namespace base
364