1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/rtl.h"
6
7 #include "base/file_path.h"
8 #include "base/logging.h"
9 #include "base/string_util.h"
10 #include "base/utf_string_conversions.h"
11 #include "base/sys_string_conversions.h"
12 #include "unicode/coll.h"
13 #include "unicode/locid.h"
14 #include "unicode/uchar.h"
15 #include "unicode/uscript.h"
16
17 #if defined(TOOLKIT_USES_GTK)
18 #include <gtk/gtk.h>
19 #endif
20
21 namespace {
22
23 // Extract language and country, ignore keywords, concatenate using dash.
GetLocaleString(const icu::Locale & locale)24 std::string GetLocaleString(const icu::Locale& locale) {
25 const char* language = locale.getLanguage();
26 const char* country = locale.getCountry();
27
28 std::string result =
29 (language != NULL && *language != '\0') ? language : "und";
30
31 if (country != NULL && *country != '\0') {
32 result += '-';
33 result += country;
34 }
35
36 return result;
37 }
38
39 } // namespace
40
41 namespace base {
42 namespace i18n {
43
44 // Represents the locale-specific ICU text direction.
45 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46
47 // Convert the ICU default locale to a string.
GetConfiguredLocale()48 std::string GetConfiguredLocale() {
49 return GetLocaleString(icu::Locale::getDefault());
50 }
51
52 // Convert the ICU canonicalized locale to a string.
GetCanonicalLocale(const char * locale)53 std::string GetCanonicalLocale(const char* locale) {
54 return GetLocaleString(icu::Locale::createCanonical(locale));
55 }
56
57 // Convert Chrome locale name to ICU locale name
ICULocaleName(const std::string & locale_string)58 std::string ICULocaleName(const std::string& locale_string) {
59 // If not Spanish, just return it.
60 if (locale_string.substr(0, 2) != "es")
61 return locale_string;
62 // Expand es to es-ES.
63 if (LowerCaseEqualsASCII(locale_string, "es"))
64 return "es-ES";
65 // Map es-419 (Latin American Spanish) to es-FOO depending on the system
66 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map
67 // to es-MX (the most populous in Spanish-speaking Latin America).
68 if (LowerCaseEqualsASCII(locale_string, "es-419")) {
69 const icu::Locale& locale = icu::Locale::getDefault();
70 std::string language = locale.getLanguage();
71 const char* country = locale.getCountry();
72 if (LowerCaseEqualsASCII(language, "es") &&
73 !LowerCaseEqualsASCII(country, "es")) {
74 language += '-';
75 language += country;
76 return language;
77 }
78 return "es-MX";
79 }
80 // Currently, Chrome has only "es" and "es-419", but later we may have
81 // more specific "es-RR".
82 return locale_string;
83 }
84
SetICUDefaultLocale(const std::string & locale_string)85 void SetICUDefaultLocale(const std::string& locale_string) {
86 icu::Locale locale(ICULocaleName(locale_string).c_str());
87 UErrorCode error_code = U_ZERO_ERROR;
88 icu::Locale::setDefault(locale, error_code);
89 // This return value is actually bogus because Locale object is
90 // an ID and setDefault seems to always succeed (regardless of the
91 // presence of actual locale data). However,
92 // it does not hurt to have it as a sanity check.
93 DCHECK(U_SUCCESS(error_code));
94 g_icu_text_direction = UNKNOWN_DIRECTION;
95
96 // If we use Views toolkit on top of GtkWidget, then we need to keep
97 // GtkWidget's default text direction consistent with ICU's text direction.
98 // Because in this case ICU's text direction will be used instead.
99 // See IsRTL() function below.
100 #if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
101 gtk_widget_set_default_direction(
102 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
103 #endif
104 }
105
IsRTL()106 bool IsRTL() {
107 #if defined(TOOLKIT_GTK)
108 GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
109 return (gtk_dir == GTK_TEXT_DIR_RTL);
110 #else
111 return ICUIsRTL();
112 #endif
113 }
114
ICUIsRTL()115 bool ICUIsRTL() {
116 if (g_icu_text_direction == UNKNOWN_DIRECTION) {
117 const icu::Locale& locale = icu::Locale::getDefault();
118 g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
119 }
120 return g_icu_text_direction == RIGHT_TO_LEFT;
121 }
122
GetTextDirectionForLocale(const char * locale_name)123 TextDirection GetTextDirectionForLocale(const char* locale_name) {
124 UErrorCode status = U_ZERO_ERROR;
125 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
126 DCHECK(U_SUCCESS(status));
127 // Treat anything other than RTL as LTR.
128 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
129 }
130
GetFirstStrongCharacterDirection(const string16 & text)131 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
132 const UChar* string = text.c_str();
133 size_t length = text.length();
134 size_t position = 0;
135 while (position < length) {
136 UChar32 character;
137 size_t next_position = position;
138 U16_NEXT(string, next_position, length, character);
139
140 // Now that we have the character, we use ICU in order to query for the
141 // appropriate Unicode BiDi character type.
142 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
143 if ((property == U_RIGHT_TO_LEFT) ||
144 (property == U_RIGHT_TO_LEFT_ARABIC) ||
145 (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
146 (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
147 return RIGHT_TO_LEFT;
148 } else if ((property == U_LEFT_TO_RIGHT) ||
149 (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
150 (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
151 return LEFT_TO_RIGHT;
152 }
153
154 position = next_position;
155 }
156
157 return LEFT_TO_RIGHT;
158 }
159
160 #if defined(WCHAR_T_IS_UTF32)
GetFirstStrongCharacterDirection(const std::wstring & text)161 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
162 return GetFirstStrongCharacterDirection(WideToUTF16(text));
163 }
164 #endif
165
166 #if defined(OS_WIN)
AdjustStringForLocaleDirection(string16 * text)167 bool AdjustStringForLocaleDirection(string16* text) {
168 if (!IsRTL() || text->empty())
169 return false;
170
171 // Marking the string as LTR if the locale is RTL and the string does not
172 // contain strong RTL characters. Otherwise, mark the string as RTL.
173 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
174 if (!has_rtl_chars)
175 WrapStringWithLTRFormatting(text);
176 else
177 WrapStringWithRTLFormatting(text);
178
179 return true;
180 }
181 #else
AdjustStringForLocaleDirection(string16 * text)182 bool AdjustStringForLocaleDirection(string16* text) {
183 // On OS X & GTK the directionality of a label is determined by the first
184 // strongly directional character.
185 // However, we want to make sure that in an LTR-language-UI all strings are
186 // left aligned and vice versa.
187 // A problem can arise if we display a string which starts with user input.
188 // User input may be of the opposite directionality to the UI. So the whole
189 // string will be displayed in the opposite directionality, e.g. if we want to
190 // display in an LTR UI [such as US English]:
191 //
192 // EMAN_NOISNETXE is now installed.
193 //
194 // Since EXTENSION_NAME begins with a strong RTL char, the label's
195 // directionality will be set to RTL and the string will be displayed visually
196 // as:
197 //
198 // .is now installed EMAN_NOISNETXE
199 //
200 // In order to solve this issue, we prepend an LRM to the string. An LRM is a
201 // strongly directional LTR char.
202 // We also append an LRM at the end, which ensures that we're in an LTR
203 // context.
204
205 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
206 // box so there is no issue with displaying zero-width bidi control characters
207 // on any system. Thus no need for the !IsRTL() check here.
208 if (text->empty())
209 return false;
210
211 bool ui_direction_is_rtl = IsRTL();
212
213 bool has_rtl_chars = StringContainsStrongRTLChars(*text);
214 if (!ui_direction_is_rtl && has_rtl_chars) {
215 WrapStringWithRTLFormatting(text);
216 text->insert(0, 1, kLeftToRightMark);
217 text->push_back(kLeftToRightMark);
218 } else if (ui_direction_is_rtl && has_rtl_chars) {
219 WrapStringWithRTLFormatting(text);
220 text->insert(0, 1, kRightToLeftMark);
221 text->push_back(kRightToLeftMark);
222 } else if (ui_direction_is_rtl) {
223 WrapStringWithLTRFormatting(text);
224 text->insert(0, 1, kRightToLeftMark);
225 text->push_back(kRightToLeftMark);
226 }
227
228 return true;
229 }
230
231 #endif // !OS_WIN
232
233 #if defined(WCHAR_T_IS_UTF32)
AdjustStringForLocaleDirection(std::wstring * text)234 bool AdjustStringForLocaleDirection(std::wstring* text) {
235 string16 temp = WideToUTF16(*text);
236 if (AdjustStringForLocaleDirection(&temp)) {
237 // We should only touch the output on success.
238 *text = UTF16ToWide(temp);
239 return true;
240 }
241 return false;
242 }
243 #endif
244
StringContainsStrongRTLChars(const string16 & text)245 bool StringContainsStrongRTLChars(const string16& text) {
246 const UChar* string = text.c_str();
247 size_t length = text.length();
248 size_t position = 0;
249 while (position < length) {
250 UChar32 character;
251 size_t next_position = position;
252 U16_NEXT(string, next_position, length, character);
253
254 // Now that we have the character, we use ICU in order to query for the
255 // appropriate Unicode BiDi character type.
256 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
257 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
258 return true;
259
260 position = next_position;
261 }
262
263 return false;
264 }
265
266 #if defined(WCHAR_T_IS_UTF32)
StringContainsStrongRTLChars(const std::wstring & text)267 bool StringContainsStrongRTLChars(const std::wstring& text) {
268 return StringContainsStrongRTLChars(WideToUTF16(text));
269 }
270 #endif
271
WrapStringWithLTRFormatting(string16 * text)272 void WrapStringWithLTRFormatting(string16* text) {
273 if (text->empty())
274 return;
275
276 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
277 text->insert(0, 1, kLeftToRightEmbeddingMark);
278
279 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
280 text->push_back(kPopDirectionalFormatting);
281 }
282
283 #if defined(WCHAR_T_IS_UTF32)
WrapStringWithLTRFormatting(std::wstring * text)284 void WrapStringWithLTRFormatting(std::wstring* text) {
285 if (text->empty())
286 return;
287
288 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
289 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark));
290
291 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
292 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
293 }
294 #endif
295
WrapStringWithRTLFormatting(string16 * text)296 void WrapStringWithRTLFormatting(string16* text) {
297 if (text->empty())
298 return;
299
300 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
301 text->insert(0, 1, kRightToLeftEmbeddingMark);
302
303 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
304 text->push_back(kPopDirectionalFormatting);
305 }
306
307 #if defined(WCHAR_T_IS_UTF32)
WrapStringWithRTLFormatting(std::wstring * text)308 void WrapStringWithRTLFormatting(std::wstring* text) {
309 if (text->empty())
310 return;
311
312 // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
313 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark));
314
315 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
316 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
317 }
318 #endif
319
WrapPathWithLTRFormatting(const FilePath & path,string16 * rtl_safe_path)320 void WrapPathWithLTRFormatting(const FilePath& path,
321 string16* rtl_safe_path) {
322 // Wrap the overall path with LRE-PDF pair which essentialy marks the
323 // string as a Left-To-Right string.
324 // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
325 rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
326 #if defined(OS_MACOSX)
327 rtl_safe_path->append(UTF8ToUTF16(path.value()));
328 #elif defined(OS_WIN)
329 rtl_safe_path->append(path.value());
330 #else // defined(OS_POSIX) && !defined(OS_MACOSX)
331 std::wstring wide_path = base::SysNativeMBToWide(path.value());
332 rtl_safe_path->append(WideToUTF16(wide_path));
333 #endif
334 // Inserting a PDF (Pop Directional Formatting) mark as the last character.
335 rtl_safe_path->push_back(kPopDirectionalFormatting);
336 }
337
// Returns a copy of |text| for display. When the UI is RTL the copy is
// wrapped with LTR formatting; otherwise it is returned as is.
string16 GetDisplayStringInLTRDirectionality(const string16& text) {
  string16 display_text(text);
  if (IsRTL())
    WrapStringWithLTRFormatting(&display_text);
  return display_text;
}
345
// Returns |text| without a leading embedding/override mark (LRE, RLE, LRO or
// RLO) and without a trailing PDF (Pop Directional Formatting) mark. The two
// ends are checked independently, so either, both or neither may be removed.
// Empty text is returned unchanged.
const string16 StripWrappingBidiControlCharacters(const string16& text) {
  if (text.empty())
    return text;

  // [first, last) is the half-open range of characters to keep.
  size_t first = 0;
  size_t last = text.length();

  const char16 leading = text[0];
  const bool starts_with_control = leading == kLeftToRightEmbeddingMark ||
                                   leading == kRightToLeftEmbeddingMark ||
                                   leading == kLeftToRightOverride ||
                                   leading == kRightToLeftOverride;
  if (starts_with_control)
    first = 1;

  if (text[last - 1] == kPopDirectionalFormatting)
    --last;

  return text.substr(first, last - first);
}
361
362 } // namespace i18n
363 } // namespace base
364