• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include "base/files/file_path.h"
8 #include "base/logging.h"
9 #include "base/strings/string_util.h"
10 #include "base/strings/sys_string_conversions.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "third_party/icu/source/common/unicode/locid.h"
13 #include "third_party/icu/source/common/unicode/uchar.h"
14 #include "third_party/icu/source/common/unicode/uscript.h"
15 #include "third_party/icu/source/i18n/unicode/coll.h"
16 
17 namespace {
18 
19 // Extract language, country and variant, but ignore keywords.  For example,
20 // en-US, ca@valencia, ca-ES@valencia.
GetLocaleString(const icu::Locale & locale)21 std::string GetLocaleString(const icu::Locale& locale) {
22   const char* language = locale.getLanguage();
23   const char* country = locale.getCountry();
24   const char* variant = locale.getVariant();
25 
26   std::string result =
27       (language != NULL && *language != '\0') ? language : "und";
28 
29   if (country != NULL && *country != '\0') {
30     result += '-';
31     result += country;
32   }
33 
34   if (variant != NULL && *variant != '\0') {
35     std::string variant_str(variant);
36     StringToLowerASCII(&variant_str);
37     result += '@' + variant_str;
38   }
39 
40   return result;
41 }
42 
43 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
44 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
45 // http://unicode.org/reports/tr9/ for more information.
GetCharacterDirection(UChar32 character)46 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
47   // Now that we have the character, we use ICU in order to query for the
48   // appropriate Unicode BiDi character type.
49   int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
50   if ((property == U_RIGHT_TO_LEFT) ||
51       (property == U_RIGHT_TO_LEFT_ARABIC) ||
52       (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
53       (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
54     return base::i18n::RIGHT_TO_LEFT;
55   } else if ((property == U_LEFT_TO_RIGHT) ||
56              (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
57              (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
58     return base::i18n::LEFT_TO_RIGHT;
59   }
60   return base::i18n::UNKNOWN_DIRECTION;
61 }
62 
63 }  // namespace
64 
65 namespace base {
66 namespace i18n {
67 
68 // Represents the locale-specific ICU text direction.
69 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
70 
71 // Convert the ICU default locale to a string.
GetConfiguredLocale()72 std::string GetConfiguredLocale() {
73   return GetLocaleString(icu::Locale::getDefault());
74 }
75 
76 // Convert the ICU canonicalized locale to a string.
GetCanonicalLocale(const char * locale)77 std::string GetCanonicalLocale(const char* locale) {
78   return GetLocaleString(icu::Locale::createCanonical(locale));
79 }
80 
81 // Convert Chrome locale name to ICU locale name
ICULocaleName(const std::string & locale_string)82 std::string ICULocaleName(const std::string& locale_string) {
83   // If not Spanish, just return it.
84   if (locale_string.substr(0, 2) != "es")
85     return locale_string;
86   // Expand es to es-ES.
87   if (LowerCaseEqualsASCII(locale_string, "es"))
88     return "es-ES";
89   // Map es-419 (Latin American Spanish) to es-FOO depending on the system
90   // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
91   // to es-MX (the most populous in Spanish-speaking Latin America).
92   if (LowerCaseEqualsASCII(locale_string, "es-419")) {
93     const icu::Locale& locale = icu::Locale::getDefault();
94     std::string language = locale.getLanguage();
95     const char* country = locale.getCountry();
96     if (LowerCaseEqualsASCII(language, "es") &&
97       !LowerCaseEqualsASCII(country, "es")) {
98         language += '-';
99         language += country;
100         return language;
101     }
102     return "es-MX";
103   }
104   // Currently, Chrome has only "es" and "es-419", but later we may have
105   // more specific "es-RR".
106   return locale_string;
107 }
108 
SetICUDefaultLocale(const std::string & locale_string)109 void SetICUDefaultLocale(const std::string& locale_string) {
110   icu::Locale locale(ICULocaleName(locale_string).c_str());
111   UErrorCode error_code = U_ZERO_ERROR;
112   icu::Locale::setDefault(locale, error_code);
113   // This return value is actually bogus because Locale object is
114   // an ID and setDefault seems to always succeed (regardless of the
115   // presence of actual locale data). However,
116   // it does not hurt to have it as a sanity check.
117   DCHECK(U_SUCCESS(error_code));
118   g_icu_text_direction = UNKNOWN_DIRECTION;
119 }
120 
IsRTL()121 bool IsRTL() {
122   return ICUIsRTL();
123 }
124 
ICUIsRTL()125 bool ICUIsRTL() {
126   if (g_icu_text_direction == UNKNOWN_DIRECTION) {
127     const icu::Locale& locale = icu::Locale::getDefault();
128     g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
129   }
130   return g_icu_text_direction == RIGHT_TO_LEFT;
131 }
132 
GetTextDirectionForLocale(const char * locale_name)133 TextDirection GetTextDirectionForLocale(const char* locale_name) {
134   UErrorCode status = U_ZERO_ERROR;
135   ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
136   DCHECK(U_SUCCESS(status));
137   // Treat anything other than RTL as LTR.
138   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
139 }
140 
GetFirstStrongCharacterDirection(const string16 & text)141 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
142   const UChar* string = text.c_str();
143   size_t length = text.length();
144   size_t position = 0;
145   while (position < length) {
146     UChar32 character;
147     size_t next_position = position;
148     U16_NEXT(string, next_position, length, character);
149     TextDirection direction = GetCharacterDirection(character);
150     if (direction != UNKNOWN_DIRECTION)
151       return direction;
152     position = next_position;
153   }
154   return LEFT_TO_RIGHT;
155 }
156 
GetLastStrongCharacterDirection(const string16 & text)157 TextDirection GetLastStrongCharacterDirection(const string16& text) {
158   const UChar* string = text.c_str();
159   size_t position = text.length();
160   while (position > 0) {
161     UChar32 character;
162     size_t prev_position = position;
163     U16_PREV(string, 0, prev_position, character);
164     TextDirection direction = GetCharacterDirection(character);
165     if (direction != UNKNOWN_DIRECTION)
166       return direction;
167     position = prev_position;
168   }
169   return LEFT_TO_RIGHT;
170 }
171 
GetStringDirection(const string16 & text)172 TextDirection GetStringDirection(const string16& text) {
173   const UChar* string = text.c_str();
174   size_t length = text.length();
175   size_t position = 0;
176 
177   TextDirection result(UNKNOWN_DIRECTION);
178   while (position < length) {
179     UChar32 character;
180     size_t next_position = position;
181     U16_NEXT(string, next_position, length, character);
182     TextDirection direction = GetCharacterDirection(character);
183     if (direction != UNKNOWN_DIRECTION) {
184       if (result != UNKNOWN_DIRECTION && result != direction)
185         return UNKNOWN_DIRECTION;
186       result = direction;
187     }
188     position = next_position;
189   }
190 
191   // Handle the case of a string not containing any strong directionality
192   // characters defaulting to LEFT_TO_RIGHT.
193   if (result == UNKNOWN_DIRECTION)
194     return LEFT_TO_RIGHT;
195 
196   return result;
197 }
198 
199 #if defined(OS_WIN)
AdjustStringForLocaleDirection(string16 * text)200 bool AdjustStringForLocaleDirection(string16* text) {
201   if (!IsRTL() || text->empty())
202     return false;
203 
204   // Marking the string as LTR if the locale is RTL and the string does not
205   // contain strong RTL characters. Otherwise, mark the string as RTL.
206   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
207   if (!has_rtl_chars)
208     WrapStringWithLTRFormatting(text);
209   else
210     WrapStringWithRTLFormatting(text);
211 
212   return true;
213 }
214 
UnadjustStringForLocaleDirection(string16 * text)215 bool UnadjustStringForLocaleDirection(string16* text) {
216   if (!IsRTL() || text->empty())
217     return false;
218 
219   *text = StripWrappingBidiControlCharacters(*text);
220   return true;
221 }
222 #else
AdjustStringForLocaleDirection(string16 * text)223 bool AdjustStringForLocaleDirection(string16* text) {
224   // On OS X & GTK the directionality of a label is determined by the first
225   // strongly directional character.
226   // However, we want to make sure that in an LTR-language-UI all strings are
227   // left aligned and vice versa.
228   // A problem can arise if we display a string which starts with user input.
229   // User input may be of the opposite directionality to the UI. So the whole
230   // string will be displayed in the opposite directionality, e.g. if we want to
231   // display in an LTR UI [such as US English]:
232   //
233   // EMAN_NOISNETXE is now installed.
234   //
235   // Since EXTENSION_NAME begins with a strong RTL char, the label's
236   // directionality will be set to RTL and the string will be displayed visually
237   // as:
238   //
239   // .is now installed EMAN_NOISNETXE
240   //
241   // In order to solve this issue, we prepend an LRM to the string. An LRM is a
242   // strongly directional LTR char.
243   // We also append an LRM at the end, which ensures that we're in an LTR
244   // context.
245 
246   // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
247   // box so there is no issue with displaying zero-width bidi control characters
248   // on any system.  Thus no need for the !IsRTL() check here.
249   if (text->empty())
250     return false;
251 
252   bool ui_direction_is_rtl = IsRTL();
253 
254   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
255   if (!ui_direction_is_rtl && has_rtl_chars) {
256     WrapStringWithRTLFormatting(text);
257     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
258                  kLeftToRightMark);
259     text->push_back(kLeftToRightMark);
260   } else if (ui_direction_is_rtl && has_rtl_chars) {
261     WrapStringWithRTLFormatting(text);
262     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
263                  kRightToLeftMark);
264     text->push_back(kRightToLeftMark);
265   } else if (ui_direction_is_rtl) {
266     WrapStringWithLTRFormatting(text);
267     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
268                  kRightToLeftMark);
269     text->push_back(kRightToLeftMark);
270   } else {
271     return false;
272   }
273 
274   return true;
275 }
276 
UnadjustStringForLocaleDirection(string16 * text)277 bool UnadjustStringForLocaleDirection(string16* text) {
278   if (text->empty())
279     return false;
280 
281   size_t begin_index = 0;
282   char16 begin = text->at(begin_index);
283   if (begin == kLeftToRightMark ||
284       begin == kRightToLeftMark) {
285     ++begin_index;
286   }
287 
288   size_t end_index = text->length() - 1;
289   char16 end = text->at(end_index);
290   if (end == kLeftToRightMark ||
291       end == kRightToLeftMark) {
292     --end_index;
293   }
294 
295   string16 unmarked_text =
296       text->substr(begin_index, end_index - begin_index + 1);
297   *text = StripWrappingBidiControlCharacters(unmarked_text);
298   return true;
299 }
300 
301 #endif  // !OS_WIN
302 
StringContainsStrongRTLChars(const string16 & text)303 bool StringContainsStrongRTLChars(const string16& text) {
304   const UChar* string = text.c_str();
305   size_t length = text.length();
306   size_t position = 0;
307   while (position < length) {
308     UChar32 character;
309     size_t next_position = position;
310     U16_NEXT(string, next_position, length, character);
311 
312     // Now that we have the character, we use ICU in order to query for the
313     // appropriate Unicode BiDi character type.
314     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
315     if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
316       return true;
317 
318     position = next_position;
319   }
320 
321   return false;
322 }
323 
WrapStringWithLTRFormatting(string16 * text)324 void WrapStringWithLTRFormatting(string16* text) {
325   if (text->empty())
326     return;
327 
328   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
329   text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
330                kLeftToRightEmbeddingMark);
331 
332   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
333   text->push_back(kPopDirectionalFormatting);
334 }
335 
WrapStringWithRTLFormatting(string16 * text)336 void WrapStringWithRTLFormatting(string16* text) {
337   if (text->empty())
338     return;
339 
340   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
341   text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
342                kRightToLeftEmbeddingMark);
343 
344   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
345   text->push_back(kPopDirectionalFormatting);
346 }
347 
WrapPathWithLTRFormatting(const FilePath & path,string16 * rtl_safe_path)348 void WrapPathWithLTRFormatting(const FilePath& path,
349                                string16* rtl_safe_path) {
350   // Wrap the overall path with LRE-PDF pair which essentialy marks the
351   // string as a Left-To-Right string.
352   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
353   rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
354 #if defined(OS_MACOSX)
355     rtl_safe_path->append(UTF8ToUTF16(path.value()));
356 #elif defined(OS_WIN)
357     rtl_safe_path->append(path.value());
358 #else  // defined(OS_POSIX) && !defined(OS_MACOSX)
359     std::wstring wide_path = base::SysNativeMBToWide(path.value());
360     rtl_safe_path->append(WideToUTF16(wide_path));
361 #endif
362   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
363   rtl_safe_path->push_back(kPopDirectionalFormatting);
364 }
365 
// Returns |text| wrapped with LTR formatting when the UI is RTL or when the
// first strongly directional character of |text| is RTL; otherwise returns
// |text| unchanged.
string16 GetDisplayStringInLTRDirectionality(const string16& text) {
  // Always wrap the string in RTL UI (it may be appended to RTL string).
  // Also wrap strings with an RTL first strong character direction in LTR UI.
  const bool needs_wrapping =
      IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT;
  if (!needs_wrapping)
    return text;

  string16 wrapped(text);
  WrapStringWithLTRFormatting(&wrapped);
  return wrapped;
}
376 
// Returns a copy of |text| without a leading LRE/RLE/LRO/RLO character (if
// the first character is one of those) and without a trailing PDF character
// (if the last character is one). At most one character is removed from each
// end. Empty text is returned as is.
string16 StripWrappingBidiControlCharacters(const string16& text) {
  if (text.empty())
    return text;

  const char16 leading = text[0];
  const bool has_opening_control =
      leading == kLeftToRightEmbeddingMark ||
      leading == kRightToLeftEmbeddingMark ||
      leading == kLeftToRightOverride ||
      leading == kRightToLeftOverride;

  // [first, last) is the range that survives the stripping.
  const size_t first = has_opening_control ? 1 : 0;
  size_t last = text.length();
  if (text[last - 1] == kPopDirectionalFormatting)
    --last;

  return text.substr(first, last - first);
}
392 
393 }  // namespace i18n
394 }  // namespace base
395