• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/icu_util.h"
6 
7 #include "build/build_config.h"
8 
9 #if BUILDFLAG(IS_WIN)
10 #include <windows.h>
11 #endif
12 
13 #include <string.h>
14 
15 #include <memory>
16 #include <string>
17 
18 #include "base/debug/alias.h"
19 #include "base/environment.h"
20 #include "base/files/file_path.h"
21 #include "base/files/file_util.h"
22 #include "base/files/memory_mapped_file.h"
23 #include "base/logging.h"
24 #include "base/metrics/histogram_functions.h"
25 #include "base/metrics/metrics_hashes.h"
26 #include "base/path_service.h"
27 #include "base/strings/string_util.h"
28 #include "build/chromecast_buildflags.h"
29 #include "third_party/icu/source/common/unicode/putil.h"
30 #include "third_party/icu/source/common/unicode/udata.h"
31 #include "third_party/icu/source/common/unicode/utrace.h"
32 
33 #if BUILDFLAG(IS_ANDROID)
34 #include "base/android/apk_assets.h"
35 #include "base/android/timezone_utils.h"
36 #endif
37 
38 #if BUILDFLAG(IS_IOS)
39 #include "base/ios/ios_util.h"
40 #endif
41 
42 #if BUILDFLAG(IS_APPLE)
43 #include "base/mac/foundation_util.h"
44 #endif
45 
46 #if BUILDFLAG(IS_FUCHSIA)
47 #include "base/fuchsia/intl_profile_watcher.h"
48 #endif
49 
50 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA)
51 #include "third_party/icu/source/common/unicode/unistr.h"
52 #endif
53 
54 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) || \
55     BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
56 #include "third_party/icu/source/i18n/unicode/timezone.h"
57 #endif
58 
59 namespace base::i18n {
60 
61 #if !BUILDFLAG(IS_NACL)
62 namespace {
63 
64 #if DCHECK_IS_ON()
65 // Assert that we are not called more than once.  Even though calling this
66 // function isn't harmful (ICU can handle it), being called twice probably
67 // indicates a programming error.
68 bool g_check_called_once = true;
69 bool g_called_once = false;
70 #endif  // DCHECK_IS_ON()
71 
72 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
73 
74 // To debug http://crbug.com/445616.
75 int g_debug_icu_last_error;
76 int g_debug_icu_load;
77 int g_debug_icu_pf_error_details;
78 int g_debug_icu_pf_last_error;
79 #if BUILDFLAG(IS_WIN)
80 wchar_t g_debug_icu_pf_filename[_MAX_PATH];
81 #endif  // BUILDFLAG(IS_WIN)
// Use an unversioned file name to simplify an ICU version update down the
// road: there is no need to change the file name in multiple places (gyp
// files, Windows build package configurations, etc.). The 'l' stands for
// Little Endian. This variable is exported through the header file.
86 const char kIcuDataFileName[] = "icudtl.dat";
87 
88 // Time zone data loading.
89 // For now, only Fuchsia has a meaningful use case for this feature, so it is
90 // only implemented for OS_FUCHSIA.
91 #if BUILDFLAG(IS_FUCHSIA)
92 // The environment variable used to point the ICU data loader to the directory
93 // containing time zone data. This is available from ICU version 54. The env
94 // variable approach is antiquated by today's standards (2019), but is the
95 // recommended way to configure ICU.
96 //
97 // See for details: http://userguide.icu-project.org/datetime/timezone
98 const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR";
99 
100 // Up-to-date time zone data is expected to be provided by the system as a
101 // directory offered to Chromium components at /config/tzdata.  Chromium
102 // components should "use" the `tzdata` directory capability, specifying the
103 // "/config/tzdata" path.  The capability's "availability" should be set to
104 // "required" or "optional" as appropriate - if no data is provided then ICU
105 // initialization will (in future silently) fall-back to the (potentially stale)
106 // timezone data included in the package.
107 //
108 // TimeZoneDataTest.* tests verify that external timezone data is correctly
109 // loaded from the system, to alert developers if the platform and Chromium
110 // versions are no longer compatible versions.
111 const char kIcuTimeZoneDataDir[] = "/config/tzdata/icu/44/le";
112 
113 // Path used to receive tzdata via the legacy config-data mechanism.
114 const char kLegacyIcuTimeZoneDataDir[] = "/config/data/tzdata/icu/44/le";
115 #endif  // BUILDFLAG(IS_FUCHSIA)
116 
117 #if BUILDFLAG(IS_ANDROID)
118 const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat";
119 #endif  // BUILDFLAG(IS_ANDROID)
120 
121 // File handle intentionally never closed. Not using File here because its
122 // Windows implementation guards against two instances owning the same
123 // PlatformFile (which we allow since we know it is never freed).
124 PlatformFile g_icudtl_pf = kInvalidPlatformFile;
125 IcuDataFile* g_icudtl_mapped_file = nullptr;
126 MemoryMappedFile::Region g_icudtl_region;
127 
128 #if BUILDFLAG(IS_FUCHSIA)
129 // The directory from which the ICU data loader will be configured to load time
130 // zone data. It is only changed by SetIcuTimeZoneDataDirForTesting().
131 const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
132 #endif  // BUILDFLAG(IS_FUCHSIA)
133 
LazyInitIcuDataFile()134 void LazyInitIcuDataFile() {
135   if (g_icudtl_pf != kInvalidPlatformFile) {
136     return;
137   }
138 #if BUILDFLAG(IS_ANDROID)
139   int fd =
140       android::OpenApkAsset(kAndroidAssetsIcuDataFileName, &g_icudtl_region);
141   g_icudtl_pf = fd;
142   if (fd != -1) {
143     return;
144   }
145 #endif  // BUILDFLAG(IS_ANDROID)
146   // For unit tests, data file is located on disk, so try there as a fallback.
147 #if !BUILDFLAG(IS_APPLE)
148   FilePath data_path;
149   if (!PathService::Get(DIR_ASSETS, &data_path)) {
150     LOG(ERROR) << "Can't find " << kIcuDataFileName;
151     return;
152   }
153 #if BUILDFLAG(IS_WIN)
154   // TODO(brucedawson): http://crbug.com/445616
155   wchar_t tmp_buffer[_MAX_PATH] = {0};
156   wcscpy_s(tmp_buffer, data_path.value().c_str());
157   debug::Alias(tmp_buffer);
158 #endif
159   data_path = data_path.AppendASCII(kIcuDataFileName);
160 
161 #if BUILDFLAG(IS_WIN)
162   // TODO(brucedawson): http://crbug.com/445616
163   wchar_t tmp_buffer2[_MAX_PATH] = {0};
164   wcscpy_s(tmp_buffer2, data_path.value().c_str());
165   debug::Alias(tmp_buffer2);
166 #endif
167 
168 #else  // !BUILDFLAG(IS_APPLE)
169   // Assume it is in the framework bundle's Resources directory.
170   FilePath data_path = mac::PathForFrameworkBundleResource(kIcuDataFileName);
171 #if BUILDFLAG(IS_IOS)
172   FilePath override_data_path = ios::FilePathOfEmbeddedICU();
173   if (!override_data_path.empty()) {
174     data_path = override_data_path;
175   }
176 #endif  // !BUILDFLAG(IS_IOS)
177   if (data_path.empty()) {
178     LOG(ERROR) << kIcuDataFileName << " not found in bundle";
179     return;
180   }
181 #endif  // !BUILDFLAG(IS_APPLE)
182   File file(data_path, File::FLAG_OPEN | File::FLAG_READ);
183   if (file.IsValid()) {
184     // TODO(brucedawson): http://crbug.com/445616.
185     g_debug_icu_pf_last_error = 0;
186     g_debug_icu_pf_error_details = 0;
187 #if BUILDFLAG(IS_WIN)
188     g_debug_icu_pf_filename[0] = 0;
189 #endif  // BUILDFLAG(IS_WIN)
190 
191     g_icudtl_pf = file.TakePlatformFile();
192     g_icudtl_region = MemoryMappedFile::Region::kWholeFile;
193   }
194 #if BUILDFLAG(IS_WIN)
195   else {
196     // TODO(brucedawson): http://crbug.com/445616.
197     g_debug_icu_pf_last_error = ::GetLastError();
198     g_debug_icu_pf_error_details = file.error_details();
199     wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str());
200   }
201 #endif  // BUILDFLAG(IS_WIN)
202 }
203 
204 // Configures ICU to load external time zone data, if appropriate.
InitializeExternalTimeZoneData()205 void InitializeExternalTimeZoneData() {
206 #if BUILDFLAG(IS_FUCHSIA)
207   // Set the environment variable to override the location used by ICU.
208   // Loading can still fail if the directory is empty or its data is invalid.
209   std::unique_ptr<base::Environment> env = base::Environment::Create();
210 
211   // If the ICU tzdata path exists then do not fall-back to config-data.
212   // TODO(crbug.com/1360077): Remove fall-back once all components are migrated.
213   if (base::PathExists(base::FilePath(g_icu_time_zone_data_dir))) {
214     // If the tzdata directory does not exist then silently fallback to
215     // using the inbuilt (possibly stale) timezone data.
216     if (base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) {
217       env->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir);
218     }
219 
220   } else if (g_icu_time_zone_data_dir == kIcuTimeZoneDataDir &&
221              base::DirectoryExists(
222                  base::FilePath((kLegacyIcuTimeZoneDataDir)))) {
223     // Only fall-back to attempting to load from the legacy config-data path
224     // if `g_icu_time_zone_data_dir` has not been changed by a test.
225     env->SetVar(kIcuTimeZoneEnvVariable, kLegacyIcuTimeZoneDataDir);
226   } else {
227     PLOG(WARNING) << "Could not locate tzdata in config-data. "
228                   << "Using built-in timezone database";
229   }
230 #endif  // BUILDFLAG(IS_FUCHSIA)
231 }
232 
LoadIcuData(PlatformFile data_fd,const MemoryMappedFile::Region & data_region,std::unique_ptr<IcuDataFile> * out_mapped_data_file,UErrorCode * out_error_code)233 int LoadIcuData(PlatformFile data_fd,
234                 const MemoryMappedFile::Region& data_region,
235                 std::unique_ptr<IcuDataFile>* out_mapped_data_file,
236                 UErrorCode* out_error_code) {
237   InitializeExternalTimeZoneData();
238 
239   if (data_fd == kInvalidPlatformFile) {
240     LOG(ERROR) << "Invalid file descriptor to ICU data received.";
241     return 1;  // To debug http://crbug.com/445616.
242   }
243 
244   *out_mapped_data_file = std::make_unique<IcuDataFile>();
245   if (!(*out_mapped_data_file)->Initialize(File(data_fd), data_region)) {
246     LOG(ERROR) << "Couldn't mmap icu data file";
247     return 2;  // To debug http://crbug.com/445616.
248   }
249 
250   (*out_error_code) = U_ZERO_ERROR;
251   udata_setCommonData(const_cast<uint8_t*>((*out_mapped_data_file)->data()),
252                       out_error_code);
253   if (U_FAILURE(*out_error_code)) {
254     LOG(ERROR) << "Failed to initialize ICU with data file: "
255                << u_errorName(*out_error_code);
256     return 3;  // To debug http://crbug.com/445616.
257   }
258 
259   return 0;
260 }
261 
InitializeICUWithFileDescriptorInternal(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)262 bool InitializeICUWithFileDescriptorInternal(
263     PlatformFile data_fd,
264     const MemoryMappedFile::Region& data_region) {
265   // This can be called multiple times in tests.
266   if (g_icudtl_mapped_file) {
267     g_debug_icu_load = 0;  // To debug http://crbug.com/445616.
268     return true;
269   }
270 
271   std::unique_ptr<IcuDataFile> mapped_file;
272   UErrorCode err;
273   g_debug_icu_load = LoadIcuData(data_fd, data_region, &mapped_file, &err);
274   if (g_debug_icu_load == 1 || g_debug_icu_load == 2) {
275     return false;
276   }
277   g_icudtl_mapped_file = mapped_file.release();
278 
279   if (g_debug_icu_load == 3) {
280     g_debug_icu_last_error = err;
281   }
282 
283   // Never try to load ICU data from files.
284   udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
285   return U_SUCCESS(err);
286 }
287 
InitializeICUFromDataFile()288 bool InitializeICUFromDataFile() {
289   // If the ICU data directory is set, ICU won't actually load the data until
290   // it is needed.  This can fail if the process is sandboxed at that time.
291   // Instead, we map the file in and hand off the data so the sandbox won't
292   // cause any problems.
293   LazyInitIcuDataFile();
294   bool result =
295       InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region);
296 
297 #if BUILDFLAG(IS_WIN)
298   int debug_icu_load = g_debug_icu_load;
299   debug::Alias(&debug_icu_load);
300   int debug_icu_last_error = g_debug_icu_last_error;
301   debug::Alias(&debug_icu_last_error);
302   int debug_icu_pf_last_error = g_debug_icu_pf_last_error;
303   debug::Alias(&debug_icu_pf_last_error);
304   int debug_icu_pf_error_details = g_debug_icu_pf_error_details;
305   debug::Alias(&debug_icu_pf_error_details);
306   wchar_t debug_icu_pf_filename[_MAX_PATH] = {0};
307   wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename);
308   debug::Alias(&debug_icu_pf_filename);
309   CHECK(result);  // TODO(brucedawson): http://crbug.com/445616
310 #endif            // BUILDFLAG(IS_WIN)
311 
312   return result;
313 }
314 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
315 
316 // Explicitly initialize ICU's time zone if necessary.
317 // On some platforms, the time zone must be explicitly initialized zone rather
318 // than relying on ICU's internal initialization.
InitializeIcuTimeZone()319 void InitializeIcuTimeZone() {
320 #if BUILDFLAG(IS_ANDROID)
321   // On Android, we can't leave it up to ICU to set the default time zone
322   // because ICU's time zone detection does not work in many time zones (e.g.
323   // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host
324   // time zone and set the ICU default time zone accordingly in advance of
325   // actual use. See crbug.com/722821 and
326   // https://ssl.icu-project.org/trac/ticket/13208 .
327   std::u16string zone_id = android::GetDefaultTimeZoneId();
328   icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone(
329       icu::UnicodeString(false, zone_id.data(), zone_id.length())));
330 #elif BUILDFLAG(IS_FUCHSIA)
331   // The platform-specific mechanisms used by ICU's detectHostTimeZone() to
332   // determine the default time zone will not work on Fuchsia. Therefore,
333   // proactively set the default system.
334   // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(),
335   // which uses the current default to detect whether the time zone changed in
336   // the new profile.
337   // If the system time zone cannot be obtained or is not understood by ICU,
338   // the "unknown" time zone will be returned by createTimeZone() and used.
339   std::string zone_id =
340       FuchsiaIntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization();
341   icu::TimeZone::adoptDefault(
342       icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(zone_id)));
343 #elif BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
344   // To respond to the time zone change properly, the default time zone
345   // cache in ICU has to be populated on starting up.
346   // See TimeZoneMonitorLinux::NotifyClientsFromImpl().
347   std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault());
348 #endif  // BUILDFLAG(IS_ANDROID)
349 }
350 
// Identifies the kind of ICU object being created: the break iterator
// flavours followed by the dictionary-based break engines. The numeric values
// are spelled out explicitly and |kMaxValue| aliases the last enumerator.
// NOTE(review): no use of this enum is visible in this file - confirm whether
// it is still needed.
enum class ICUCreateInstance {
  // Break iterators.
  kCharacterBreakIterator = 0,
  kWordBreakIterator = 1,
  kLineBreakIterator = 2,
  kLineBreakIteratorTypeLoose = 3,
  kLineBreakIteratorTypeNormal = 4,
  kLineBreakIteratorTypeStrict = 5,
  kSentenceBreakIterator = 6,
  kTitleBreakIterator = 7,

  // Break engines.
  kThaiBreakEngine = 8,
  kLaoBreakEngine = 9,
  kBurmeseBreakEngine = 10,
  kKhmerBreakEngine = 11,
  kChineseJapaneseBreakEngine = 12,

  kMaxValue = kChineseJapaneseBreakEngine
};
368 
369 // Common initialization to run regardless of how ICU is initialized.
370 // There are multiple exposed InitializeIcu* functions. This should be called
371 // as at the end of (the last functions in the sequence of) these functions.
DoCommonInitialization()372 bool DoCommonInitialization() {
373   // TODO(jungshik): Some callers do not care about tz at all. If necessary,
374   // add a boolean argument to this function to init the default tz only
375   // when requested.
376   InitializeIcuTimeZone();
377 
378   utrace_setLevel(UTRACE_VERBOSE);
379   return true;
380 }
381 
382 }  // namespace
383 
384 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
InitializeICUWithFileDescriptor(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)385 bool InitializeICUWithFileDescriptor(
386     PlatformFile data_fd,
387     const MemoryMappedFile::Region& data_region) {
388 #if DCHECK_IS_ON()
389   DCHECK(!g_check_called_once || !g_called_once);
390   g_called_once = true;
391 #endif
392   if (!InitializeICUWithFileDescriptorInternal(data_fd, data_region))
393     return false;
394 
395   return DoCommonInitialization();
396 }
397 
GetIcuDataFileHandle(MemoryMappedFile::Region * out_region)398 PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) {
399   CHECK_NE(g_icudtl_pf, kInvalidPlatformFile);
400   *out_region = g_icudtl_region;
401   return g_icudtl_pf;
402 }
403 
ResetGlobalsForTesting()404 void ResetGlobalsForTesting() {
405   g_icudtl_pf = kInvalidPlatformFile;
406   g_icudtl_mapped_file = nullptr;
407 #if BUILDFLAG(IS_FUCHSIA)
408   g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
409 #endif  // BUILDFLAG(IS_FUCHSIA)
410 }
411 
412 #if BUILDFLAG(IS_FUCHSIA)
413 // |dir| must remain valid until ResetGlobalsForTesting() is called.
SetIcuTimeZoneDataDirForTesting(const char * dir)414 void SetIcuTimeZoneDataDirForTesting(const char* dir) {
415   g_icu_time_zone_data_dir = dir;
416 }
417 #endif  // BUILDFLAG(IS_FUCHSIA)
418 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
419 
InitializeICU()420 bool InitializeICU() {
421 #if DCHECK_IS_ON()
422   DCHECK(!g_check_called_once || !g_called_once);
423   g_called_once = true;
424 #endif
425 
426 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
427   // The ICU data is statically linked.
428 #elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
429   if (!InitializeICUFromDataFile())
430     return false;
431 #else
432 #error Unsupported ICU_UTIL_DATA_IMPL value
433 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
434 
435   return DoCommonInitialization();
436 }
437 
AllowMultipleInitializeCallsForTesting()438 void AllowMultipleInitializeCallsForTesting() {
439 #if DCHECK_IS_ON()
440   g_check_called_once = false;
441 #endif
442 }
443 
444 #endif  // !BUILDFLAG(IS_NACL)
445 
446 }  // namespace base::i18n
447