1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/icu_util.h"
6
7 #include "build/build_config.h"
8
9 #if BUILDFLAG(IS_WIN)
10 #include <windows.h>
11 #endif
12
13 #include <string.h>
14
15 #include <memory>
16 #include <string>
17
18 #include "base/debug/alias.h"
19 #include "base/environment.h"
20 #include "base/files/file_path.h"
21 #include "base/files/file_util.h"
22 #include "base/files/memory_mapped_file.h"
23 #include "base/logging.h"
24 #include "base/metrics/histogram_functions.h"
25 #include "base/metrics/metrics_hashes.h"
26 #include "base/path_service.h"
27 #include "base/strings/string_util.h"
28 #include "build/chromecast_buildflags.h"
29 #include "third_party/icu/source/common/unicode/putil.h"
30 #include "third_party/icu/source/common/unicode/udata.h"
31 #include "third_party/icu/source/common/unicode/utrace.h"
32
33 #if BUILDFLAG(IS_ANDROID)
34 #include "base/android/apk_assets.h"
35 #include "base/android/timezone_utils.h"
36 #endif
37
38 #if BUILDFLAG(IS_IOS)
39 #include "base/ios/ios_util.h"
40 #endif
41
42 #if BUILDFLAG(IS_APPLE)
43 #include "base/mac/foundation_util.h"
44 #endif
45
46 #if BUILDFLAG(IS_FUCHSIA)
47 #include "base/fuchsia/intl_profile_watcher.h"
48 #endif
49
50 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA)
51 #include "third_party/icu/source/common/unicode/unistr.h"
52 #endif
53
54 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) || \
55 BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
56 #include "third_party/icu/source/i18n/unicode/timezone.h"
57 #endif
58
59 namespace base::i18n {
60
61 #if !BUILDFLAG(IS_NACL)
62 namespace {
63
#if DCHECK_IS_ON()
// Assert that we are not called more than once. Even though calling this
// function isn't harmful (ICU can handle it), being called twice probably
// indicates a programming error.
//
// While true, the public initialization entry points DCHECK that they have not
// been entered before. Cleared by AllowMultipleInitializeCallsForTesting().
bool g_check_called_once = true;
// Set to true by InitializeICU() and InitializeICUWithFileDescriptor() as soon
// as either one is entered.
bool g_called_once = false;
#endif  // DCHECK_IS_ON()
71
72 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
73
// To debug http://crbug.com/445616.
// Each value below records one detail of the most recent attempt to open and
// load the ICU data file.
// ICU status captured when LoadIcuData() reported step code 3.
int g_debug_icu_last_error;
// Step code returned by the latest LoadIcuData() call (0 means success).
int g_debug_icu_load;
// File::error_details() of a failed open; only written on Windows, and reset
// to 0 on a successful open.
int g_debug_icu_pf_error_details;
// ::GetLastError() after a failed open; only written on Windows, and reset to
// 0 on a successful open.
int g_debug_icu_pf_last_error;
#if BUILDFLAG(IS_WIN)
// Path that could not be opened; emptied on a successful open.
wchar_t g_debug_icu_pf_filename[_MAX_PATH];
#endif  // BUILDFLAG(IS_WIN)
// Use an unversioned file name to simplify an ICU version update down the
// road. No need to change the filename in multiple places (gyp files, windows
// build pkg configurations, etc). 'l' stands for Little Endian.
// This variable is exported through the header file.
// NOTE(review): this definition sits inside the unnamed namespace, so the
// "exported through the header" remark above looks stale -- confirm.
const char kIcuDataFileName[] = "icudtl.dat";

// Time zone data loading.
// For now, only Fuchsia has a meaningful use case for this feature, so it is
// only implemented for OS_FUCHSIA.
#if BUILDFLAG(IS_FUCHSIA)
// The environment variable used to point the ICU data loader to the directory
// containing time zone data. This is available from ICU version 54. The env
// variable approach is antiquated by today's standards (2019), but is the
// recommended way to configure ICU.
//
// See for details: http://userguide.icu-project.org/datetime/timezone
const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR";

// Up-to-date time zone data is expected to be provided by the system as a
// directory offered to Chromium components at /config/tzdata. Chromium
// components should "use" the `tzdata` directory capability, specifying the
// "/config/tzdata" path. The capability's "availability" should be set to
// "required" or "optional" as appropriate - if no data is provided then ICU
// initialization will (in future silently) fall-back to the (potentially stale)
// timezone data included in the package.
//
// TimeZoneDataTest.* tests verify that external timezone data is correctly
// loaded from the system, to alert developers if the platform and Chromium
// versions are no longer compatible versions.
const char kIcuTimeZoneDataDir[] = "/config/tzdata/icu/44/le";

// Path used to receive tzdata via the legacy config-data mechanism.
const char kLegacyIcuTimeZoneDataDir[] = "/config/data/tzdata/icu/44/le";
#endif  // BUILDFLAG(IS_FUCHSIA)

#if BUILDFLAG(IS_ANDROID)
// Name of the ICU data asset that LazyInitIcuDataFile() asks the APK for.
const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat";
#endif  // BUILDFLAG(IS_ANDROID)

// File handle intentionally never closed. Not using File here because its
// Windows implementation guards against two instances owning the same
// PlatformFile (which we allow since we know it is never freed).
PlatformFile g_icudtl_pf = kInvalidPlatformFile;
// Mapping of the ICU data that has been handed to ICU; stays null until
// InitializeICUWithFileDescriptorInternal() stores one. Nothing in this file
// deletes it.
IcuDataFile* g_icudtl_mapped_file = nullptr;
// Region of |g_icudtl_pf| that holds the ICU data.
MemoryMappedFile::Region g_icudtl_region;

#if BUILDFLAG(IS_FUCHSIA)
// The directory from which the ICU data loader will be configured to load time
// zone data. It is only changed by SetIcuTimeZoneDataDirForTesting().
const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
#endif  // BUILDFLAG(IS_FUCHSIA)
133
LazyInitIcuDataFile()134 void LazyInitIcuDataFile() {
135 if (g_icudtl_pf != kInvalidPlatformFile) {
136 return;
137 }
138 #if BUILDFLAG(IS_ANDROID)
139 int fd =
140 android::OpenApkAsset(kAndroidAssetsIcuDataFileName, &g_icudtl_region);
141 g_icudtl_pf = fd;
142 if (fd != -1) {
143 return;
144 }
145 #endif // BUILDFLAG(IS_ANDROID)
146 // For unit tests, data file is located on disk, so try there as a fallback.
147 #if !BUILDFLAG(IS_APPLE)
148 FilePath data_path;
149 if (!PathService::Get(DIR_ASSETS, &data_path)) {
150 LOG(ERROR) << "Can't find " << kIcuDataFileName;
151 return;
152 }
153 #if BUILDFLAG(IS_WIN)
154 // TODO(brucedawson): http://crbug.com/445616
155 wchar_t tmp_buffer[_MAX_PATH] = {0};
156 wcscpy_s(tmp_buffer, data_path.value().c_str());
157 debug::Alias(tmp_buffer);
158 #endif
159 data_path = data_path.AppendASCII(kIcuDataFileName);
160
161 #if BUILDFLAG(IS_WIN)
162 // TODO(brucedawson): http://crbug.com/445616
163 wchar_t tmp_buffer2[_MAX_PATH] = {0};
164 wcscpy_s(tmp_buffer2, data_path.value().c_str());
165 debug::Alias(tmp_buffer2);
166 #endif
167
168 #else // !BUILDFLAG(IS_APPLE)
169 // Assume it is in the framework bundle's Resources directory.
170 FilePath data_path = mac::PathForFrameworkBundleResource(kIcuDataFileName);
171 #if BUILDFLAG(IS_IOS)
172 FilePath override_data_path = ios::FilePathOfEmbeddedICU();
173 if (!override_data_path.empty()) {
174 data_path = override_data_path;
175 }
176 #endif // !BUILDFLAG(IS_IOS)
177 if (data_path.empty()) {
178 LOG(ERROR) << kIcuDataFileName << " not found in bundle";
179 return;
180 }
181 #endif // !BUILDFLAG(IS_APPLE)
182 File file(data_path, File::FLAG_OPEN | File::FLAG_READ);
183 if (file.IsValid()) {
184 // TODO(brucedawson): http://crbug.com/445616.
185 g_debug_icu_pf_last_error = 0;
186 g_debug_icu_pf_error_details = 0;
187 #if BUILDFLAG(IS_WIN)
188 g_debug_icu_pf_filename[0] = 0;
189 #endif // BUILDFLAG(IS_WIN)
190
191 g_icudtl_pf = file.TakePlatformFile();
192 g_icudtl_region = MemoryMappedFile::Region::kWholeFile;
193 }
194 #if BUILDFLAG(IS_WIN)
195 else {
196 // TODO(brucedawson): http://crbug.com/445616.
197 g_debug_icu_pf_last_error = ::GetLastError();
198 g_debug_icu_pf_error_details = file.error_details();
199 wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str());
200 }
201 #endif // BUILDFLAG(IS_WIN)
202 }
203
204 // Configures ICU to load external time zone data, if appropriate.
InitializeExternalTimeZoneData()205 void InitializeExternalTimeZoneData() {
206 #if BUILDFLAG(IS_FUCHSIA)
207 // Set the environment variable to override the location used by ICU.
208 // Loading can still fail if the directory is empty or its data is invalid.
209 std::unique_ptr<base::Environment> env = base::Environment::Create();
210
211 // If the ICU tzdata path exists then do not fall-back to config-data.
212 // TODO(crbug.com/1360077): Remove fall-back once all components are migrated.
213 if (base::PathExists(base::FilePath(g_icu_time_zone_data_dir))) {
214 // If the tzdata directory does not exist then silently fallback to
215 // using the inbuilt (possibly stale) timezone data.
216 if (base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) {
217 env->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir);
218 }
219
220 } else if (g_icu_time_zone_data_dir == kIcuTimeZoneDataDir &&
221 base::DirectoryExists(
222 base::FilePath((kLegacyIcuTimeZoneDataDir)))) {
223 // Only fall-back to attempting to load from the legacy config-data path
224 // if `g_icu_time_zone_data_dir` has not been changed by a test.
225 env->SetVar(kIcuTimeZoneEnvVariable, kLegacyIcuTimeZoneDataDir);
226 } else {
227 PLOG(WARNING) << "Could not locate tzdata in config-data. "
228 << "Using built-in timezone database";
229 }
230 #endif // BUILDFLAG(IS_FUCHSIA)
231 }
232
LoadIcuData(PlatformFile data_fd,const MemoryMappedFile::Region & data_region,std::unique_ptr<IcuDataFile> * out_mapped_data_file,UErrorCode * out_error_code)233 int LoadIcuData(PlatformFile data_fd,
234 const MemoryMappedFile::Region& data_region,
235 std::unique_ptr<IcuDataFile>* out_mapped_data_file,
236 UErrorCode* out_error_code) {
237 InitializeExternalTimeZoneData();
238
239 if (data_fd == kInvalidPlatformFile) {
240 LOG(ERROR) << "Invalid file descriptor to ICU data received.";
241 return 1; // To debug http://crbug.com/445616.
242 }
243
244 *out_mapped_data_file = std::make_unique<IcuDataFile>();
245 if (!(*out_mapped_data_file)->Initialize(File(data_fd), data_region)) {
246 LOG(ERROR) << "Couldn't mmap icu data file";
247 return 2; // To debug http://crbug.com/445616.
248 }
249
250 (*out_error_code) = U_ZERO_ERROR;
251 udata_setCommonData(const_cast<uint8_t*>((*out_mapped_data_file)->data()),
252 out_error_code);
253 if (U_FAILURE(*out_error_code)) {
254 LOG(ERROR) << "Failed to initialize ICU with data file: "
255 << u_errorName(*out_error_code);
256 return 3; // To debug http://crbug.com/445616.
257 }
258
259 return 0;
260 }
261
InitializeICUWithFileDescriptorInternal(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)262 bool InitializeICUWithFileDescriptorInternal(
263 PlatformFile data_fd,
264 const MemoryMappedFile::Region& data_region) {
265 // This can be called multiple times in tests.
266 if (g_icudtl_mapped_file) {
267 g_debug_icu_load = 0; // To debug http://crbug.com/445616.
268 return true;
269 }
270
271 std::unique_ptr<IcuDataFile> mapped_file;
272 UErrorCode err;
273 g_debug_icu_load = LoadIcuData(data_fd, data_region, &mapped_file, &err);
274 if (g_debug_icu_load == 1 || g_debug_icu_load == 2) {
275 return false;
276 }
277 g_icudtl_mapped_file = mapped_file.release();
278
279 if (g_debug_icu_load == 3) {
280 g_debug_icu_last_error = err;
281 }
282
283 // Never try to load ICU data from files.
284 udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
285 return U_SUCCESS(err);
286 }
287
InitializeICUFromDataFile()288 bool InitializeICUFromDataFile() {
289 // If the ICU data directory is set, ICU won't actually load the data until
290 // it is needed. This can fail if the process is sandboxed at that time.
291 // Instead, we map the file in and hand off the data so the sandbox won't
292 // cause any problems.
293 LazyInitIcuDataFile();
294 bool result =
295 InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region);
296
297 #if BUILDFLAG(IS_WIN)
298 int debug_icu_load = g_debug_icu_load;
299 debug::Alias(&debug_icu_load);
300 int debug_icu_last_error = g_debug_icu_last_error;
301 debug::Alias(&debug_icu_last_error);
302 int debug_icu_pf_last_error = g_debug_icu_pf_last_error;
303 debug::Alias(&debug_icu_pf_last_error);
304 int debug_icu_pf_error_details = g_debug_icu_pf_error_details;
305 debug::Alias(&debug_icu_pf_error_details);
306 wchar_t debug_icu_pf_filename[_MAX_PATH] = {0};
307 wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename);
308 debug::Alias(&debug_icu_pf_filename);
309 CHECK(result); // TODO(brucedawson): http://crbug.com/445616
310 #endif // BUILDFLAG(IS_WIN)
311
312 return result;
313 }
314 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
315
316 // Explicitly initialize ICU's time zone if necessary.
317 // On some platforms, the time zone must be explicitly initialized zone rather
318 // than relying on ICU's internal initialization.
InitializeIcuTimeZone()319 void InitializeIcuTimeZone() {
320 #if BUILDFLAG(IS_ANDROID)
321 // On Android, we can't leave it up to ICU to set the default time zone
322 // because ICU's time zone detection does not work in many time zones (e.g.
323 // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host
324 // time zone and set the ICU default time zone accordingly in advance of
325 // actual use. See crbug.com/722821 and
326 // https://ssl.icu-project.org/trac/ticket/13208 .
327 std::u16string zone_id = android::GetDefaultTimeZoneId();
328 icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone(
329 icu::UnicodeString(false, zone_id.data(), zone_id.length())));
330 #elif BUILDFLAG(IS_FUCHSIA)
331 // The platform-specific mechanisms used by ICU's detectHostTimeZone() to
332 // determine the default time zone will not work on Fuchsia. Therefore,
333 // proactively set the default system.
334 // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(),
335 // which uses the current default to detect whether the time zone changed in
336 // the new profile.
337 // If the system time zone cannot be obtained or is not understood by ICU,
338 // the "unknown" time zone will be returned by createTimeZone() and used.
339 std::string zone_id =
340 FuchsiaIntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization();
341 icu::TimeZone::adoptDefault(
342 icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(zone_id)));
343 #elif BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
344 // To respond to the time zone change properly, the default time zone
345 // cache in ICU has to be populated on starting up.
346 // See TimeZoneMonitorLinux::NotifyClientsFromImpl().
347 std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault());
348 #endif // BUILDFLAG(IS_ANDROID)
349 }
350
// Enumerates kinds of ICU break iterators and dictionary break engines.
// Values are assigned explicitly and kMaxValue aliases the last entry.
// NOTE(review): nothing in this file refers to this enum; presumably it is a
// leftover from metrics code that recorded ICU object creation -- confirm
// before removing or renumbering.
enum class ICUCreateInstance {
  kCharacterBreakIterator = 0,
  kWordBreakIterator = 1,
  kLineBreakIterator = 2,
  kLineBreakIteratorTypeLoose = 3,
  kLineBreakIteratorTypeNormal = 4,
  kLineBreakIteratorTypeStrict = 5,
  kSentenceBreakIterator = 6,
  kTitleBreakIterator = 7,
  kThaiBreakEngine = 8,
  kLaoBreakEngine = 9,
  kBurmeseBreakEngine = 10,
  kKhmerBreakEngine = 11,
  kChineseJapaneseBreakEngine = 12,

  kMaxValue = kChineseJapaneseBreakEngine
};
368
369 // Common initialization to run regardless of how ICU is initialized.
370 // There are multiple exposed InitializeIcu* functions. This should be called
371 // as at the end of (the last functions in the sequence of) these functions.
DoCommonInitialization()372 bool DoCommonInitialization() {
373 // TODO(jungshik): Some callers do not care about tz at all. If necessary,
374 // add a boolean argument to this function to init the default tz only
375 // when requested.
376 InitializeIcuTimeZone();
377
378 utrace_setLevel(UTRACE_VERBOSE);
379 return true;
380 }
381
382 } // namespace
383
384 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
InitializeICUWithFileDescriptor(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)385 bool InitializeICUWithFileDescriptor(
386 PlatformFile data_fd,
387 const MemoryMappedFile::Region& data_region) {
388 #if DCHECK_IS_ON()
389 DCHECK(!g_check_called_once || !g_called_once);
390 g_called_once = true;
391 #endif
392 if (!InitializeICUWithFileDescriptorInternal(data_fd, data_region))
393 return false;
394
395 return DoCommonInitialization();
396 }
397
// Returns the cached ICU data file handle and writes the matching region to
// |*out_region|. CHECK-fails if no valid handle has been cached yet.
PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) {
  CHECK_NE(g_icudtl_pf, kInvalidPlatformFile);
  *out_region = g_icudtl_region;
  return g_icudtl_pf;
}
403
// Test-only: forgets the cached data file handle and the installed mapping so
// that a later initialization starts from scratch. On Fuchsia it also restores
// the default time zone data directory.
// The handle is not closed and the mapping is not deleted here; the pointers
// are simply overwritten. |g_icudtl_region| and the g_debug_* globals are left
// untouched.
void ResetGlobalsForTesting() {
  g_icudtl_pf = kInvalidPlatformFile;
  g_icudtl_mapped_file = nullptr;
#if BUILDFLAG(IS_FUCHSIA)
  g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
#endif  // BUILDFLAG(IS_FUCHSIA)
}
411
#if BUILDFLAG(IS_FUCHSIA)
// Test-only: overrides the directory from which time zone data is loaded.
// Only the pointer is stored (the string is not copied), hence:
// |dir| must remain valid until ResetGlobalsForTesting() is called.
void SetIcuTimeZoneDataDirForTesting(const char* dir) {
  g_icu_time_zone_data_dir = dir;
}
#endif  // BUILDFLAG(IS_FUCHSIA)
418 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
419
InitializeICU()420 bool InitializeICU() {
421 #if DCHECK_IS_ON()
422 DCHECK(!g_check_called_once || !g_called_once);
423 g_called_once = true;
424 #endif
425
426 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
427 // The ICU data is statically linked.
428 #elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
429 if (!InitializeICUFromDataFile())
430 return false;
431 #else
432 #error Unsupported ICU_UTIL_DATA_IMPL value
433 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
434
435 return DoCommonInitialization();
436 }
437
// Test-only: turns off the DCHECK in the initialization entry points that
// fires when they are entered more than once. Does nothing in builds where
// DCHECKs are off.
void AllowMultipleInitializeCallsForTesting() {
#if DCHECK_IS_ON()
  g_check_called_once = false;
#endif
}
443
444 #endif // !BUILDFLAG(IS_NACL)
445
446 } // namespace base::i18n
447