// Copyright 2012 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/i18n/icu_util.h" #include "build/build_config.h" #if BUILDFLAG(IS_WIN) #include #endif #include #include #include #include "base/debug/alias.h" #include "base/environment.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/files/memory_mapped_file.h" #include "base/logging.h" #include "base/metrics/histogram_functions.h" #include "base/metrics/metrics_hashes.h" #include "base/path_service.h" #include "base/strings/string_util.h" #include "build/chromecast_buildflags.h" #include "third_party/icu/source/common/unicode/putil.h" #include "third_party/icu/source/common/unicode/udata.h" #include "third_party/icu/source/common/unicode/utrace.h" #if BUILDFLAG(IS_ANDROID) #include "base/android/apk_assets.h" #include "base/android/timezone_utils.h" #endif #if BUILDFLAG(IS_IOS) #include "base/ios/ios_util.h" #endif #if BUILDFLAG(IS_APPLE) #include "base/mac/foundation_util.h" #endif #if BUILDFLAG(IS_FUCHSIA) #include "base/fuchsia/intl_profile_watcher.h" #endif #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) #include "third_party/icu/source/common/unicode/unistr.h" #endif #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) || \ BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS)) #include "third_party/icu/source/i18n/unicode/timezone.h" #endif namespace base::i18n { #if !BUILDFLAG(IS_NACL) namespace { #if DCHECK_IS_ON() // Assert that we are not called more than once. Even though calling this // function isn't harmful (ICU can handle it), being called twice probably // indicates a programming error. bool g_check_called_once = true; bool g_called_once = false; #endif // DCHECK_IS_ON() #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) // To debug http://crbug.com/445616. int g_debug_icu_last_error; int g_debug_icu_load; int g_debug_icu_pf_error_details; int g_debug_icu_pf_last_error; #if BUILDFLAG(IS_WIN) wchar_t g_debug_icu_pf_filename[_MAX_PATH]; #endif // BUILDFLAG(IS_WIN) // Use an unversioned file name to simplify a icu version update down the road. // No need to change the filename in multiple places (gyp files, windows // build pkg configurations, etc). 'l' stands for Little Endian. // This variable is exported through the header file. const char kIcuDataFileName[] = "icudtl.dat"; // Time zone data loading. // For now, only Fuchsia has a meaningful use case for this feature, so it is // only implemented for OS_FUCHSIA. #if BUILDFLAG(IS_FUCHSIA) // The environment variable used to point the ICU data loader to the directory // containing time zone data. This is available from ICU version 54. The env // variable approach is antiquated by today's standards (2019), but is the // recommended way to configure ICU. // // See for details: http://userguide.icu-project.org/datetime/timezone const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR"; // Up-to-date time zone data is expected to be provided by the system as a // directory offered to Chromium components at /config/tzdata. Chromium // components should "use" the `tzdata` directory capability, specifying the // "/config/tzdata" path. The capability's "availability" should be set to // "required" or "optional" as appropriate - if no data is provided then ICU // initialization will (in future silently) fall-back to the (potentially stale) // timezone data included in the package. // // TimeZoneDataTest.* tests verify that external timezone data is correctly // loaded from the system, to alert developers if the platform and Chromium // versions are no longer compatible versions. const char kIcuTimeZoneDataDir[] = "/config/tzdata/icu/44/le"; // Path used to receive tzdata via the legacy config-data mechanism. const char kLegacyIcuTimeZoneDataDir[] = "/config/data/tzdata/icu/44/le"; #endif // BUILDFLAG(IS_FUCHSIA) #if BUILDFLAG(IS_ANDROID) const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat"; #endif // BUILDFLAG(IS_ANDROID) // File handle intentionally never closed. Not using File here because its // Windows implementation guards against two instances owning the same // PlatformFile (which we allow since we know it is never freed). PlatformFile g_icudtl_pf = kInvalidPlatformFile; IcuDataFile* g_icudtl_mapped_file = nullptr; MemoryMappedFile::Region g_icudtl_region; #if BUILDFLAG(IS_FUCHSIA) // The directory from which the ICU data loader will be configured to load time // zone data. It is only changed by SetIcuTimeZoneDataDirForTesting(). const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir; #endif // BUILDFLAG(IS_FUCHSIA) void LazyInitIcuDataFile() { if (g_icudtl_pf != kInvalidPlatformFile) { return; } #if BUILDFLAG(IS_ANDROID) int fd = android::OpenApkAsset(kAndroidAssetsIcuDataFileName, &g_icudtl_region); g_icudtl_pf = fd; if (fd != -1) { return; } #endif // BUILDFLAG(IS_ANDROID) // For unit tests, data file is located on disk, so try there as a fallback. #if !BUILDFLAG(IS_APPLE) FilePath data_path; if (!PathService::Get(DIR_ASSETS, &data_path)) { LOG(ERROR) << "Can't find " << kIcuDataFileName; return; } #if BUILDFLAG(IS_WIN) // TODO(brucedawson): http://crbug.com/445616 wchar_t tmp_buffer[_MAX_PATH] = {0}; wcscpy_s(tmp_buffer, data_path.value().c_str()); debug::Alias(tmp_buffer); #endif data_path = data_path.AppendASCII(kIcuDataFileName); #if BUILDFLAG(IS_WIN) // TODO(brucedawson): http://crbug.com/445616 wchar_t tmp_buffer2[_MAX_PATH] = {0}; wcscpy_s(tmp_buffer2, data_path.value().c_str()); debug::Alias(tmp_buffer2); #endif #else // !BUILDFLAG(IS_APPLE) // Assume it is in the framework bundle's Resources directory. FilePath data_path = mac::PathForFrameworkBundleResource(kIcuDataFileName); #if BUILDFLAG(IS_IOS) FilePath override_data_path = ios::FilePathOfEmbeddedICU(); if (!override_data_path.empty()) { data_path = override_data_path; } #endif // !BUILDFLAG(IS_IOS) if (data_path.empty()) { LOG(ERROR) << kIcuDataFileName << " not found in bundle"; return; } #endif // !BUILDFLAG(IS_APPLE) File file(data_path, File::FLAG_OPEN | File::FLAG_READ); if (file.IsValid()) { // TODO(brucedawson): http://crbug.com/445616. g_debug_icu_pf_last_error = 0; g_debug_icu_pf_error_details = 0; #if BUILDFLAG(IS_WIN) g_debug_icu_pf_filename[0] = 0; #endif // BUILDFLAG(IS_WIN) g_icudtl_pf = file.TakePlatformFile(); g_icudtl_region = MemoryMappedFile::Region::kWholeFile; } #if BUILDFLAG(IS_WIN) else { // TODO(brucedawson): http://crbug.com/445616. g_debug_icu_pf_last_error = ::GetLastError(); g_debug_icu_pf_error_details = file.error_details(); wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str()); } #endif // BUILDFLAG(IS_WIN) } // Configures ICU to load external time zone data, if appropriate. void InitializeExternalTimeZoneData() { #if BUILDFLAG(IS_FUCHSIA) // Set the environment variable to override the location used by ICU. // Loading can still fail if the directory is empty or its data is invalid. std::unique_ptr env = base::Environment::Create(); // If the ICU tzdata path exists then do not fall-back to config-data. // TODO(crbug.com/1360077): Remove fall-back once all components are migrated. if (base::PathExists(base::FilePath(g_icu_time_zone_data_dir))) { // If the tzdata directory does not exist then silently fallback to // using the inbuilt (possibly stale) timezone data. if (base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) { env->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir); } } else if (g_icu_time_zone_data_dir == kIcuTimeZoneDataDir && base::DirectoryExists( base::FilePath((kLegacyIcuTimeZoneDataDir)))) { // Only fall-back to attempting to load from the legacy config-data path // if `g_icu_time_zone_data_dir` has not been changed by a test. env->SetVar(kIcuTimeZoneEnvVariable, kLegacyIcuTimeZoneDataDir); } else { PLOG(WARNING) << "Could not locate tzdata in config-data. " << "Using built-in timezone database"; } #endif // BUILDFLAG(IS_FUCHSIA) } int LoadIcuData(PlatformFile data_fd, const MemoryMappedFile::Region& data_region, std::unique_ptr* out_mapped_data_file, UErrorCode* out_error_code) { InitializeExternalTimeZoneData(); if (data_fd == kInvalidPlatformFile) { LOG(ERROR) << "Invalid file descriptor to ICU data received."; return 1; // To debug http://crbug.com/445616. } *out_mapped_data_file = std::make_unique(); if (!(*out_mapped_data_file)->Initialize(File(data_fd), data_region)) { LOG(ERROR) << "Couldn't mmap icu data file"; return 2; // To debug http://crbug.com/445616. } (*out_error_code) = U_ZERO_ERROR; udata_setCommonData(const_cast((*out_mapped_data_file)->data()), out_error_code); if (U_FAILURE(*out_error_code)) { LOG(ERROR) << "Failed to initialize ICU with data file: " << u_errorName(*out_error_code); return 3; // To debug http://crbug.com/445616. } return 0; } bool InitializeICUWithFileDescriptorInternal( PlatformFile data_fd, const MemoryMappedFile::Region& data_region) { // This can be called multiple times in tests. if (g_icudtl_mapped_file) { g_debug_icu_load = 0; // To debug http://crbug.com/445616. return true; } std::unique_ptr mapped_file; UErrorCode err; g_debug_icu_load = LoadIcuData(data_fd, data_region, &mapped_file, &err); if (g_debug_icu_load == 1 || g_debug_icu_load == 2) { return false; } g_icudtl_mapped_file = mapped_file.release(); if (g_debug_icu_load == 3) { g_debug_icu_last_error = err; } // Never try to load ICU data from files. udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); return U_SUCCESS(err); } bool InitializeICUFromDataFile() { // If the ICU data directory is set, ICU won't actually load the data until // it is needed. This can fail if the process is sandboxed at that time. // Instead, we map the file in and hand off the data so the sandbox won't // cause any problems. LazyInitIcuDataFile(); bool result = InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region); #if BUILDFLAG(IS_WIN) int debug_icu_load = g_debug_icu_load; debug::Alias(&debug_icu_load); int debug_icu_last_error = g_debug_icu_last_error; debug::Alias(&debug_icu_last_error); int debug_icu_pf_last_error = g_debug_icu_pf_last_error; debug::Alias(&debug_icu_pf_last_error); int debug_icu_pf_error_details = g_debug_icu_pf_error_details; debug::Alias(&debug_icu_pf_error_details); wchar_t debug_icu_pf_filename[_MAX_PATH] = {0}; wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename); debug::Alias(&debug_icu_pf_filename); CHECK(result); // TODO(brucedawson): http://crbug.com/445616 #endif // BUILDFLAG(IS_WIN) return result; } #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) // Explicitly initialize ICU's time zone if necessary. // On some platforms, the time zone must be explicitly initialized zone rather // than relying on ICU's internal initialization. void InitializeIcuTimeZone() { #if BUILDFLAG(IS_ANDROID) // On Android, we can't leave it up to ICU to set the default time zone // because ICU's time zone detection does not work in many time zones (e.g. // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host // time zone and set the ICU default time zone accordingly in advance of // actual use. See crbug.com/722821 and // https://ssl.icu-project.org/trac/ticket/13208 . std::u16string zone_id = android::GetDefaultTimeZoneId(); icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone( icu::UnicodeString(false, zone_id.data(), zone_id.length()))); #elif BUILDFLAG(IS_FUCHSIA) // The platform-specific mechanisms used by ICU's detectHostTimeZone() to // determine the default time zone will not work on Fuchsia. Therefore, // proactively set the default system. // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(), // which uses the current default to detect whether the time zone changed in // the new profile. // If the system time zone cannot be obtained or is not understood by ICU, // the "unknown" time zone will be returned by createTimeZone() and used. std::string zone_id = FuchsiaIntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization(); icu::TimeZone::adoptDefault( icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(zone_id))); #elif BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS)) // To respond to the time zone change properly, the default time zone // cache in ICU has to be populated on starting up. // See TimeZoneMonitorLinux::NotifyClientsFromImpl(). std::unique_ptr zone(icu::TimeZone::createDefault()); #endif // BUILDFLAG(IS_ANDROID) } enum class ICUCreateInstance { kCharacterBreakIterator = 0, kWordBreakIterator = 1, kLineBreakIterator = 2, kLineBreakIteratorTypeLoose = 3, kLineBreakIteratorTypeNormal = 4, kLineBreakIteratorTypeStrict = 5, kSentenceBreakIterator = 6, kTitleBreakIterator = 7, kThaiBreakEngine = 8, kLaoBreakEngine = 9, kBurmeseBreakEngine = 10, kKhmerBreakEngine = 11, kChineseJapaneseBreakEngine = 12, kMaxValue = kChineseJapaneseBreakEngine }; // Common initialization to run regardless of how ICU is initialized. // There are multiple exposed InitializeIcu* functions. This should be called // as at the end of (the last functions in the sequence of) these functions. bool DoCommonInitialization() { // TODO(jungshik): Some callers do not care about tz at all. If necessary, // add a boolean argument to this function to init the default tz only // when requested. InitializeIcuTimeZone(); utrace_setLevel(UTRACE_VERBOSE); return true; } } // namespace #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) bool InitializeICUWithFileDescriptor( PlatformFile data_fd, const MemoryMappedFile::Region& data_region) { #if DCHECK_IS_ON() DCHECK(!g_check_called_once || !g_called_once); g_called_once = true; #endif if (!InitializeICUWithFileDescriptorInternal(data_fd, data_region)) return false; return DoCommonInitialization(); } PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) { CHECK_NE(g_icudtl_pf, kInvalidPlatformFile); *out_region = g_icudtl_region; return g_icudtl_pf; } void ResetGlobalsForTesting() { g_icudtl_pf = kInvalidPlatformFile; g_icudtl_mapped_file = nullptr; #if BUILDFLAG(IS_FUCHSIA) g_icu_time_zone_data_dir = kIcuTimeZoneDataDir; #endif // BUILDFLAG(IS_FUCHSIA) } #if BUILDFLAG(IS_FUCHSIA) // |dir| must remain valid until ResetGlobalsForTesting() is called. void SetIcuTimeZoneDataDirForTesting(const char* dir) { g_icu_time_zone_data_dir = dir; } #endif // BUILDFLAG(IS_FUCHSIA) #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) bool InitializeICU() { #if DCHECK_IS_ON() DCHECK(!g_check_called_once || !g_called_once); g_called_once = true; #endif #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC) // The ICU data is statically linked. #elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) if (!InitializeICUFromDataFile()) return false; #else #error Unsupported ICU_UTIL_DATA_IMPL value #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC) return DoCommonInitialization(); } void AllowMultipleInitializeCallsForTesting() { #if DCHECK_IS_ON() g_check_called_once = false; #endif } #endif // !BUILDFLAG(IS_NACL) } // namespace base::i18n