// Copyright 2022 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/i18n/icu_mergeable_data_file.h" #include #include "base/hash/hash.h" #include "base/numerics/safe_conversions.h" #include "base/strings/string_number_conversions.h" #include "base/threading/scoped_blocking_call.h" #include "build/chromeos_buildflags.h" namespace base::i18n { // Enable merging of `icudtl.dat` in Lacros. BASE_FEATURE(kLacrosMergeIcuDataFile, "LacrosMergeIcuDataFile", base::FEATURE_ENABLED_BY_DEFAULT); namespace { #if BUILDFLAG(IS_CHROMEOS_DEVICE) // Path of Ash's ICU data file. constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat"; #endif // BUILDFLAG(IS_CHROMEOS_DEVICE) // Expected size of a system page. constexpr int64_t kPageSize = 0x1000; // Size of a page hash. Changing this will break compatibility // with existing `icudtl.dat.hash` files, so be careful. constexpr size_t kHashBytes = 8; static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes); inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) { return FastHash(base::make_span(page, static_cast(kPageSize))); } IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) { DCHECK_EQ(0ul, offset % kHashBytes); IcuMergeableDataFile::HashType hash = 0; for (size_t i = 0; i < kHashBytes; i++) { IcuMergeableDataFile::HashType byte = data[offset + i]; hash |= byte << (i * 8); } return hash; } constexpr size_t NPages(size_t length) { return (length + kPageSize - 1) / kPageSize; } } // namespace class AshMemoryMappedFile { public: bool Initialize(File ash_file) { fd_ = ash_file.GetPlatformFile(); return memory_mapped_file_.Initialize(std::move(ash_file)); } PlatformFile fd() const { return fd_; } const uint8_t* data() const { return memory_mapped_file_.data(); } size_t length() const { return memory_mapped_file_.length(); } private: PlatformFile fd_; MemoryMappedFile memory_mapped_file_; }; std::unique_ptr MmapAshFile( const FilePath& ash_file_path) { ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); // Open Ash's data file. File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ); // Mmap Ash's data file. auto ash_mapped_file = std::make_unique(); bool map_successful = ash_mapped_file->Initialize(std::move(ash_file)); if (!map_successful) { PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat"; return nullptr; } return ash_mapped_file; } // Class wrapping the memory-merging logic for `icudtl.dat`. IcuMergeableDataFile::IcuMergeableDataFile() = default; IcuMergeableDataFile::~IcuMergeableDataFile() { if (lacros_data_) { ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); munmap(lacros_data_, lacros_length_); } } IcuMergeableDataFile::Hashes::Hashes() = default; IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash, std::vector lacros) : ash(std::move(ash)), lacros(std::move(lacros)) {} IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default; IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=( Hashes&& other) = default; IcuMergeableDataFile::Hashes::~Hashes() = default; bool IcuMergeableDataFile::Initialize(File lacros_file, MemoryMappedFile::Region region) { DCHECK(region == MemoryMappedFile::Region::kWholeFile); DCHECK(!lacros_file_.IsValid()) << "ICUDataFile::Initialize called twice"; lacros_file_ = std::move(lacros_file); int64_t lacros_length = lacros_file_.GetLength(); if (lacros_length < 0) { return false; } // Narrow to size_t, since it's used for pointer arithmetic, mmap and other // APIs that accept size_t. lacros_length_ = base::checked_cast(lacros_length); // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash. bool map_successful = MmapLacrosFile(/*remap=*/false); #if BUILDFLAG(IS_CHROMEOS_DEVICE) // If we're inside an actual ChromeOS system (i.e. not just in // linux-lacros-rel) then we can expect Ash Chrome (and its version of // `icudtl.dat`) to be present in the default directory. // In that case, we can attempt merging. if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) { bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath)); // If we hit a critical failure while merging, remap Lacros's version. if (!merge_successful) { PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed"; map_successful = MmapLacrosFile(/*remap=*/true); } } #endif // BUILDFLAG(IS_CHROMEOS_DEVICE) return map_successful; } const uint8_t* IcuMergeableDataFile::data() const { return static_cast(lacros_data_); } bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) { // Verify the assumption that page size is 4K. DCHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize); // Mmap Ash's data file. auto ash_file = MmapAshFile(ash_file_path); if (!ash_file) return true; // Non-critical failure. // Calculate hashes for each page in Ash and Lacros's data files. Hashes hashes = CalculateHashes(*ash_file, ash_file_path); // Find Lacros's ICU pages that are duplicated in Ash. size_t lacros_offset = 0; while (lacros_offset < lacros_length_) { Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset); // If there's no overlap, move to the next page and keep scanning. if (ash_overlap.length == 0) { lacros_offset += kPageSize; continue; } // Found a sequence of equal pages, merge them with Ash. bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset); if (!merge_successful) return false; // Critical failure. lacros_offset += ash_overlap.length; } return true; // Success. } bool IcuMergeableDataFile::MmapLacrosFile(bool remap) { ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); if (remap) { // If `remap` == true, we add the MAP_FIXED option to unmap the // existing map and replace it with the new one in a single operation. DCHECK_NE(lacros_data_, nullptr); lacros_data_ = static_cast( mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE, lacros_file_.GetPlatformFile(), 0)); } else { // Otherwise, simply map the file. lacros_data_ = static_cast( mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE, lacros_file_.GetPlatformFile(), 0)); } if (lacros_data_ == MAP_FAILED) { lacros_data_ = nullptr; PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat"; return false; } return true; } IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap( const AshMemoryMappedFile& ash_file, const Hashes& hashes, size_t lacros_offset) const { // Search for equal pages by hash. HashType hash = hashes.lacros[lacros_offset / kPageSize]; auto search = hashes.ash.find(hash); if (search == hashes.ash.end()) return {0, 0}; // Count how many pages (if any) have the same content. size_t ash_offset = search->second; size_t overlap_length = kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset, lacros_data_ + lacros_offset); return {ash_offset, overlap_length}; } bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file, const Slice& ash_overlap, size_t lacros_offset) { ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK); // Unmap from Lacros's file and map from Ash's file instead. // NOTE: "[...] If the memory region specified by addr and length overlaps // pages of any existing mapping(s), then the overlapped part of the // existing mapping(s) will be discarded. If the specified address // cannot be used, mmap() will fail." // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html uint8_t* map_result = static_cast( mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ, MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset)); if (map_result == MAP_FAILED) { PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging"; return false; } return true; } size_t IcuMergeableDataFile::CountEqualPages( const AshMemoryMappedFile& ash_file, const uint8_t* ash_page, const uint8_t* lacros_page) const { size_t pages = 0; const uint8_t* ash_end = ash_file.data() + ash_file.length(); const uint8_t* lacros_end = lacros_data_ + lacros_length_; while (ash_page < ash_end && lacros_page < lacros_end && memcmp(ash_page, lacros_page, kPageSize) == 0) { ash_page += kPageSize; lacros_page += kPageSize; pages++; } return pages; } IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes( const AshMemoryMappedFile& ash_file, const FilePath& ash_file_path) { // Try loading hashes from the pre-computed files first. Hashes hashes; used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes); if (!used_cached_hashes_) { // Calculate hashes for each page in Ash's data file. std::vector ash_hashes; ash_hashes.reserve(NPages(ash_file.length())); for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) { // NOTE: "POSIX specifies that the system shall always zero fill any // partial page at the end of the object [...]". // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html // // Therefore this code works even if the size of Ash's `icudtl.dat` is not // a multiple of the page size. HashType hash = HashPage(ash_file.data() + offset); ash_hashes.emplace_back(hash, offset); } // Calculate hashes for each page in Lacros's data file. hashes.lacros.reserve(NPages(lacros_length_)); for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) { HashType hash = HashPage(lacros_data_ + offset); hashes.lacros.emplace_back(hash); } hashes.ash = HashToOffsetMap(std::move(ash_hashes)); } return hashes; } bool IcuMergeableDataFile::MaybeLoadCachedHashes( const AshMemoryMappedFile& ash_file, const FilePath& ash_file_path, Hashes& hashes) { FilePath ash_hash_path = ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension); FilePath lacros_hash_path = GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension); // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent // with the current version of `icudtl.dat`. MemoryMappedFile ash_hash_file; size_t ash_pages = NPages(ash_file.length()); bool result = ash_hash_file.Initialize(ash_hash_path); if (!result || (ash_hash_file.length() % kHashBytes) || ((ash_hash_file.length() / kHashBytes) != ash_pages)) { return false; } // Same for Lacros's `icudtl.dat.hash`. MemoryMappedFile lacros_hash_file; size_t lacros_pages = NPages(lacros_length_); result = lacros_hash_file.Initialize(lacros_hash_path); if (!result || (lacros_hash_file.length() % kHashBytes) || ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) { return false; } // Load Ash's hashes. std::vector ash_hashes; ash_hashes.reserve(ash_pages); for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) { HashType hash = ReadHash(ash_hash_file.data(), i); size_t offset = (i / kHashBytes) * kPageSize; ash_hashes.emplace_back(hash, offset); } // Load Lacros's hashes. hashes.lacros.reserve(lacros_pages); for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) { HashType hash = ReadHash(lacros_hash_file.data(), i); hashes.lacros.emplace_back(hash); } hashes.ash = HashToOffsetMap(std::move(ash_hashes)); return true; } FilePath IcuMergeableDataFile::GetLacrosFilePath() { // /proc/self/fd/ // This is a subdirectory containing one entry for each file // which the process has open, named by its file descriptor, // and which is a symbolic link to the actual file. // Reference: proc(5) - Linux manual page. char path[PATH_MAX]; FilePath proc_path = FilePath("/proc/self/fd/") .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile())); // We read the content of the symbolic link to find the path of the // file associated with the file descriptor. int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path)); DCHECK_NE(path_len, -1); DCHECK_LT(path_len, PATH_MAX); return FilePath(std::string(path, 0, path_len)); } } // namespace base::i18n