1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/icu_mergeable_data_file.h"
6
7 #include <sys/mman.h>
8
9 #include "base/hash/hash.h"
10 #include "base/numerics/safe_conversions.h"
11 #include "base/strings/string_number_conversions.h"
12 #include "base/threading/scoped_blocking_call.h"
13 #include "build/chromeos_buildflags.h"
14
15 namespace base::i18n {
16
// Enable merging of `icudtl.dat` in Lacros.
// The feature is enabled by default. `IcuMergeableDataFile::Initialize`
// consults it before attempting to merge Lacros's mapping with Ash's.
BASE_FEATURE(kLacrosMergeIcuDataFile,
             "LacrosMergeIcuDataFile",
             base::FEATURE_ENABLED_BY_DEFAULT);
21
22 namespace {
23
#if BUILDFLAG(IS_CHROMEOS_DEVICE)
// Path of Ash's ICU data file. Only compiled in for ChromeOS device builds,
// which is the only configuration where `Initialize` attempts a merge.
constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
#endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)

// Expected size of a system page, in bytes (0x1000 == 4096).
// `MergeWithAshVersion` DCHECKs this against `sysconf(_SC_PAGESIZE)`.
constexpr int64_t kPageSize = 0x1000;

// Size of a page hash, in bytes. Changing this will break compatibility
// with existing `icudtl.dat.hash` files, so be careful.
constexpr size_t kHashBytes = 8;
// The in-memory hash type must be exactly as wide as the on-disk encoding.
static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
36
HashPage(const uint8_t * page)37 inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) {
38 return FastHash(base::make_span(page, static_cast<size_t>(kPageSize)));
39 }
40
ReadHash(const uint8_t * data,size_t offset)41 IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) {
42 DCHECK_EQ(0ul, offset % kHashBytes);
43 IcuMergeableDataFile::HashType hash = 0;
44 for (size_t i = 0; i < kHashBytes; i++) {
45 IcuMergeableDataFile::HashType byte = data[offset + i];
46 hash |= byte << (i * 8);
47 }
48 return hash;
49 }
50
NPages(size_t length)51 constexpr size_t NPages(size_t length) {
52 return (length + kPageSize - 1) / kPageSize;
53 }
54
55 } // namespace
56
57 class AshMemoryMappedFile {
58 public:
Initialize(File ash_file)59 bool Initialize(File ash_file) {
60 fd_ = ash_file.GetPlatformFile();
61 return memory_mapped_file_.Initialize(std::move(ash_file));
62 }
63
fd() const64 PlatformFile fd() const { return fd_; }
data() const65 const uint8_t* data() const { return memory_mapped_file_.data(); }
length() const66 size_t length() const { return memory_mapped_file_.length(); }
67
68 private:
69 PlatformFile fd_;
70 MemoryMappedFile memory_mapped_file_;
71 };
72
MmapAshFile(const FilePath & ash_file_path)73 std::unique_ptr<AshMemoryMappedFile> MmapAshFile(
74 const FilePath& ash_file_path) {
75 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
76
77 // Open Ash's data file.
78 File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ);
79
80 // Mmap Ash's data file.
81 auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>();
82 bool map_successful = ash_mapped_file->Initialize(std::move(ash_file));
83 if (!map_successful) {
84 PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat";
85 return nullptr;
86 }
87
88 return ash_mapped_file;
89 }
90
91 // Class wrapping the memory-merging logic for `icudtl.dat`.
92 IcuMergeableDataFile::IcuMergeableDataFile() = default;
93
~IcuMergeableDataFile()94 IcuMergeableDataFile::~IcuMergeableDataFile() {
95 if (lacros_data_) {
96 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
97 munmap(lacros_data_, lacros_length_);
98 }
99 }
100
// Out-of-line special member functions of `Hashes`.

// Creates an empty set of hashes.
IcuMergeableDataFile::Hashes::Hashes() = default;
// Takes both containers by value and moves them into the members of the same
// name.
IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
                                     std::vector<HashType> lacros)
    : ash(std::move(ash)), lacros(std::move(lacros)) {}
// Move construction and move assignment use the compiler-generated behavior.
IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
    Hashes&& other) = default;
IcuMergeableDataFile::Hashes::~Hashes() = default;
109
Initialize(File lacros_file,MemoryMappedFile::Region region)110 bool IcuMergeableDataFile::Initialize(File lacros_file,
111 MemoryMappedFile::Region region) {
112 DCHECK(region == MemoryMappedFile::Region::kWholeFile);
113 DCHECK(!lacros_file_.IsValid()) << "ICUDataFile::Initialize called twice";
114
115 lacros_file_ = std::move(lacros_file);
116 int64_t lacros_length = lacros_file_.GetLength();
117 if (lacros_length < 0) {
118 return false;
119 }
120 // Narrow to size_t, since it's used for pointer arithmetic, mmap and other
121 // APIs that accept size_t.
122 lacros_length_ = base::checked_cast<size_t>(lacros_length);
123
124 // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash.
125 bool map_successful = MmapLacrosFile(/*remap=*/false);
126
127 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
128 // If we're inside an actual ChromeOS system (i.e. not just in
129 // linux-lacros-rel) then we can expect Ash Chrome (and its version of
130 // `icudtl.dat`) to be present in the default directory.
131 // In that case, we can attempt merging.
132 if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
133 bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath));
134 // If we hit a critical failure while merging, remap Lacros's version.
135 if (!merge_successful) {
136 PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
137 map_successful = MmapLacrosFile(/*remap=*/true);
138 }
139 }
140 #endif // BUILDFLAG(IS_CHROMEOS_DEVICE)
141
142 return map_successful;
143 }
144
data() const145 const uint8_t* IcuMergeableDataFile::data() const {
146 return static_cast<const uint8_t*>(lacros_data_);
147 }
148
MergeWithAshVersion(const FilePath & ash_file_path)149 bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
150 // Verify the assumption that page size is 4K.
151 DCHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);
152
153 // Mmap Ash's data file.
154 auto ash_file = MmapAshFile(ash_file_path);
155 if (!ash_file)
156 return true; // Non-critical failure.
157
158 // Calculate hashes for each page in Ash and Lacros's data files.
159 Hashes hashes = CalculateHashes(*ash_file, ash_file_path);
160
161 // Find Lacros's ICU pages that are duplicated in Ash.
162 size_t lacros_offset = 0;
163 while (lacros_offset < lacros_length_) {
164 Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
165 // If there's no overlap, move to the next page and keep scanning.
166 if (ash_overlap.length == 0) {
167 lacros_offset += kPageSize;
168 continue;
169 }
170
171 // Found a sequence of equal pages, merge them with Ash.
172 bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
173 if (!merge_successful)
174 return false; // Critical failure.
175
176 lacros_offset += ash_overlap.length;
177 }
178
179 return true; // Success.
180 }
181
MmapLacrosFile(bool remap)182 bool IcuMergeableDataFile::MmapLacrosFile(bool remap) {
183 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
184
185 if (remap) {
186 // If `remap` == true, we add the MAP_FIXED option to unmap the
187 // existing map and replace it with the new one in a single operation.
188 DCHECK_NE(lacros_data_, nullptr);
189 lacros_data_ = static_cast<uint8_t*>(
190 mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE,
191 lacros_file_.GetPlatformFile(), 0));
192 } else {
193 // Otherwise, simply map the file.
194 lacros_data_ = static_cast<uint8_t*>(
195 mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE,
196 lacros_file_.GetPlatformFile(), 0));
197 }
198
199 if (lacros_data_ == MAP_FAILED) {
200 lacros_data_ = nullptr;
201 PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat";
202 return false;
203 }
204
205 return true;
206 }
207
FindOverlap(const AshMemoryMappedFile & ash_file,const Hashes & hashes,size_t lacros_offset) const208 IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap(
209 const AshMemoryMappedFile& ash_file,
210 const Hashes& hashes,
211 size_t lacros_offset) const {
212 // Search for equal pages by hash.
213 HashType hash = hashes.lacros[lacros_offset / kPageSize];
214 auto search = hashes.ash.find(hash);
215 if (search == hashes.ash.end())
216 return {0, 0};
217
218 // Count how many pages (if any) have the same content.
219 size_t ash_offset = search->second;
220 size_t overlap_length =
221 kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset,
222 lacros_data_ + lacros_offset);
223
224 return {ash_offset, overlap_length};
225 }
226
MergeArea(const AshMemoryMappedFile & ash_file,const Slice & ash_overlap,size_t lacros_offset)227 bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
228 const Slice& ash_overlap,
229 size_t lacros_offset) {
230 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
231
232 // Unmap from Lacros's file and map from Ash's file instead.
233 // NOTE: "[...] If the memory region specified by addr and length overlaps
234 // pages of any existing mapping(s), then the overlapped part of the
235 // existing mapping(s) will be discarded. If the specified address
236 // cannot be used, mmap() will fail."
237 // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
238 uint8_t* map_result = static_cast<uint8_t*>(
239 mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ,
240 MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset));
241
242 if (map_result == MAP_FAILED) {
243 PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
244 return false;
245 }
246
247 return true;
248 }
249
CountEqualPages(const AshMemoryMappedFile & ash_file,const uint8_t * ash_page,const uint8_t * lacros_page) const250 size_t IcuMergeableDataFile::CountEqualPages(
251 const AshMemoryMappedFile& ash_file,
252 const uint8_t* ash_page,
253 const uint8_t* lacros_page) const {
254 size_t pages = 0;
255 const uint8_t* ash_end = ash_file.data() + ash_file.length();
256 const uint8_t* lacros_end = lacros_data_ + lacros_length_;
257
258 while (ash_page < ash_end && lacros_page < lacros_end &&
259 memcmp(ash_page, lacros_page, kPageSize) == 0) {
260 ash_page += kPageSize;
261 lacros_page += kPageSize;
262 pages++;
263 }
264
265 return pages;
266 }
267
CalculateHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path)268 IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
269 const AshMemoryMappedFile& ash_file,
270 const FilePath& ash_file_path) {
271 // Try loading hashes from the pre-computed files first.
272 Hashes hashes;
273 used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);
274
275 if (!used_cached_hashes_) {
276 // Calculate hashes for each page in Ash's data file.
277 std::vector<HashOffset> ash_hashes;
278 ash_hashes.reserve(NPages(ash_file.length()));
279 for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) {
280 // NOTE: "POSIX specifies that the system shall always zero fill any
281 // partial page at the end of the object [...]".
282 // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
283 //
284 // Therefore this code works even if the size of Ash's `icudtl.dat` is not
285 // a multiple of the page size.
286 HashType hash = HashPage(ash_file.data() + offset);
287 ash_hashes.emplace_back(hash, offset);
288 }
289
290 // Calculate hashes for each page in Lacros's data file.
291 hashes.lacros.reserve(NPages(lacros_length_));
292 for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) {
293 HashType hash = HashPage(lacros_data_ + offset);
294 hashes.lacros.emplace_back(hash);
295 }
296
297 hashes.ash = HashToOffsetMap(std::move(ash_hashes));
298 }
299
300 return hashes;
301 }
302
MaybeLoadCachedHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path,Hashes & hashes)303 bool IcuMergeableDataFile::MaybeLoadCachedHashes(
304 const AshMemoryMappedFile& ash_file,
305 const FilePath& ash_file_path,
306 Hashes& hashes) {
307 FilePath ash_hash_path =
308 ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
309 FilePath lacros_hash_path =
310 GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);
311
312 // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
313 // with the current version of `icudtl.dat`.
314 MemoryMappedFile ash_hash_file;
315 size_t ash_pages = NPages(ash_file.length());
316 bool result = ash_hash_file.Initialize(ash_hash_path);
317 if (!result || (ash_hash_file.length() % kHashBytes) ||
318 ((ash_hash_file.length() / kHashBytes) != ash_pages)) {
319 return false;
320 }
321
322 // Same for Lacros's `icudtl.dat.hash`.
323 MemoryMappedFile lacros_hash_file;
324 size_t lacros_pages = NPages(lacros_length_);
325 result = lacros_hash_file.Initialize(lacros_hash_path);
326 if (!result || (lacros_hash_file.length() % kHashBytes) ||
327 ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
328 return false;
329 }
330
331 // Load Ash's hashes.
332 std::vector<HashOffset> ash_hashes;
333 ash_hashes.reserve(ash_pages);
334 for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
335 HashType hash = ReadHash(ash_hash_file.data(), i);
336 size_t offset = (i / kHashBytes) * kPageSize;
337 ash_hashes.emplace_back(hash, offset);
338 }
339
340 // Load Lacros's hashes.
341 hashes.lacros.reserve(lacros_pages);
342 for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
343 HashType hash = ReadHash(lacros_hash_file.data(), i);
344 hashes.lacros.emplace_back(hash);
345 }
346
347 hashes.ash = HashToOffsetMap(std::move(ash_hashes));
348 return true;
349 }
350
GetLacrosFilePath()351 FilePath IcuMergeableDataFile::GetLacrosFilePath() {
352 // /proc/self/fd/<fd>
353 // This is a subdirectory containing one entry for each file
354 // which the process has open, named by its file descriptor,
355 // and which is a symbolic link to the actual file.
356 // Reference: proc(5) - Linux manual page.
357 char path[PATH_MAX];
358 FilePath proc_path =
359 FilePath("/proc/self/fd/")
360 .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile()));
361
362 // We read the content of the symbolic link to find the path of the
363 // file associated with the file descriptor.
364 int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path));
365 DCHECK_NE(path_len, -1);
366 DCHECK_LT(path_len, PATH_MAX);
367
368 return FilePath(std::string(path, 0, path_len));
369 }
370
371 } // namespace base::i18n
372