• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/icu_mergeable_data_file.h"
6 
7 #include <sys/mman.h>
8 
9 #include "base/hash/hash.h"
10 #include "base/numerics/safe_conversions.h"
11 #include "base/strings/string_number_conversions.h"
12 #include "base/threading/scoped_blocking_call.h"
13 #include "build/chromeos_buildflags.h"
14 
15 namespace base::i18n {
16 
// Enable merging of `icudtl.dat` in Lacros.
// The feature is on by default. It is consulted by
// IcuMergeableDataFile::Initialize(), which only attempts the merge with
// Ash's copy of the file in builds where BUILDFLAG(IS_CHROMEOS_DEVICE) is set.
BASE_FEATURE(kLacrosMergeIcuDataFile,
             "LacrosMergeIcuDataFile",
             base::FEATURE_ENABLED_BY_DEFAULT);
21 
22 namespace {
23 
#if BUILDFLAG(IS_CHROMEOS_DEVICE)
// Path of Ash's ICU data file.
// Only defined for device builds, where Initialize() passes it to
// MergeWithAshVersion().
constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
#endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)

// Expected size of a system page.
// Every hashing, comparison and merging step in this file works in units of
// this size; MergeWithAshVersion() DCHECKs it against sysconf(_SC_PAGESIZE).
constexpr int64_t kPageSize = 0x1000;

// Size of a page hash. Changing this will break compatibility
// with existing `icudtl.dat.hash` files, so be careful.
constexpr size_t kHashBytes = 8;
// ReadHash() assembles a HashType from exactly kHashBytes bytes, so the two
// sizes must agree.
static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
36 
HashPage(const uint8_t * page)37 inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) {
38   return FastHash(base::make_span(page, static_cast<size_t>(kPageSize)));
39 }
40 
ReadHash(const uint8_t * data,size_t offset)41 IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) {
42   DCHECK_EQ(0ul, offset % kHashBytes);
43   IcuMergeableDataFile::HashType hash = 0;
44   for (size_t i = 0; i < kHashBytes; i++) {
45     IcuMergeableDataFile::HashType byte = data[offset + i];
46     hash |= byte << (i * 8);
47   }
48   return hash;
49 }
50 
NPages(size_t length)51 constexpr size_t NPages(size_t length) {
52   return (length + kPageSize - 1) / kPageSize;
53 }
54 
55 }  // namespace
56 
57 class AshMemoryMappedFile {
58  public:
Initialize(File ash_file)59   bool Initialize(File ash_file) {
60     fd_ = ash_file.GetPlatformFile();
61     return memory_mapped_file_.Initialize(std::move(ash_file));
62   }
63 
fd() const64   PlatformFile fd() const { return fd_; }
data() const65   const uint8_t* data() const { return memory_mapped_file_.data(); }
length() const66   size_t length() const { return memory_mapped_file_.length(); }
67 
68  private:
69   PlatformFile fd_;
70   MemoryMappedFile memory_mapped_file_;
71 };
72 
MmapAshFile(const FilePath & ash_file_path)73 std::unique_ptr<AshMemoryMappedFile> MmapAshFile(
74     const FilePath& ash_file_path) {
75   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
76 
77   // Open Ash's data file.
78   File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ);
79 
80   // Mmap Ash's data file.
81   auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>();
82   bool map_successful = ash_mapped_file->Initialize(std::move(ash_file));
83   if (!map_successful) {
84     PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat";
85     return nullptr;
86   }
87 
88   return ash_mapped_file;
89 }
90 
// Class wrapping the memory-merging logic for `icudtl.dat`.
// Construction does no work; the file is mapped (and possibly merged with
// Ash's copy) by Initialize().
IcuMergeableDataFile::IcuMergeableDataFile() = default;
93 
~IcuMergeableDataFile()94 IcuMergeableDataFile::~IcuMergeableDataFile() {
95   if (lacros_data_) {
96     ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
97     munmap(lacros_data_, lacros_length_);
98   }
99 }
100 
// Special members of Hashes, the pair of per-page hash tables used while
// merging: `ash` maps a page hash to an offset in Ash's file and `lacros`
// holds one hash per page of Lacros's file.
IcuMergeableDataFile::Hashes::Hashes() = default;
// Takes ownership of both tables.
IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
                                     std::vector<HashType> lacros)
    : ash(std::move(ash)), lacros(std::move(lacros)) {}
// Move construction and move assignment are defaulted.
IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
    Hashes&& other) = default;
IcuMergeableDataFile::Hashes::~Hashes() = default;
109 
Initialize(File lacros_file,MemoryMappedFile::Region region)110 bool IcuMergeableDataFile::Initialize(File lacros_file,
111                                       MemoryMappedFile::Region region) {
112   DCHECK(region == MemoryMappedFile::Region::kWholeFile);
113   DCHECK(!lacros_file_.IsValid()) << "ICUDataFile::Initialize called twice";
114 
115   lacros_file_ = std::move(lacros_file);
116   int64_t lacros_length = lacros_file_.GetLength();
117   if (lacros_length < 0) {
118     return false;
119   }
120   // Narrow to size_t, since it's used for pointer arithmetic, mmap and other
121   // APIs that accept size_t.
122   lacros_length_ = base::checked_cast<size_t>(lacros_length);
123 
124   // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash.
125   bool map_successful = MmapLacrosFile(/*remap=*/false);
126 
127 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
128   // If we're inside an actual ChromeOS system (i.e. not just in
129   // linux-lacros-rel) then we can expect Ash Chrome (and its version of
130   // `icudtl.dat`) to be present in the default directory.
131   // In that case, we can attempt merging.
132   if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
133     bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath));
134     // If we hit a critical failure while merging, remap Lacros's version.
135     if (!merge_successful) {
136       PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
137       map_successful = MmapLacrosFile(/*remap=*/true);
138     }
139   }
140 #endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)
141 
142   return map_successful;
143 }
144 
data() const145 const uint8_t* IcuMergeableDataFile::data() const {
146   return static_cast<const uint8_t*>(lacros_data_);
147 }
148 
MergeWithAshVersion(const FilePath & ash_file_path)149 bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
150   // Verify the assumption that page size is 4K.
151   DCHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);
152 
153   // Mmap Ash's data file.
154   auto ash_file = MmapAshFile(ash_file_path);
155   if (!ash_file)
156     return true;  // Non-critical failure.
157 
158   // Calculate hashes for each page in Ash and Lacros's data files.
159   Hashes hashes = CalculateHashes(*ash_file, ash_file_path);
160 
161   // Find Lacros's ICU pages that are duplicated in Ash.
162   size_t lacros_offset = 0;
163   while (lacros_offset < lacros_length_) {
164     Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
165     // If there's no overlap, move to the next page and keep scanning.
166     if (ash_overlap.length == 0) {
167       lacros_offset += kPageSize;
168       continue;
169     }
170 
171     // Found a sequence of equal pages, merge them with Ash.
172     bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
173     if (!merge_successful)
174       return false;  // Critical failure.
175 
176     lacros_offset += ash_overlap.length;
177   }
178 
179   return true;  // Success.
180 }
181 
MmapLacrosFile(bool remap)182 bool IcuMergeableDataFile::MmapLacrosFile(bool remap) {
183   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
184 
185   if (remap) {
186     // If `remap` == true, we add the MAP_FIXED option to unmap the
187     // existing map and replace it with the new one in a single operation.
188     DCHECK_NE(lacros_data_, nullptr);
189     lacros_data_ = static_cast<uint8_t*>(
190         mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE,
191              lacros_file_.GetPlatformFile(), 0));
192   } else {
193     // Otherwise, simply map the file.
194     lacros_data_ = static_cast<uint8_t*>(
195         mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE,
196              lacros_file_.GetPlatformFile(), 0));
197   }
198 
199   if (lacros_data_ == MAP_FAILED) {
200     lacros_data_ = nullptr;
201     PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat";
202     return false;
203   }
204 
205   return true;
206 }
207 
FindOverlap(const AshMemoryMappedFile & ash_file,const Hashes & hashes,size_t lacros_offset) const208 IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap(
209     const AshMemoryMappedFile& ash_file,
210     const Hashes& hashes,
211     size_t lacros_offset) const {
212   // Search for equal pages by hash.
213   HashType hash = hashes.lacros[lacros_offset / kPageSize];
214   auto search = hashes.ash.find(hash);
215   if (search == hashes.ash.end())
216     return {0, 0};
217 
218   // Count how many pages (if any) have the same content.
219   size_t ash_offset = search->second;
220   size_t overlap_length =
221       kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset,
222                                   lacros_data_ + lacros_offset);
223 
224   return {ash_offset, overlap_length};
225 }
226 
MergeArea(const AshMemoryMappedFile & ash_file,const Slice & ash_overlap,size_t lacros_offset)227 bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
228                                      const Slice& ash_overlap,
229                                      size_t lacros_offset) {
230   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
231 
232   // Unmap from Lacros's file and map from Ash's file instead.
233   // NOTE: "[...] If the memory region specified by addr and length overlaps
234   //        pages of any existing mapping(s), then the overlapped part of the
235   //        existing mapping(s) will be discarded.  If the specified address
236   //        cannot be used, mmap() will fail."
237   // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
238   uint8_t* map_result = static_cast<uint8_t*>(
239       mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ,
240            MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset));
241 
242   if (map_result == MAP_FAILED) {
243     PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
244     return false;
245   }
246 
247   return true;
248 }
249 
CountEqualPages(const AshMemoryMappedFile & ash_file,const uint8_t * ash_page,const uint8_t * lacros_page) const250 size_t IcuMergeableDataFile::CountEqualPages(
251     const AshMemoryMappedFile& ash_file,
252     const uint8_t* ash_page,
253     const uint8_t* lacros_page) const {
254   size_t pages = 0;
255   const uint8_t* ash_end = ash_file.data() + ash_file.length();
256   const uint8_t* lacros_end = lacros_data_ + lacros_length_;
257 
258   while (ash_page < ash_end && lacros_page < lacros_end &&
259          memcmp(ash_page, lacros_page, kPageSize) == 0) {
260     ash_page += kPageSize;
261     lacros_page += kPageSize;
262     pages++;
263   }
264 
265   return pages;
266 }
267 
CalculateHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path)268 IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
269     const AshMemoryMappedFile& ash_file,
270     const FilePath& ash_file_path) {
271   // Try loading hashes from the pre-computed files first.
272   Hashes hashes;
273   used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);
274 
275   if (!used_cached_hashes_) {
276     // Calculate hashes for each page in Ash's data file.
277     std::vector<HashOffset> ash_hashes;
278     ash_hashes.reserve(NPages(ash_file.length()));
279     for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) {
280       // NOTE: "POSIX specifies that the system shall always zero fill any
281       //        partial page at the end of the object [...]".
282       // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
283       //
284       // Therefore this code works even if the size of Ash's `icudtl.dat` is not
285       // a multiple of the page size.
286       HashType hash = HashPage(ash_file.data() + offset);
287       ash_hashes.emplace_back(hash, offset);
288     }
289 
290     // Calculate hashes for each page in Lacros's data file.
291     hashes.lacros.reserve(NPages(lacros_length_));
292     for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) {
293       HashType hash = HashPage(lacros_data_ + offset);
294       hashes.lacros.emplace_back(hash);
295     }
296 
297     hashes.ash = HashToOffsetMap(std::move(ash_hashes));
298   }
299 
300   return hashes;
301 }
302 
MaybeLoadCachedHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path,Hashes & hashes)303 bool IcuMergeableDataFile::MaybeLoadCachedHashes(
304     const AshMemoryMappedFile& ash_file,
305     const FilePath& ash_file_path,
306     Hashes& hashes) {
307   FilePath ash_hash_path =
308       ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
309   FilePath lacros_hash_path =
310       GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);
311 
312   // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
313   // with the current version of `icudtl.dat`.
314   MemoryMappedFile ash_hash_file;
315   size_t ash_pages = NPages(ash_file.length());
316   bool result = ash_hash_file.Initialize(ash_hash_path);
317   if (!result || (ash_hash_file.length() % kHashBytes) ||
318       ((ash_hash_file.length() / kHashBytes) != ash_pages)) {
319     return false;
320   }
321 
322   // Same for Lacros's `icudtl.dat.hash`.
323   MemoryMappedFile lacros_hash_file;
324   size_t lacros_pages = NPages(lacros_length_);
325   result = lacros_hash_file.Initialize(lacros_hash_path);
326   if (!result || (lacros_hash_file.length() % kHashBytes) ||
327       ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
328     return false;
329   }
330 
331   // Load Ash's hashes.
332   std::vector<HashOffset> ash_hashes;
333   ash_hashes.reserve(ash_pages);
334   for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
335     HashType hash = ReadHash(ash_hash_file.data(), i);
336     size_t offset = (i / kHashBytes) * kPageSize;
337     ash_hashes.emplace_back(hash, offset);
338   }
339 
340   // Load Lacros's hashes.
341   hashes.lacros.reserve(lacros_pages);
342   for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
343     HashType hash = ReadHash(lacros_hash_file.data(), i);
344     hashes.lacros.emplace_back(hash);
345   }
346 
347   hashes.ash = HashToOffsetMap(std::move(ash_hashes));
348   return true;
349 }
350 
GetLacrosFilePath()351 FilePath IcuMergeableDataFile::GetLacrosFilePath() {
352   // /proc/self/fd/<fd>
353   //   This is a subdirectory containing one entry for each file
354   //   which the process has open, named by its file descriptor,
355   //   and which is a symbolic link to the actual file.
356   // Reference: proc(5) - Linux manual page.
357   char path[PATH_MAX];
358   FilePath proc_path =
359       FilePath("/proc/self/fd/")
360           .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile()));
361 
362   // We read the content of the symbolic link to find the path of the
363   // file associated with the file descriptor.
364   int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path));
365   DCHECK_NE(path_len, -1);
366   DCHECK_LT(path_len, PATH_MAX);
367 
368   return FilePath(std::string(path, 0, path_len));
369 }
370 
371 }  // namespace base::i18n
372