1 // Copyright 2022 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef BASE_I18N_ICU_MERGEABLE_DATA_FILE_H_ 6 #define BASE_I18N_ICU_MERGEABLE_DATA_FILE_H_ 7 8 #include <stdint.h> 9 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "base/containers/flat_map.h" 15 #include "base/feature_list.h" 16 #include "base/files/memory_mapped_file.h" 17 #include "base/i18n/base_i18n_export.h" 18 #include "base/memory/raw_ptr.h" 19 20 namespace base::i18n { 21 22 // Enable merging of icudtl.dat in Lacros. 23 BASE_I18N_EXPORT BASE_DECLARE_FEATURE(kLacrosMergeIcuDataFile); 24 25 // Class wrapping the memory-mapped instance of Ash's icudtl.dat. 26 // Needed to keep track of its file descriptor. 27 class AshMemoryMappedFile; 28 29 // Class wrapping the memory-merging logic for icudtl.dat. 30 class BASE_I18N_EXPORT IcuMergeableDataFile { 31 public: 32 using HashType = uint64_t; 33 34 // Extension for ICU data's associated files containing page hashes. 35 static constexpr char kIcuDataFileHashExtension[] = "hash"; 36 37 IcuMergeableDataFile(); 38 ~IcuMergeableDataFile(); 39 40 // The following APIs are designed to be consistent with MemoryMappedFile. 41 bool Initialize(File lacros_file, MemoryMappedFile::Region region); 42 const uint8_t* data() const; 43 44 // Attempt merging with Ash's icudtl.dat. 45 // Return `true` if successful or in case of non-critical failure. 46 // Return `false` in case of critical failure (mmap will need to be called 47 // again). 48 bool MergeWithAshVersion(const FilePath& icudtl_ash_path); 49 50 // True if page hashes were read from cache files, false otherwise. used_cached_hashes()51 bool used_cached_hashes() const { return used_cached_hashes_; } 52 53 private: 54 using HashOffset = std::pair<HashType, size_t>; 55 using HashToOffsetMap = base::flat_map<HashType, size_t>; 56 57 struct Hashes { 58 Hashes(); 59 Hashes(HashToOffsetMap ash, std::vector<HashType> lacros); 60 Hashes(Hashes&& other); 61 Hashes& operator=(Hashes&& other); 62 ~Hashes(); 63 // Map from page hashes to offsets for Ash's icudtl.dat. 64 HashToOffsetMap ash; 65 // Vector of page hashes for Lacros's icudtl.dat. Indexed by page index. 66 std::vector<HashType> lacros; 67 }; 68 69 struct Slice { 70 size_t offset; 71 size_t length; 72 }; 73 74 bool MmapLacrosFile(bool remap); 75 76 Slice FindOverlap(const AshMemoryMappedFile& ash_file, 77 const Hashes& hashes, 78 size_t lacros_offset) const; 79 80 bool MergeArea(const AshMemoryMappedFile& ash_file, 81 const Slice& ash_overlap, 82 size_t lacros_offset); 83 84 // Count the number of equal pages (if any), starting at the given Ash and 85 // Lacros offsets. `ash_page` and `lacros_page` are pages with the same 86 // hash, so they likely represent the beginning of an overlapping area 87 // in their respective `icudtl.dat` file. 88 size_t CountEqualPages(const AshMemoryMappedFile& ash_file, 89 const uint8_t* ash_page, 90 const uint8_t* lacros_page) const; 91 92 Hashes CalculateHashes(const AshMemoryMappedFile& ash_file, 93 const FilePath& ash_file_path); 94 95 // Try loading pre-computed hashes from `icudtl.dat.hash` files. 96 // Return `true` if successful, `false` otherwise. 97 // `hashes` will contain the pre-computed hashes if successful, 98 // will be left untouched otherwise. 99 bool MaybeLoadCachedHashes(const AshMemoryMappedFile& ash_file, 100 const FilePath& ash_file_path, 101 Hashes& hashes); 102 103 // Get Lacros's `icudtl.dat` path from its file descriptor. 104 // Necessary because `File` objects don't keep track of the file path. 105 FilePath GetLacrosFilePath(); 106 107 File lacros_file_; 108 size_t lacros_length_ = 0; 109 raw_ptr<uint8_t, AllowPtrArithmetic> lacros_data_ = nullptr; 110 bool used_cached_hashes_ = false; 111 }; 112 113 } // namespace base::i18n 114 115 #endif // BASE_I18N_MERGEABLE_ICU_DATA_FILE_H_ 116