1 // Copyright (C) 2020 The Android Open Source Project 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #pragma once 16 17 #include <linux/types.h> 18 #include <stdint.h> 19 #include <stdlib.h> 20 #include <sys/mman.h> 21 22 #include <bitset> 23 #include <condition_variable> 24 #include <csignal> 25 #include <cstring> 26 #include <future> 27 #include <iostream> 28 #include <limits> 29 #include <map> 30 #include <mutex> 31 #include <string> 32 #include <thread> 33 #include <unordered_map> 34 #include <unordered_set> 35 #include <vector> 36 37 #include <android-base/file.h> 38 #include <android-base/logging.h> 39 #include <android-base/stringprintf.h> 40 #include <android-base/unique_fd.h> 41 #include <ext4_utils/ext4_utils.h> 42 #include <libdm/dm.h> 43 #include <libsnapshot/cow_reader.h> 44 #include <libsnapshot/cow_writer.h> 45 #include <snapuserd/snapuserd_buffer.h> 46 #include <snapuserd/snapuserd_kernel.h> 47 48 namespace android { 49 namespace snapshot { 50 51 using android::base::unique_fd; 52 using namespace std::chrono_literals; 53 54 static constexpr size_t PAYLOAD_SIZE = (1UL << 20); 55 static_assert(PAYLOAD_SIZE >= BLOCK_SZ); 56 57 /* 58 * With 4 threads, we get optimal performance 59 * when update_verifier reads the partition during 60 * boot. 61 */ 62 static constexpr int NUM_THREADS_PER_PARTITION = 4; 63 64 /* 65 * State transitions between worker threads and read-ahead 66 * threads. 67 * 68 * READ_AHEAD_BEGIN: Worker threads initiates the read-ahead 69 * thread to begin reading the copy operations 70 * for each bounded region. 71 * 72 * READ_AHEAD_IN_PROGRESS: When read ahead thread is in-flight 73 * and reading the copy operations. 74 * 75 * IO_IN_PROGRESS: Merge operation is in-progress by worker threads. 76 * 77 * IO_TERMINATED: When all the worker threads are done, request the 78 * read-ahead thread to terminate 79 * 80 * READ_AHEAD_FAILURE: If there are any IO failures when read-ahead 81 * thread is reading from COW device. 82 * 83 * The transition of each states is described in snapuserd_readahead.cpp 84 */ 85 enum class READ_AHEAD_IO_TRANSITION { 86 READ_AHEAD_BEGIN, 87 READ_AHEAD_IN_PROGRESS, 88 IO_IN_PROGRESS, 89 IO_TERMINATED, 90 READ_AHEAD_FAILURE, 91 }; 92 93 class Snapuserd; 94 95 class ReadAheadThread { 96 public: 97 ReadAheadThread(const std::string& cow_device, const std::string& backing_device, 98 const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd); 99 bool RunThread(); 100 101 private: 102 void InitializeRAIter(); 103 bool RAIterDone(); 104 void RAIterNext(); 105 const CowOperation* GetRAOpIter(); 106 void InitializeBuffer(); 107 108 bool InitializeFds(); CloseFds()109 void CloseFds() { 110 cow_fd_ = {}; 111 backing_store_fd_ = {}; 112 } 113 114 bool ReadAheadIOStart(); 115 void PrepareReadAhead(uint64_t* source_offset, int* pending_ops, std::vector<uint64_t>& blocks); 116 bool ReconstructDataFromCow(); 117 void CheckOverlap(const CowOperation* cow_op); 118 119 void* read_ahead_buffer_; 120 void* metadata_buffer_; 121 std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_; 122 std::string cow_device_; 123 std::string backing_store_device_; 124 std::string misc_name_; 125 126 unique_fd cow_fd_; 127 unique_fd backing_store_fd_; 128 129 std::shared_ptr<Snapuserd> snapuserd_; 130 131 std::unordered_set<uint64_t> dest_blocks_; 132 std::unordered_set<uint64_t> source_blocks_; 133 bool overlap_; 134 }; 135 136 class WorkerThread { 137 public: 138 WorkerThread(const std::string& cow_device, const std::string& backing_device, 139 const std::string& control_device, const std::string& misc_name, 140 std::shared_ptr<Snapuserd> snapuserd); 141 bool RunThread(); 142 143 private: 144 // Initialization 145 void InitializeBufsink(); 146 bool InitializeFds(); 147 bool InitReader(); CloseFds()148 void CloseFds() { 149 ctrl_fd_ = {}; 150 backing_store_fd_ = {}; 151 } 152 153 // Functions interacting with dm-user 154 bool ReadDmUserHeader(); 155 bool DmuserReadRequest(); 156 bool DmuserWriteRequest(); 157 bool ReadDmUserPayload(void* buffer, size_t size); 158 bool WriteDmUserPayload(size_t size, bool header_response); 159 160 bool ReadDiskExceptions(chunk_t chunk, size_t size); 161 bool ZerofillDiskExceptions(size_t read_size); 162 void ConstructKernelCowHeader(); 163 164 // IO Path 165 bool ProcessIORequest(); 166 int ReadData(sector_t sector, size_t size); 167 int ReadUnalignedSector(sector_t sector, size_t size, 168 std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it); 169 170 // Processing COW operations 171 bool ProcessCowOp(const CowOperation* cow_op); 172 bool ProcessReplaceOp(const CowOperation* cow_op); 173 // Handles Copy and Xor 174 bool ProcessCopyOp(const CowOperation* cow_op); 175 bool ProcessXorOp(const CowOperation* cow_op); 176 bool ProcessZeroOp(); 177 178 bool ReadFromBaseDevice(const CowOperation* cow_op); 179 bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op); 180 181 // Merge related functions 182 bool ProcessMergeComplete(chunk_t chunk, void* buffer); 183 loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer, 184 int* unmerged_exceptions); 185 186 int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset, 187 int unmerged_exceptions, bool* copy_op, bool* commit); 188 ChunkToSector(chunk_t chunk)189 sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } SectorToChunk(sector_t sector)190 chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } 191 192 std::unique_ptr<CowReader> reader_; 193 BufferSink bufsink_; 194 XorSink xorsink_; 195 196 std::string cow_device_; 197 std::string backing_store_device_; 198 std::string control_device_; 199 std::string misc_name_; 200 201 unique_fd cow_fd_; 202 unique_fd backing_store_fd_; 203 unique_fd ctrl_fd_; 204 205 std::shared_ptr<Snapuserd> snapuserd_; 206 uint32_t exceptions_per_area_; 207 }; 208 209 class Snapuserd : public std::enable_shared_from_this<Snapuserd> { 210 public: 211 Snapuserd(const std::string& misc_name, const std::string& cow_device, 212 const std::string& backing_device); 213 bool InitCowDevice(); 214 bool Start(); GetControlDevicePath()215 const std::string& GetControlDevicePath() { return control_device_; } GetMiscName()216 const std::string& GetMiscName() { return misc_name_; } GetNumSectors()217 uint64_t GetNumSectors() { return num_sectors_; } IsAttached()218 bool IsAttached() const { return attached_; } AttachControlDevice()219 void AttachControlDevice() { attached_ = true; } 220 221 void CheckMergeCompletionStatus(); 222 bool CommitMerge(int num_merge_ops); 223 CloseFds()224 void CloseFds() { cow_fd_ = {}; } FreeResources()225 void FreeResources() { 226 worker_threads_.clear(); 227 read_ahead_thread_ = nullptr; 228 } GetMetadataAreaSize()229 size_t GetMetadataAreaSize() { return vec_.size(); } GetExceptionBuffer(size_t i)230 void* GetExceptionBuffer(size_t i) { return vec_[i].get(); } 231 232 bool InitializeWorkers(); 233 std::unique_ptr<CowReader> CloneReaderForWorker(); GetSharedPtr()234 std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); } 235 GetChunkVec()236 std::vector<std::pair<sector_t, const CowOperation*>>& GetChunkVec() { return chunk_vec_; } GetMetadataVec()237 const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; } 238 compare(std::pair<sector_t,const CowOperation * > p1,std::pair<sector_t,const CowOperation * > p2)239 static bool compare(std::pair<sector_t, const CowOperation*> p1, 240 std::pair<sector_t, const CowOperation*> p2) { 241 return p1.first < p2.first; 242 } 243 244 void UnmapBufferRegion(); 245 bool MmapMetadata(); 246 247 // Read-ahead related functions GetReadAheadOpsVec()248 std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; } GetReadAheadMap()249 std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; } GetMappedAddr()250 void* GetMappedAddr() { return mapped_addr_; } IsReadAheadFeaturePresent()251 bool IsReadAheadFeaturePresent() { return read_ahead_feature_; } 252 void PrepareReadAhead(); 253 void StartReadAhead(); 254 void MergeCompleted(); 255 bool ReadAheadIOCompleted(bool sync); 256 void ReadAheadIOFailed(); 257 bool WaitForMergeToComplete(); 258 bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer); ReconstructDataFromCow()259 bool ReconstructDataFromCow() { return populate_data_from_cow_; } ReconstructDataFromCowFinish()260 void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; } 261 bool WaitForReadAheadToStart(); 262 263 uint64_t GetBufferMetadataOffset(); 264 size_t GetBufferMetadataSize(); 265 size_t GetBufferDataOffset(); 266 size_t GetBufferDataSize(); 267 268 // Final block to be merged in a given read-ahead buffer region SetFinalBlockMerged(uint64_t x)269 void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; } GetFinalBlockMerged()270 uint64_t GetFinalBlockMerged() { return final_block_merged_; } 271 // Total number of blocks to be merged in a given read-ahead buffer region SetTotalRaBlocksMerged(int x)272 void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; } GetTotalRaBlocksMerged()273 int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; } SetSocketPresent(bool socket)274 void SetSocketPresent(bool socket) { is_socket_present_ = socket; } 275 276 private: 277 bool IsChunkIdMetadata(chunk_t chunk); 278 chunk_t GetNextAllocatableChunkId(chunk_t chunk_id); 279 280 bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer); 281 bool ReadMetadata(); ChunkToSector(chunk_t chunk)282 sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; } SectorToChunk(sector_t sector)283 chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; } IsBlockAligned(int read_size)284 bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); } 285 struct BufferState* GetBufferState(); 286 287 void ReadBlocks(const std::string& partition_name, const std::string& dm_block_device); 288 void ReadBlocksToCache(const std::string& dm_block_device, const std::string& partition_name, 289 off_t offset, size_t size); 290 291 std::string cow_device_; 292 std::string backing_store_device_; 293 std::string control_device_; 294 std::string misc_name_; 295 296 unique_fd cow_fd_; 297 298 uint32_t exceptions_per_area_; 299 uint64_t num_sectors_; 300 301 std::unique_ptr<CowReader> reader_; 302 303 // Vector of disk exception which is a 304 // mapping of old-chunk to new-chunk 305 std::vector<std::unique_ptr<uint8_t[]>> vec_; 306 307 // chunk_vec stores the pseudo mapping of sector 308 // to COW operations. 309 std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_; 310 311 std::mutex lock_; 312 std::condition_variable cv; 313 314 void* mapped_addr_; 315 size_t total_mapped_addr_length_; 316 317 std::vector<std::unique_ptr<WorkerThread>> worker_threads_; 318 // Read-ahead related 319 std::unordered_map<uint64_t, void*> read_ahead_buffer_map_; 320 std::vector<const CowOperation*> read_ahead_ops_; 321 bool populate_data_from_cow_ = false; 322 bool read_ahead_feature_; 323 uint64_t final_block_merged_; 324 int total_ra_blocks_merged_ = 0; 325 READ_AHEAD_IO_TRANSITION io_state_; 326 std::unique_ptr<ReadAheadThread> read_ahead_thread_; 327 328 bool merge_initiated_ = false; 329 bool attached_ = false; 330 bool is_socket_present_; 331 }; 332 333 } // namespace snapshot 334 } // namespace android 335